[ooo-build-commit] .: sal/rtl

Cédric Bosdonnat cbosdo at kemper.freedesktop.org
Fri Sep 17 04:17:13 PDT 2010


 sal/rtl/source/hash.cxx |  202 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 165 insertions(+), 37 deletions(-)

New commits:
commit 5ffc9bc812c1d4d068590ff28d32cf6287ee9eb8
Author: Michael Meeks <michael.meeks at novell.com>
Date:   Fri Sep 17 13:13:04 2010 +0200

    sal-strintern-speed.diff: sal-strintern - speedup
    
    i#78496

diff --git a/sal/rtl/source/hash.cxx b/sal/rtl/source/hash.cxx
index 63de2b4..7ab014a 100644
--- a/sal/rtl/source/hash.cxx
+++ b/sal/rtl/source/hash.cxx
@@ -27,67 +27,164 @@
 
 // MARKER(update_precomp.py): autogen include statement, do not remove
 #include "precompiled_sal.hxx"
-#include "rtl/allocator.hxx"
 
 #include "hash.h"
 #include "strimp.h"
+#include <osl/diagnose.h>
 
+struct StringHashTableImpl {
+    sal_uInt32    nEntries;
+    sal_uInt32    nSize;
+    rtl_uString **pData;
+};
+
+typedef StringHashTableImpl StringHashTable;
 
-#include <hash_set>
+// Only for use in the implementation
+static StringHashTable *rtl_str_hash_new (sal_uInt32 nSize);
+static void rtl_str_hash_free (StringHashTable *pHash);
 
-namespace {
+}
 
-struct UStringHash
+StringHashTable *
+getHashTable ()
 {
-    size_t operator()(rtl_uString * const &rString) const
-    { return (size_t)rtl_ustr_hashCode_WithLength( rString->buffer, rString->length ); }
-};
+    static StringHashTable *pInternPool = NULL;
+    if (pInternPool == NULL) {
+        static StringHashTable* pHash = rtl_str_hash_new(1024);
+        pInternPool = pHash;
+    }
+    return pInternPool;
+}
+
+// Better / smaller / faster hash set ....
+
+// TODO: add bottom bit-set list terminator to string list
 
-struct UStringEqual
+static sal_uInt32
+getNextSize (sal_uInt32 nSize)
 {
-    sal_Bool operator() ( rtl_uString * const &pStringA,
-                          rtl_uString * const &pStringB) const
+    // Sedgewick - Algorithms in C P577.
+    static const sal_uInt32 nPrimes[] = { 1021, 2039, 4093, 8191, 16381, 32749,
+                                          65521, 131071,262139, 524287, 1048573,
+                                          2097143, 4194301, 8388593, 16777213,
+                                          33554393, 67108859, 134217689 };
+    #define NUM_PRIMES (sizeof (nPrimes)/ sizeof (nPrimes[0]))
+    for (sal_uInt32 i = 0; i < NUM_PRIMES; i++)
     {
-        if (pStringA == pStringB)
-            return true;
-        if (pStringA->length != pStringB->length)
-            return false;
-        return !rtl_ustr_compare_WithLength( pStringA->buffer, pStringA->length,
-                                             pStringB->buffer, pStringB->length);
+        if (nPrimes[i] > nSize)
+            return nPrimes[i];
     }
-};
+    return nSize * 2;
+}
 
-typedef std::hash_set< rtl_uString *, UStringHash, UStringEqual,
-                       rtl::Allocator<rtl_uString *> > StringHashTable;
+static sal_uInt32
+hashString (rtl_uString *pString)
+{
+    return (sal_uInt32) rtl_ustr_hashCode_WithLength (pString->buffer,
+                                                      pString->length);
+}
 
-StringHashTable *
-getHashTable ()
+static StringHashTable *
+rtl_str_hash_new (sal_uInt32 nSize)
 {
-    static StringHashTable *pInternPool = NULL;
-    if (pInternPool == NULL) {
-        static StringHashTable aImpl(1024);
-        pInternPool = &aImpl;
+    StringHashTable *pHash = (StringHashTable *)malloc (sizeof (StringHashTable));
+
+    pHash->nEntries = 0;
+    pHash->nSize = getNextSize (nSize);
+    pHash->pData = (rtl_uString **) calloc (sizeof (rtl_uString *), pHash->nSize);
+
+    return pHash;
+}
+
+static void
+rtl_str_hash_free (StringHashTable *pHash)
+{
+    if (!pHash)
+        return;
+    if (pHash->pData)
+        free (pHash->pData);
+    free (pHash);
+}
+
+static void
+rtl_str_hash_insert_nonequal (StringHashTable   *pHash,
+                              rtl_uString       *pString)
+{
+    sal_uInt32  nHash = hashString (pString);
+    sal_uInt32  n;
+    rtl_uString *pHashStr;
+
+    n = nHash % pHash->nSize;
+    while ((pHashStr = pHash->pData[n]) != NULL) {
+        n++;
+        if (n >= pHash->nSize)
+            n = 0;
     }
-    return pInternPool;
+    pHash->pData[n] = pString;
+}
+
+static void
+rtl_str_hash_resize (sal_uInt32        nNewSize)
+{
+    sal_uInt32 i;
+    StringHashTable *pNewHash;
+    StringHashTable *pHash = getHashTable();
+
+    OSL_ASSERT (nNewSize > pHash->nEntries);
+
+    pNewHash = rtl_str_hash_new (nNewSize);
+
+    for (i = 0; i < pHash->nSize; i++)
+    {
+        if (pHash->pData[i] != NULL)
+            rtl_str_hash_insert_nonequal (pNewHash, pHash->pData[i]);
+    }
+    pNewHash->nEntries = pHash->nEntries;
+    free (pHash->pData);
+    *pHash = *pNewHash;
+    pNewHash->pData = NULL;
+    rtl_str_hash_free (pNewHash);
 }
 
+static int
+compareEqual (rtl_uString *pStringA, rtl_uString *pStringB)
+{
+    if (pStringA == pStringB)
+        return 1;
+    if (pStringA->length != pStringB->length)
+        return 0;
+    return !rtl_ustr_compare_WithLength( pStringA->buffer, pStringA->length,
+                                         pStringB->buffer, pStringB->length);
 }
 
-extern "C" {
 
 rtl_uString *
 rtl_str_hash_intern (rtl_uString       *pString,
                      int                can_return)
 {
+    sal_uInt32  nHash = hashString (pString);
+    sal_uInt32  n;
+    rtl_uString *pHashStr;
+
     StringHashTable *pHash = getHashTable();
-    StringHashTable::iterator aIter;
-    aIter = pHash->find(pString);
-    if (aIter != pHash->end())
-    {
-        rtl_uString *pHashStr = *aIter;
-        rtl_uString_acquire (pHashStr);
-        return pHashStr;
+
+    // Should we resize ?
+    if (pHash->nEntries >= pHash->nSize/2)
+        rtl_str_hash_resize (getNextSize(pHash->nSize));
+
+    n = nHash % pHash->nSize;
+    while ((pHashStr = pHash->pData[n]) != NULL) {
+        if (compareEqual (pHashStr, pString))
+        {
+            rtl_uString_acquire (pHashStr);
+            return pHashStr;
+        }
+        n++;
+        if (n >= pHash->nSize)
+            n = 0;
     }
+
     if (!can_return)
     {
         rtl_uString *pCopy = NULL;
@@ -99,7 +196,8 @@ rtl_str_hash_intern (rtl_uString       *pString,
 
     if (!SAL_STRING_IS_STATIC (pString))
         pString->refCount |= SAL_STRING_INTERN_FLAG;
-    pHash->insert(pString);
+    pHash->pData[n] = pString;
+    pHash->nEntries++;
 
     return pString;
 }
@@ -107,7 +205,37 @@ rtl_str_hash_intern (rtl_uString       *pString,
 void
 rtl_str_hash_remove (rtl_uString       *pString)
 {
-    getHashTable()->erase(pString);
-}
+    sal_uInt32   n;
+    sal_uInt32   nHash = hashString (pString);
+    rtl_uString *pHashStr;
+
+    StringHashTable *pHash = getHashTable();
 
+    n = nHash % pHash->nSize;
+    while ((pHashStr = pHash->pData[n]) != NULL) {
+        if (compareEqual (pHashStr, pString))
+            break;
+        n++;
+        if (n >= pHash->nSize)
+            n = 0;
+    }
+    OSL_ASSERT (pHash->pData[n] != 0);
+    if (pHash->pData[n] == NULL)
+        return;
+
+    pHash->pData[n++] = NULL;
+    pHash->nEntries--;
+
+    if (n >= pHash->nSize)
+        n = 0;
+
+    while ((pHashStr = pHash->pData[n]) != NULL) {
+        pHash->pData[n] = NULL;
+        // FIXME: rather unsophisticated and N^2 in chain-length, but robust.
+        rtl_str_hash_insert_nonequal (pHash, pHashStr);
+        n++;
+        if (n >= pHash->nSize)
+            n = 0;
+    }
+    // FIXME: Should we down-size ?
 }


More information about the ooo-build-commit mailing list