[Mesa-dev] [PATCH 1/3] util: Change hash_table to use quadratic probing

Thomas Helland thomashelland90 at gmail.com
Sun Mar 29 11:05:38 PDT 2015


This should give better cache locality, less memory consumption,
less code, and should also be faster since we avoid a modulo operation.
Also change the table size to be a power of two.
This gives better performance as we can do bitmasking instead of
modulo operations for fitting the hash in the address space.
By using the probe sequence hash = sh + (i + i*i)/2 (the triangular
numbers) with a power-of-two table size, we are guaranteed that the
first `size` retries from the quad probing are distinct, and so should
be able to completely fill the table.
This passes the test added to exercise a worst case collision scenario.
Also, start at size = 16 instead of 4.
This should reduce some allocation overhead
when constantly using tables larger than 3 entries.
The amount of space used before rehash is changed to 70%.
This should decrease collisions slightly, leading to better performance.

V3: Feedback from Connor Abbott
    - Remove hash_size table
    - Correct comment-style

    Feedback from Eric Anholt
    - Correct quadratic probing algorithm

    Feedback from Jason Ekstrand
    - Add "unreachable" if we fail to insert in table

Signed-off-by: Thomas Helland <thomashelland90 at gmail.com>
---
 src/util/hash_table.c | 108 +++++++++++++++++---------------------------------
 src/util/hash_table.h |   3 +-
 2 files changed, 38 insertions(+), 73 deletions(-)

diff --git a/src/util/hash_table.c b/src/util/hash_table.c
index 3247593..24184c0 100644
--- a/src/util/hash_table.c
+++ b/src/util/hash_table.c
@@ -33,11 +33,16 @@
  */
 
 /**
- * Implements an open-addressing, linear-reprobing hash table.
+ * Implements an open-addressing, quadratic probing hash table.
  *
- * For more information, see:
- *
- * http://cgit.freedesktop.org/~anholt/hash_table/tree/README
+ * We choose table sizes that are powers of two.
+ * This is computationally less expensive than primes.
+ * As a bonus the size and free space can be calculated instead of looked up.
+ * FNV-1a has good avalanche properties, so collision is not an issue.
+ * These tables are sized to have an extra 30% free to avoid
+ * exponential performance degradation as the hash table fills.
+ * The table has a starting size of 16 to avoid spamming
+ * rzalloc and friends in the start of most of our tables.
  */
 
 #include <stdlib.h>
@@ -50,47 +55,6 @@
 
 static const uint32_t deleted_key_value;
 
-/**
- * From Knuth -- a good choice for hash/rehash values is p, p-2 where
- * p and p-2 are both prime.  These tables are sized to have an extra 10%
- * free to avoid exponential performance degradation as the hash table fills
- */
-static const struct {
-   uint32_t max_entries, size, rehash;
-} hash_sizes[] = {
-   { 2,			5,		3	  },
-   { 4,			7,		5	  },
-   { 8,			13,		11	  },
-   { 16,		19,		17	  },
-   { 32,		43,		41        },
-   { 64,		73,		71        },
-   { 128,		151,		149       },
-   { 256,		283,		281       },
-   { 512,		571,		569       },
-   { 1024,		1153,		1151      },
-   { 2048,		2269,		2267      },
-   { 4096,		4519,		4517      },
-   { 8192,		9013,		9011      },
-   { 16384,		18043,		18041     },
-   { 32768,		36109,		36107     },
-   { 65536,		72091,		72089     },
-   { 131072,		144409,		144407    },
-   { 262144,		288361,		288359    },
-   { 524288,		576883,		576881    },
-   { 1048576,		1153459,	1153457   },
-   { 2097152,		2307163,	2307161   },
-   { 4194304,		4613893,	4613891   },
-   { 8388608,		9227641,	9227639   },
-   { 16777216,		18455029,	18455027  },
-   { 33554432,		36911011,	36911009  },
-   { 67108864,		73819861,	73819859  },
-   { 134217728,		147639589,	147639587 },
-   { 268435456,		295279081,	295279079 },
-   { 536870912,		590559793,	590559791 },
-   { 1073741824,	1181116273,	1181116271},
-   { 2147483648ul,	2362232233ul,	2362232231ul}
-};
-
 static int
 entry_is_free(const struct hash_entry *entry)
 {
@@ -121,10 +85,13 @@ _mesa_hash_table_create(void *mem_ctx,
    if (ht == NULL)
       return NULL;
 
-   ht->size_index = 0;
-   ht->size = hash_sizes[ht->size_index].size;
-   ht->rehash = hash_sizes[ht->size_index].rehash;
-   ht->max_entries = hash_sizes[ht->size_index].max_entries;
+   /* Start the table at an initial size of 16
+    * We use a bit more memory, but avoid spamming
+    * malloc and friends when starting a new table
+    */
+   ht->size_iteration = 4;
+   ht->size = 1 << ht->size_iteration;
+   ht->max_entries = ht->size * 0.7;
    ht->key_hash_function = key_hash_function;
    ht->key_equals_function = key_equals_function;
    ht->table = rzalloc_array(ht, struct hash_entry, ht->size);
@@ -182,12 +149,11 @@ _mesa_hash_table_set_deleted_key(struct hash_table *ht, const void *deleted_key)
 static struct hash_entry *
 hash_table_search(struct hash_table *ht, uint32_t hash, const void *key)
 {
-   uint32_t start_hash_address = hash % ht->size;
+   uint32_t start_hash_address = hash & (ht->size - 1);
    uint32_t hash_address = start_hash_address;
+   uint32_t quad_hash = 1;
 
    do {
-      uint32_t double_hash;
-
       struct hash_entry *entry = ht->table + hash_address;
 
       if (entry_is_free(entry)) {
@@ -198,9 +164,9 @@ hash_table_search(struct hash_table *ht, uint32_t hash, const void *key)
          }
       }
 
-      double_hash = 1 + hash % ht->rehash;
-
-      hash_address = (hash_address + double_hash) % ht->size;
+      hash_address = (start_hash_address +
+                (quad_hash + (quad_hash * quad_hash)) / 2) & (ht->size - 1);
+      quad_hash++;
    } while (hash_address != start_hash_address);
 
    return NULL;
@@ -232,26 +198,25 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
                   const void *key, void *data);
 
 static void
-_mesa_hash_table_rehash(struct hash_table *ht, unsigned new_size_index)
+_mesa_hash_table_rehash(struct hash_table *ht, uint32_t new_size_iteration)
 {
    struct hash_table old_ht;
    struct hash_entry *table, *entry;
 
-   if (new_size_index >= ARRAY_SIZE(hash_sizes))
+   if (new_size_iteration >= 31)
       return;
 
    table = rzalloc_array(ht, struct hash_entry,
-                         hash_sizes[new_size_index].size);
+                         1 << new_size_iteration);
    if (table == NULL)
       return;
 
    old_ht = *ht;
 
    ht->table = table;
-   ht->size_index = new_size_index;
-   ht->size = hash_sizes[ht->size_index].size;
-   ht->rehash = hash_sizes[ht->size_index].rehash;
-   ht->max_entries = hash_sizes[ht->size_index].max_entries;
+   ht->size_iteration = new_size_iteration;
+   ht->size = 1 << new_size_iteration;
+   ht->max_entries = ht->size * 0.7;
    ht->entries = 0;
    ht->deleted_entries = 0;
 
@@ -267,19 +232,20 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
                   const void *key, void *data)
 {
    uint32_t start_hash_address, hash_address;
+   uint32_t quad_hash = 1;
    struct hash_entry *available_entry = NULL;
 
    if (ht->entries >= ht->max_entries) {
-      _mesa_hash_table_rehash(ht, ht->size_index + 1);
+      _mesa_hash_table_rehash(ht, ht->size_iteration + 1);
    } else if (ht->deleted_entries + ht->entries >= ht->max_entries) {
-      _mesa_hash_table_rehash(ht, ht->size_index);
+      _mesa_hash_table_rehash(ht, ht->size_iteration);
    }
 
-   start_hash_address = hash % ht->size;
+   start_hash_address = hash & (ht->size - 1);
    hash_address = start_hash_address;
+
    do {
       struct hash_entry *entry = ht->table + hash_address;
-      uint32_t double_hash;
 
       if (!entry_is_present(ht, entry)) {
          /* Stash the first available entry we find */
@@ -307,10 +273,9 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
          return entry;
       }
 
-
-      double_hash = 1 + hash % ht->rehash;
-
-      hash_address = (hash_address + double_hash) % ht->size;
+      hash_address = (start_hash_address +
+                (quad_hash + (quad_hash * quad_hash)) / 2) & (ht->size - 1);
+      quad_hash++;
    } while (hash_address != start_hash_address);
 
    if (available_entry) {
@@ -326,6 +291,7 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
    /* We could hit here if a required resize failed. An unchecked-malloc
     * application could ignore this result.
     */
+   unreachable("Failed to insert entry in hash table");
    return NULL;
 }
 
@@ -405,7 +371,7 @@ _mesa_hash_table_random_entry(struct hash_table *ht,
                               bool (*predicate)(struct hash_entry *entry))
 {
    struct hash_entry *entry;
-   uint32_t i = rand() % ht->size;
+   uint32_t i = rand() & (ht->size - 1);
 
    if (ht->entries == 0)
       return NULL;
diff --git a/src/util/hash_table.h b/src/util/hash_table.h
index eb9dbc3..6b464f1 100644
--- a/src/util/hash_table.h
+++ b/src/util/hash_table.h
@@ -50,9 +50,8 @@ struct hash_table {
    bool (*key_equals_function)(const void *a, const void *b);
    const void *deleted_key;
    uint32_t size;
-   uint32_t rehash;
    uint32_t max_entries;
-   uint32_t size_index;
+   uint32_t size_iteration;
    uint32_t entries;
    uint32_t deleted_entries;
 };
-- 
2.3.4



More information about the mesa-dev mailing list