[Mesa-dev] [PATCH 2/7] util: Change hash_table to use quadratic probing

Thomas Helland thomashelland90 at gmail.com
Tue Feb 21 20:53:32 UTC 2017


This will allow us to remove the large static table and use a
power of two hash table size that we can compute on the fly.
We can use bitmasking instead of modulo to fit our hash in the table,
and it's less code.

By using the algorithm hash = sh + i/2 + i*i/2
we are guaranteed that all retries from the quad probing
are distinct, and so we should be able to completely fill the table.
This has been verified separately by Eric Anholt and Jason Ekstrand.
This passes the test added to exercise a worst case collision scenario.

This also exhibits much better collision avoidance performance than
the existing implementation; the worst case collision I've found by
running shader-db on a subset of my shader-collection is 78 runs of
the double hashing loop before insetion succeeded. With this
implementation the worst case is 25. With the existing implementation
99% of insetions had succeeded after 10 retries, while with the new
implementation that is dropped to 7. 99.5% of insertions succeeded after
13 retries, while with the new implementation that number is down to 8-9.
While the new implementation performs better on collisions in general,
the biggest difference is that the old one uses more retries to insert
an entry when it exhibits collisions. While the new implementation sees
12506 insertions that have not found a location after 10 retries,
that drops to only 74 after 20 retries. The old implementation sees
13864 insertions not having a spot after 16 retries. 1370 of these
have still not found a spot in the table after 26 retries.

V4: Feedback from Eric Anholt
   - Don't change load factor or starting size.

V3: Feedback from Connor Abbott
   - Remove hash_size table
   - Correct comment-style

   Feedback from Eric Anholt
   - Correct quadratic probing algorithm

   Feedback from Jason Ekstrand
   - Add "unreachable" if we fail to insert in table
---
 src/util/hash_table.c | 102 +++++++++++++++-----------------------------------
 src/util/hash_table.h |   3 +-
 2 files changed, 32 insertions(+), 73 deletions(-)

diff --git a/src/util/hash_table.c b/src/util/hash_table.c
index 9e643af8b2..a93326ec27 100644
--- a/src/util/hash_table.c
+++ b/src/util/hash_table.c
@@ -33,11 +33,14 @@
  */
 
 /**
- * Implements an open-addressing, linear-reprobing hash table.
+ * Implements an open-addressing, quadratic probing hash table.
  *
- * For more information, see:
- *
- * http://cgit.freedesktop.org/~anholt/hash_table/tree/README
+ * We choose table sizes that's a power of two.
+ * This is computationally less expensive than primes.
+ * As a bonus the size and free space can be calculated instead of looked up.
+ * FNV-1a has good avalanche properties, so collision is not an issue.
+ * These tables are sized to have an extra 10% free to avoid
+ * exponential performance degradation as the hash table fills.
  */
 
 #include <stdlib.h>
@@ -50,47 +53,6 @@
 
 static const uint32_t deleted_key_value;
 
-/**
- * From Knuth -- a good choice for hash/rehash values is p, p-2 where
- * p and p-2 are both prime.  These tables are sized to have an extra 10%
- * free to avoid exponential performance degradation as the hash table fills
- */
-static const struct {
-   uint32_t max_entries, size, rehash;
-} hash_sizes[] = {
-   { 2,			5,		3	  },
-   { 4,			7,		5	  },
-   { 8,			13,		11	  },
-   { 16,		19,		17	  },
-   { 32,		43,		41        },
-   { 64,		73,		71        },
-   { 128,		151,		149       },
-   { 256,		283,		281       },
-   { 512,		571,		569       },
-   { 1024,		1153,		1151      },
-   { 2048,		2269,		2267      },
-   { 4096,		4519,		4517      },
-   { 8192,		9013,		9011      },
-   { 16384,		18043,		18041     },
-   { 32768,		36109,		36107     },
-   { 65536,		72091,		72089     },
-   { 131072,		144409,		144407    },
-   { 262144,		288361,		288359    },
-   { 524288,		576883,		576881    },
-   { 1048576,		1153459,	1153457   },
-   { 2097152,		2307163,	2307161   },
-   { 4194304,		4613893,	4613891   },
-   { 8388608,		9227641,	9227639   },
-   { 16777216,		18455029,	18455027  },
-   { 33554432,		36911011,	36911009  },
-   { 67108864,		73819861,	73819859  },
-   { 134217728,		147639589,	147639587 },
-   { 268435456,		295279081,	295279079 },
-   { 536870912,		590559793,	590559791 },
-   { 1073741824,	1181116273,	1181116271},
-   { 2147483648ul,	2362232233ul,	2362232231ul}
-};
-
 static int
 entry_is_free(const struct hash_entry *entry)
 {
@@ -121,10 +83,9 @@ _mesa_hash_table_create(void *mem_ctx,
    if (ht == NULL)
       return NULL;
 
-   ht->size_index = 0;
-   ht->size = hash_sizes[ht->size_index].size;
-   ht->rehash = hash_sizes[ht->size_index].rehash;
-   ht->max_entries = hash_sizes[ht->size_index].max_entries;
+   ht->size_iteration = 2;
+   ht->size = 1 << ht->size_iteration;
+   ht->max_entries = ht->size * 0.9;
    ht->key_hash_function = key_hash_function;
    ht->key_equals_function = key_equals_function;
    ht->table = rzalloc_array(ht, struct hash_entry, ht->size);
@@ -208,12 +169,11 @@ _mesa_hash_table_set_deleted_key(struct hash_table *ht, const void *deleted_key)
 static struct hash_entry *
 hash_table_search(struct hash_table *ht, uint32_t hash, const void *key)
 {
-   uint32_t start_hash_address = hash % ht->size;
+   uint32_t start_hash_address = hash & (ht->size - 1);
    uint32_t hash_address = start_hash_address;
+   uint32_t quad_hash = 1;
 
    do {
-      uint32_t double_hash;
-
       struct hash_entry *entry = ht->table + hash_address;
 
       if (entry_is_free(entry)) {
@@ -224,9 +184,9 @@ hash_table_search(struct hash_table *ht, uint32_t hash, const void *key)
          }
       }
 
-      double_hash = 1 + hash % ht->rehash;
-
-      hash_address = (hash_address + double_hash) % ht->size;
+      hash_address = (start_hash_address +
+                (quad_hash + (quad_hash * quad_hash)) / 2) & (ht->size - 1);
+      quad_hash++;
    } while (hash_address != start_hash_address);
 
    return NULL;
@@ -258,26 +218,25 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
                   const void *key, void *data);
 
 static void
-_mesa_hash_table_rehash(struct hash_table *ht, unsigned new_size_index)
+_mesa_hash_table_rehash(struct hash_table *ht, uint32_t new_size_iteration)
 {
    struct hash_table old_ht;
    struct hash_entry *table, *entry;
 
-   if (new_size_index >= ARRAY_SIZE(hash_sizes))
+   if (new_size_iteration >= 31)
       return;
 
    table = rzalloc_array(ht, struct hash_entry,
-                         hash_sizes[new_size_index].size);
+                         1 << new_size_iteration);
    if (table == NULL)
       return;
 
    old_ht = *ht;
 
    ht->table = table;
-   ht->size_index = new_size_index;
-   ht->size = hash_sizes[ht->size_index].size;
-   ht->rehash = hash_sizes[ht->size_index].rehash;
-   ht->max_entries = hash_sizes[ht->size_index].max_entries;
+   ht->size_iteration = new_size_iteration;
+   ht->size = 1 << new_size_iteration;
+   ht->max_entries = ht->size * 0.7;
    ht->entries = 0;
    ht->deleted_entries = 0;
 
@@ -293,21 +252,22 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
                   const void *key, void *data)
 {
    uint32_t start_hash_address, hash_address;
+   uint32_t quad_hash = 1;
    struct hash_entry *available_entry = NULL;
 
    assert(key != NULL);
 
    if (ht->entries >= ht->max_entries) {
-      _mesa_hash_table_rehash(ht, ht->size_index + 1);
+      _mesa_hash_table_rehash(ht, ht->size_iteration + 1);
    } else if (ht->deleted_entries + ht->entries >= ht->max_entries) {
-      _mesa_hash_table_rehash(ht, ht->size_index);
+      _mesa_hash_table_rehash(ht, ht->size_iteration);
    }
 
-   start_hash_address = hash % ht->size;
+   start_hash_address = hash & (ht->size - 1);
    hash_address = start_hash_address;
+
    do {
       struct hash_entry *entry = ht->table + hash_address;
-      uint32_t double_hash;
 
       if (!entry_is_present(ht, entry)) {
          /* Stash the first available entry we find */
@@ -336,10 +296,9 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
          return entry;
       }
 
-
-      double_hash = 1 + hash % ht->rehash;
-
-      hash_address = (hash_address + double_hash) % ht->size;
+      hash_address = (start_hash_address +
+                (quad_hash + (quad_hash * quad_hash)) / 2) & (ht->size - 1);
+      quad_hash++;
    } while (hash_address != start_hash_address);
 
    if (available_entry) {
@@ -355,6 +314,7 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
    /* We could hit here if a required resize failed. An unchecked-malloc
     * application could ignore this result.
     */
+   unreachable("Failed to insert entry in hash table");
    return NULL;
 }
 
@@ -434,7 +394,7 @@ _mesa_hash_table_random_entry(struct hash_table *ht,
                               bool (*predicate)(struct hash_entry *entry))
 {
    struct hash_entry *entry;
-   uint32_t i = rand() % ht->size;
+   uint32_t i = rand() & (ht->size - 1);
 
    if (ht->entries == 0)
       return NULL;
diff --git a/src/util/hash_table.h b/src/util/hash_table.h
index b35ee871bb..fa0e241b03 100644
--- a/src/util/hash_table.h
+++ b/src/util/hash_table.h
@@ -50,9 +50,8 @@ struct hash_table {
    bool (*key_equals_function)(const void *a, const void *b);
    const void *deleted_key;
    uint32_t size;
-   uint32_t rehash;
    uint32_t max_entries;
-   uint32_t size_index;
+   uint32_t size_iteration;
    uint32_t entries;
    uint32_t deleted_entries;
 };
-- 
2.11.1



More information about the mesa-dev mailing list