[Mesa-dev] [PATCH 2/4] util: Change hash_table to use quadratic probing
Thomas Helland
thomashelland90 at gmail.com
Wed Feb 8 20:35:38 UTC 2017
This will allow us to remove the large static table and use a
power of two hash table size that we can compute on the fly.
We can use bitmasking instead of modulo to fit our hash in the table,
and it's less code.
By using the algorithm hash = sh + i/2 + i*i/2
we are guaranteed that all retries from the quad probing
are distinct, and so we should be able to completely fill the table.
This has been verified separately by Eric Anholt and Jason Ekstrand.
This passes the test added to exercise a worst case collision scenario.
V4: Feedback from Eric Anholt
- Don't change load factor or starting size.
V3: Feedback from Connor Abbott
- Remove hash_size table
- Correct comment-style
Feedback from Eric Anholt
- Correct quadratic probing algorithm
Feedback from Jason Ekstrand
- Add "unreachable" if we fail to insert in table
---
src/util/hash_table.c | 102 +++++++++++++++-----------------------------------
src/util/hash_table.h | 3 +-
2 files changed, 32 insertions(+), 73 deletions(-)
diff --git a/src/util/hash_table.c b/src/util/hash_table.c
index 9e643af8b2..a93326ec27 100644
--- a/src/util/hash_table.c
+++ b/src/util/hash_table.c
@@ -33,11 +33,14 @@
*/
/**
- * Implements an open-addressing, linear-reprobing hash table.
+ * Implements an open-addressing, quadratic probing hash table.
*
- * For more information, see:
- *
- * http://cgit.freedesktop.org/~anholt/hash_table/tree/README
+ * We choose table sizes that's a power of two.
+ * This is computationally less expensive than primes.
+ * As a bonus the size and free space can be calculated instead of looked up.
+ * FNV-1a has good avalanche properties, so collision is not an issue.
+ * These tables are sized to have an extra 10% free to avoid
+ * exponential performance degradation as the hash table fills.
*/
#include <stdlib.h>
@@ -50,47 +53,6 @@
static const uint32_t deleted_key_value;
-/**
- * From Knuth -- a good choice for hash/rehash values is p, p-2 where
- * p and p-2 are both prime. These tables are sized to have an extra 10%
- * free to avoid exponential performance degradation as the hash table fills
- */
-static const struct {
- uint32_t max_entries, size, rehash;
-} hash_sizes[] = {
- { 2, 5, 3 },
- { 4, 7, 5 },
- { 8, 13, 11 },
- { 16, 19, 17 },
- { 32, 43, 41 },
- { 64, 73, 71 },
- { 128, 151, 149 },
- { 256, 283, 281 },
- { 512, 571, 569 },
- { 1024, 1153, 1151 },
- { 2048, 2269, 2267 },
- { 4096, 4519, 4517 },
- { 8192, 9013, 9011 },
- { 16384, 18043, 18041 },
- { 32768, 36109, 36107 },
- { 65536, 72091, 72089 },
- { 131072, 144409, 144407 },
- { 262144, 288361, 288359 },
- { 524288, 576883, 576881 },
- { 1048576, 1153459, 1153457 },
- { 2097152, 2307163, 2307161 },
- { 4194304, 4613893, 4613891 },
- { 8388608, 9227641, 9227639 },
- { 16777216, 18455029, 18455027 },
- { 33554432, 36911011, 36911009 },
- { 67108864, 73819861, 73819859 },
- { 134217728, 147639589, 147639587 },
- { 268435456, 295279081, 295279079 },
- { 536870912, 590559793, 590559791 },
- { 1073741824, 1181116273, 1181116271},
- { 2147483648ul, 2362232233ul, 2362232231ul}
-};
-
static int
entry_is_free(const struct hash_entry *entry)
{
@@ -121,10 +83,9 @@ _mesa_hash_table_create(void *mem_ctx,
if (ht == NULL)
return NULL;
- ht->size_index = 0;
- ht->size = hash_sizes[ht->size_index].size;
- ht->rehash = hash_sizes[ht->size_index].rehash;
- ht->max_entries = hash_sizes[ht->size_index].max_entries;
+ ht->size_iteration = 2;
+ ht->size = 1 << ht->size_iteration;
+ ht->max_entries = ht->size * 0.9;
ht->key_hash_function = key_hash_function;
ht->key_equals_function = key_equals_function;
ht->table = rzalloc_array(ht, struct hash_entry, ht->size);
@@ -208,12 +169,11 @@ _mesa_hash_table_set_deleted_key(struct hash_table *ht, const void *deleted_key)
static struct hash_entry *
hash_table_search(struct hash_table *ht, uint32_t hash, const void *key)
{
- uint32_t start_hash_address = hash % ht->size;
+ uint32_t start_hash_address = hash & (ht->size - 1);
uint32_t hash_address = start_hash_address;
+ uint32_t quad_hash = 1;
do {
- uint32_t double_hash;
-
struct hash_entry *entry = ht->table + hash_address;
if (entry_is_free(entry)) {
@@ -224,9 +184,9 @@ hash_table_search(struct hash_table *ht, uint32_t hash, const void *key)
}
}
- double_hash = 1 + hash % ht->rehash;
-
- hash_address = (hash_address + double_hash) % ht->size;
+ hash_address = (start_hash_address +
+ (quad_hash + (quad_hash * quad_hash)) / 2) & (ht->size - 1);
+ quad_hash++;
} while (hash_address != start_hash_address);
return NULL;
@@ -258,26 +218,25 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
const void *key, void *data);
static void
-_mesa_hash_table_rehash(struct hash_table *ht, unsigned new_size_index)
+_mesa_hash_table_rehash(struct hash_table *ht, uint32_t new_size_iteration)
{
struct hash_table old_ht;
struct hash_entry *table, *entry;
- if (new_size_index >= ARRAY_SIZE(hash_sizes))
+ if (new_size_iteration >= 31)
return;
table = rzalloc_array(ht, struct hash_entry,
- hash_sizes[new_size_index].size);
+ 1 << new_size_iteration);
if (table == NULL)
return;
old_ht = *ht;
ht->table = table;
- ht->size_index = new_size_index;
- ht->size = hash_sizes[ht->size_index].size;
- ht->rehash = hash_sizes[ht->size_index].rehash;
- ht->max_entries = hash_sizes[ht->size_index].max_entries;
+ ht->size_iteration = new_size_iteration;
+ ht->size = 1 << new_size_iteration;
+ ht->max_entries = ht->size * 0.7;
ht->entries = 0;
ht->deleted_entries = 0;
@@ -293,21 +252,22 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
const void *key, void *data)
{
uint32_t start_hash_address, hash_address;
+ uint32_t quad_hash = 1;
struct hash_entry *available_entry = NULL;
assert(key != NULL);
if (ht->entries >= ht->max_entries) {
- _mesa_hash_table_rehash(ht, ht->size_index + 1);
+ _mesa_hash_table_rehash(ht, ht->size_iteration + 1);
} else if (ht->deleted_entries + ht->entries >= ht->max_entries) {
- _mesa_hash_table_rehash(ht, ht->size_index);
+ _mesa_hash_table_rehash(ht, ht->size_iteration);
}
- start_hash_address = hash % ht->size;
+ start_hash_address = hash & (ht->size - 1);
hash_address = start_hash_address;
+
do {
struct hash_entry *entry = ht->table + hash_address;
- uint32_t double_hash;
if (!entry_is_present(ht, entry)) {
/* Stash the first available entry we find */
@@ -336,10 +296,9 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
return entry;
}
-
- double_hash = 1 + hash % ht->rehash;
-
- hash_address = (hash_address + double_hash) % ht->size;
+ hash_address = (start_hash_address +
+ (quad_hash + (quad_hash * quad_hash)) / 2) & (ht->size - 1);
+ quad_hash++;
} while (hash_address != start_hash_address);
if (available_entry) {
@@ -355,6 +314,7 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
/* We could hit here if a required resize failed. An unchecked-malloc
* application could ignore this result.
*/
+ unreachable("Failed to insert entry in hash table");
return NULL;
}
@@ -434,7 +394,7 @@ _mesa_hash_table_random_entry(struct hash_table *ht,
bool (*predicate)(struct hash_entry *entry))
{
struct hash_entry *entry;
- uint32_t i = rand() % ht->size;
+ uint32_t i = rand() & (ht->size - 1);
if (ht->entries == 0)
return NULL;
diff --git a/src/util/hash_table.h b/src/util/hash_table.h
index b35ee871bb..fa0e241b03 100644
--- a/src/util/hash_table.h
+++ b/src/util/hash_table.h
@@ -50,9 +50,8 @@ struct hash_table {
bool (*key_equals_function)(const void *a, const void *b);
const void *deleted_key;
uint32_t size;
- uint32_t rehash;
uint32_t max_entries;
- uint32_t size_index;
+ uint32_t size_iteration;
uint32_t entries;
uint32_t deleted_entries;
};
--
2.11.1
More information about the mesa-dev
mailing list