[Mesa-dev] [PATCH 3/4] util: Change util/set to use quadratic probing
Connor Abbott
cwabbott0 at gmail.com
Sun Mar 15 22:49:09 PDT 2015
Ok, so I think I managed to find the source of the bug. When inserting
elements into the set/hash table, we computed the initial probe
address *before* doing the rehash. In the case where inserting an
element led to a rehash, this meant that we'd use the wrong starting
address when actually inserting it, and then afterwards when we'd go
back and search for it we'd start at a different address => boom.
Fixing that makes the pubilc shader-db run without any errors, at
least for me. In addition, while examining the algorithm I found one
other thing that I've mentioned inline. Obviously, all these comments
apply equally to the hash table code as well, although I'm not sure
why the bug didn't affect it as well, or for that matter why we didn't
notice the bug with the old code as it also seems to have it. As Jason
said, hash table bugs are really terrible and tricky.
On Fri, Mar 13, 2015 at 6:37 PM, Thomas Helland
<thomashelland90 at gmail.com> wrote:
> The same rationale applies here as for the hash table.
> Power of two size should give better performance,
> and using the algorithm hash = sh + i/2 + i*i/2
> should result in only distinct hash values when hitting collisions.
> Should give a performance increase as we can do bitmasking instead
> of a modulo operation for fitting the hash in the address space.
> ---
> src/util/set.c | 103 ++++++++++++++++++++++++++++++---------------------------
> src/util/set.h | 1 -
> 2 files changed, 54 insertions(+), 50 deletions(-)
>
> diff --git a/src/util/set.c b/src/util/set.c
> index f01f869..8f0ad0d 100644
> --- a/src/util/set.c
> +++ b/src/util/set.c
> @@ -48,40 +48,46 @@
> uint32_t deleted_key_value;
> const void *deleted_key = &deleted_key_value;
>
> +/**
> + * We chose table sizes that's a power of two.
> + * This is computationally less expensive than primes.
> + * FNV-1a has good avalanche properties, so collision is not an issue.
> + * These tables are sized to have an extra 10% free to avoid
> + * exponential performance degradation as the hash table fills
> + */
> static const struct {
> - uint32_t max_entries, size, rehash;
> + uint32_t max_entries, size;
> } hash_sizes[] = {
> - { 2, 5, 3 },
> - { 4, 7, 5 },
> - { 8, 13, 11 },
> - { 16, 19, 17 },
> - { 32, 43, 41 },
> - { 64, 73, 71 },
> - { 128, 151, 149 },
> - { 256, 283, 281 },
> - { 512, 571, 569 },
> - { 1024, 1153, 1151 },
> - { 2048, 2269, 2267 },
> - { 4096, 4519, 4517 },
> - { 8192, 9013, 9011 },
> - { 16384, 18043, 18041 },
> - { 32768, 36109, 36107 },
> - { 65536, 72091, 72089 },
> - { 131072, 144409, 144407 },
> - { 262144, 288361, 288359 },
> - { 524288, 576883, 576881 },
> - { 1048576, 1153459, 1153457 },
> - { 2097152, 2307163, 2307161 },
> - { 4194304, 4613893, 4613891 },
> - { 8388608, 9227641, 9227639 },
> - { 16777216, 18455029, 18455027 },
> - { 33554432, 36911011, 36911009 },
> - { 67108864, 73819861, 73819859 },
> - { 134217728, 147639589, 147639587 },
> - { 268435456, 295279081, 295279079 },
> - { 536870912, 590559793, 590559791 },
> - { 1073741824, 1181116273, 1181116271 },
> - { 2147483648ul, 2362232233ul, 2362232231ul }
> + { 3, 4 },
> + { 7, 8 },
> + { 14, 16 },
> + { 28, 32 },
> + { 57, 64 },
> + { 115, 128 },
> + { 230, 256 },
> + { 460, 512 },
> + { 921, 1024 },
> + { 1843, 2048 },
> + { 3686, 4096 },
> + { 7372, 8192 },
> + { 14745, 16384 },
> + { 29491, 32768 },
> + { 58982, 65536 },
> + { 117964, 131072 },
> + { 235929, 262144 },
> + { 471859, 524288 },
> + { 943718, 1048576 },
> + { 1887436, 2097152 },
> + { 3774873, 4194304 },
> + { 7549747, 8388608 },
> + { 15099494, 16777216 },
> + { 30198988, 33554432 },
> + { 60397977, 67108864 },
> + { 120795955, 134217728 },
> + { 241591910, 268435456 },
> + { 483183820, 536870912 },
> + { 966367641, 1073741824 },
> + { 1932735283ul, 2147483648ul }
> };
>
> static int
> @@ -116,7 +122,6 @@ _mesa_set_create(void *mem_ctx,
>
> ht->size_index = 0;
> ht->size = hash_sizes[ht->size_index].size;
> - ht->rehash = hash_sizes[ht->size_index].rehash;
> ht->max_entries = hash_sizes[ht->size_index].max_entries;
> ht->key_hash_function = key_hash_function;
> ht->key_equals_function = key_equals_function;
> @@ -163,12 +168,12 @@ _mesa_set_destroy(struct set *ht, void (*delete_function)(struct set_entry *entr
> static struct set_entry *
> set_search(const struct set *ht, uint32_t hash, const void *key)
> {
> - uint32_t hash_address;
> + uint32_t start_hash_address = hash & (ht->size - 1);
> + uint32_t hash_address = start_hash_address;
> + // Start at 2, or we will match start_hash_address initially and bail
> + uint32_t quad_hash = 2;
>
> - hash_address = hash % ht->size;
> do {
> - uint32_t double_hash;
> -
> struct set_entry *entry = ht->table + hash_address;
>
> if (entry_is_free(entry)) {
> @@ -179,10 +184,10 @@ set_search(const struct set *ht, uint32_t hash, const void *key)
> }
> }
>
> - double_hash = 1 + hash % ht->rehash;
> -
> - hash_address = (hash_address + double_hash) % ht->size;
> - } while (hash_address != hash % ht->size);
> + hash_address = (start_hash_address + (quad_hash / 2) +
> + ((quad_hash * quad_hash) / 2)) & (ht->size - 1);
Here I think we could do (quad_hash + quad_hash * quad_hash) / 2
instead, and then we can start with quad_hash = 1 since we'll
correctly calculate start_hash_address + 1 and we'll actually get the
full range of addresses. Obviously this change needs to be applied to
the insert path too as well as both paths in the hash table code.
> + quad_hash++;
> + } while (hash_address != start_hash_address);
>
> return NULL;
> }
> @@ -225,7 +230,6 @@ set_rehash(struct set *ht, unsigned new_size_index)
> ht->table = table;
> ht->size_index = new_size_index;
> ht->size = hash_sizes[ht->size_index].size;
> - ht->rehash = hash_sizes[ht->size_index].rehash;
> ht->max_entries = hash_sizes[ht->size_index].max_entries;
> ht->entries = 0;
> ht->deleted_entries = 0;
> @@ -250,7 +254,9 @@ set_rehash(struct set *ht, unsigned new_size_index)
> static struct set_entry *
> set_add(struct set *ht, uint32_t hash, const void *key)
> {
> - uint32_t hash_address;
> + uint32_t start_hash_address = hash & (ht->size - 1);
> + uint32_t hash_address = start_hash_address;
Here's the bug; we need to initialize these two variables after the
calls to set_rehash() below.
> + uint32_t quad_hash = 2;
> struct set_entry *available_entry = NULL;
>
> if (ht->entries >= ht->max_entries) {
> @@ -259,10 +265,8 @@ set_add(struct set *ht, uint32_t hash, const void *key)
> set_rehash(ht, ht->size_index);
> }
>
> - hash_address = hash % ht->size;
> do {
> struct set_entry *entry = ht->table + hash_address;
> - uint32_t double_hash;
>
> if (!entry_is_present(entry)) {
> /* Stash the first available entry we find */
> @@ -288,10 +292,11 @@ set_add(struct set *ht, uint32_t hash, const void *key)
> return entry;
> }
>
> - double_hash = 1 + hash % ht->rehash;
> + hash_address = (start_hash_address + (quad_hash / 2) +
> + ((quad_hash * quad_hash) / 2)) & (ht->size - 1);
> + quad_hash++;
> + } while (hash_address != start_hash_address);
>
> - hash_address = (hash_address + double_hash) % ht->size;
> - } while (hash_address != hash % ht->size);
>
> if (available_entry) {
> if (entry_is_deleted(available_entry))
> @@ -368,7 +373,7 @@ _mesa_set_random_entry(struct set *ht,
> int (*predicate)(struct set_entry *entry))
> {
> struct set_entry *entry;
> - uint32_t i = rand() % ht->size;
> + uint32_t i = rand() & (ht->size - 1);
>
> if (ht->entries == 0)
> return NULL;
> diff --git a/src/util/set.h b/src/util/set.h
> index 9acd2c2..5a1097c 100644
> --- a/src/util/set.h
> +++ b/src/util/set.h
> @@ -46,7 +46,6 @@ struct set {
> uint32_t (*key_hash_function)(const void *key);
> bool (*key_equals_function)(const void *a, const void *b);
> uint32_t size;
> - uint32_t rehash;
> uint32_t max_entries;
> uint32_t size_index;
> uint32_t entries;
> --
> 2.2.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list