[Mesa-dev] [PATCH 2/4] util: Change hash_table to use quadratic probing

Connor Abbott cwabbott0 at gmail.com
Sat Mar 14 22:34:59 PDT 2015


On Sun, Mar 15, 2015 at 1:25 AM, Connor Abbott <cwabbott0 at gmail.com> wrote:
> On Fri, Mar 13, 2015 at 6:37 PM, Thomas Helland
> <thomashelland90 at gmail.com> wrote:
>> This should give better cache locality, less memory consumption,
>> and should also be faster since we avoid a modulo operation.
>> Also change table size to be power of two.
>> This gives better performance as we can do bitmasking instead of
>> modulo operations for fitting the hash in the address space.
>> By using the algorithm hash = sh + i/2 + i*i/2
>> ee are guaranteed that all retries from the quad probing
>> are distinct, and so should be able to completely fill the table.
>> This passes the test added to exercise a worst case collision scenario.
>> ---
>>  src/util/hash_table.c | 101 +++++++++++++++++++++++++-------------------------
>>  src/util/hash_table.h |   1 -
>>  2 files changed, 50 insertions(+), 52 deletions(-)
>>
>> diff --git a/src/util/hash_table.c b/src/util/hash_table.c
>> index 3247593..92ffc10 100644
>> --- a/src/util/hash_table.c
>> +++ b/src/util/hash_table.c
>> @@ -33,7 +33,7 @@
>>   */
>>
>>  /**
>> - * Implements an open-addressing, linear-reprobing hash table.
>> + * Implements an open-addressing, quadratic probing hash table.
>>   *
>>   * For more information, see:
>>   *
>> @@ -51,44 +51,45 @@
>>  static const uint32_t deleted_key_value;
>>
>>  /**
>> - * From Knuth -- a good choice for hash/rehash values is p, p-2 where
>> - * p and p-2 are both prime.  These tables are sized to have an extra 10%
>> - * free to avoid exponential performance degradation as the hash table fills
>> + * We chose table sizes that's a power of two.
>> + * This is computationally less expensive than primes.
>> + * FNV-1a has good avalanche properties, so collision is not an issue.
>> + * These tables are sized to have an extra 10% free to avoid
>> + * exponential performance degradation as the hash table fills
>>   */
>>  static const struct {
>> -   uint32_t max_entries, size, rehash;
>> +   uint32_t max_entries, size;
>>  } hash_sizes[] = {
>> -   { 2,                        5,              3         },
>> -   { 4,                        7,              5         },
>> -   { 8,                        13,             11        },
>> -   { 16,               19,             17        },
>> -   { 32,               43,             41        },
>> -   { 64,               73,             71        },
>> -   { 128,              151,            149       },
>> -   { 256,              283,            281       },
>> -   { 512,              571,            569       },
>> -   { 1024,             1153,           1151      },
>> -   { 2048,             2269,           2267      },
>> -   { 4096,             4519,           4517      },
>> -   { 8192,             9013,           9011      },
>> -   { 16384,            18043,          18041     },
>> -   { 32768,            36109,          36107     },
>> -   { 65536,            72091,          72089     },
>> -   { 131072,           144409,         144407    },
>> -   { 262144,           288361,         288359    },
>> -   { 524288,           576883,         576881    },
>> -   { 1048576,          1153459,        1153457   },
>> -   { 2097152,          2307163,        2307161   },
>> -   { 4194304,          4613893,        4613891   },
>> -   { 8388608,          9227641,        9227639   },
>> -   { 16777216,         18455029,       18455027  },
>> -   { 33554432,         36911011,       36911009  },
>> -   { 67108864,         73819861,       73819859  },
>> -   { 134217728,                147639589,      147639587 },
>> -   { 268435456,                295279081,      295279079 },
>> -   { 536870912,                590559793,      590559791 },
>> -   { 1073741824,       1181116273,     1181116271},
>> -   { 2147483648ul,     2362232233ul,   2362232231ul}
>> +      { 3,            4            },
>
> Jason and I were talking about this on IRC, and he mentioned that
> having to rehash after only inserting 3 elements is really hurting us
> when it comes to use/def sets, and it doesn't really make much sense
> when ralloc already adds a lot of overhead. I know this patch is
> touching the hash table and not the hash set, but I suspect that
> bumping the default starting size a little higher (to, say, 14/16)
> might help here too and when we modify util/set.c we'll want the two
> things to be consistent.

And I just realized you do bump it to 6/8 later... /me needs to learn
to read good :) Have you tried bumping it a little more? Of course, at
some point there's a tradeoff to be made with increasing memory
usage...

>
> Also, now that these values are just powers of 2, could we compute
> these values on the fly? It seems to me like the extra few cycles
> aren't going to be worth the dcache pollution you're going to get from
> constantly looking them up.
>
>> +      { 7,            8            },
>> +      { 14,           16           },
>> +      { 28,           32           },
>> +      { 57,           64           },
>> +      { 115,          128          },
>> +      { 230,          256          },
>> +      { 460,          512          },
>> +      { 921,          1024         },
>> +      { 1843,         2048         },
>> +      { 3686,         4096         },
>> +      { 7372,         8192         },
>> +      { 14745,        16384        },
>> +      { 29491,        32768        },
>> +      { 58982,        65536        },
>> +      { 117964,       131072       },
>> +      { 235929,       262144       },
>> +      { 471859,       524288       },
>> +      { 943718,       1048576      },
>> +      { 1887436,      2097152      },
>> +      { 3774873,      4194304      },
>> +      { 7549747,      8388608      },
>> +      { 15099494,     16777216     },
>> +      { 30198988,     33554432     },
>> +      { 60397977,     67108864     },
>> +      { 120795955,    134217728    },
>> +      { 241591910,    268435456    },
>> +      { 483183820,    536870912    },
>> +      { 966367641,    1073741824   },
>> +      { 1932735283ul, 2147483648ul }
>>  };
>>
>>  static int
>> @@ -123,7 +124,6 @@ _mesa_hash_table_create(void *mem_ctx,
>>
>>     ht->size_index = 0;
>>     ht->size = hash_sizes[ht->size_index].size;
>> -   ht->rehash = hash_sizes[ht->size_index].rehash;
>>     ht->max_entries = hash_sizes[ht->size_index].max_entries;
>>     ht->key_hash_function = key_hash_function;
>>     ht->key_equals_function = key_equals_function;
>> @@ -182,12 +182,12 @@ _mesa_hash_table_set_deleted_key(struct hash_table *ht, const void *deleted_key)
>>  static struct hash_entry *
>>  hash_table_search(struct hash_table *ht, uint32_t hash, const void *key)
>>  {
>> -   uint32_t start_hash_address = hash % ht->size;
>> +   uint32_t start_hash_address = hash & (ht->size - 1);
>>     uint32_t hash_address = start_hash_address;
>> +   // Start at 2, or we will match start_hash_address initially and bail
>
> Mesa, including the rest of this file, uses /* */ style comments everywhere.
>
>> +   uint32_t quad_hash = 2;
>>
>>     do {
>> -      uint32_t double_hash;
>> -
>>        struct hash_entry *entry = ht->table + hash_address;
>>
>>        if (entry_is_free(entry)) {
>> @@ -198,9 +198,9 @@ hash_table_search(struct hash_table *ht, uint32_t hash, const void *key)
>>           }
>>        }
>>
>> -      double_hash = 1 + hash % ht->rehash;
>> -
>> -      hash_address = (hash_address + double_hash) % ht->size;
>> +      hash_address = (start_hash_address + (quad_hash / 2) +
>> +                      ((quad_hash * quad_hash) / 2)) & (ht->size - 1);
>> +      quad_hash++;
>>     } while (hash_address != start_hash_address);
>>
>>     return NULL;
>> @@ -250,7 +250,6 @@ _mesa_hash_table_rehash(struct hash_table *ht, unsigned new_size_index)
>>     ht->table = table;
>>     ht->size_index = new_size_index;
>>     ht->size = hash_sizes[ht->size_index].size;
>> -   ht->rehash = hash_sizes[ht->size_index].rehash;
>>     ht->max_entries = hash_sizes[ht->size_index].max_entries;
>>     ht->entries = 0;
>>     ht->deleted_entries = 0;
>> @@ -267,6 +266,7 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
>>                    const void *key, void *data)
>>  {
>>     uint32_t start_hash_address, hash_address;
>> +   uint32_t quad_hash = 2;
>>     struct hash_entry *available_entry = NULL;
>>
>>     if (ht->entries >= ht->max_entries) {
>> @@ -275,11 +275,11 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
>>        _mesa_hash_table_rehash(ht, ht->size_index);
>>     }
>>
>> -   start_hash_address = hash % ht->size;
>> +   start_hash_address = hash & (ht->size - 1);
>>     hash_address = start_hash_address;
>> +
>>     do {
>>        struct hash_entry *entry = ht->table + hash_address;
>> -      uint32_t double_hash;
>>
>>        if (!entry_is_present(ht, entry)) {
>>           /* Stash the first available entry we find */
>> @@ -307,10 +307,9 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
>>           return entry;
>>        }
>>
>> -
>> -      double_hash = 1 + hash % ht->rehash;
>> -
>> -      hash_address = (hash_address + double_hash) % ht->size;
>> +      hash_address = (start_hash_address + (quad_hash / 2) +
>> +                      ((quad_hash * quad_hash) / 2)) & (ht->size - 1);
>> +      quad_hash++;
>>     } while (hash_address != start_hash_address);
>>
>>     if (available_entry) {
>> @@ -405,7 +404,7 @@ _mesa_hash_table_random_entry(struct hash_table *ht,
>>                                bool (*predicate)(struct hash_entry *entry))
>>  {
>>     struct hash_entry *entry;
>> -   uint32_t i = rand() % ht->size;
>> +   uint32_t i = rand() & (ht->size - 1);
>>
>>     if (ht->entries == 0)
>>        return NULL;
>> diff --git a/src/util/hash_table.h b/src/util/hash_table.h
>> index eb9dbc3..6d41338 100644
>> --- a/src/util/hash_table.h
>> +++ b/src/util/hash_table.h
>> @@ -50,7 +50,6 @@ struct hash_table {
>>     bool (*key_equals_function)(const void *a, const void *b);
>>     const void *deleted_key;
>>     uint32_t size;
>> -   uint32_t rehash;
>>     uint32_t max_entries;
>>     uint32_t size_index;
>>     uint32_t entries;
>> --
>> 2.2.1
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list