[Mesa-dev] [PATCH 3/3] util: Change util/set to use quadratic probing

Mon Apr 13 15:25:06 PDT 2015

On Mon, Apr 13, 2015 at 3:22 PM, Thomas Helland
<thomashelland90 at gmail.com> wrote:
>
> On 14 Apr 2015 00:05, "Jason Ekstrand" <jason at jlekstrand.net> wrote:
>>
>> On Sat, Apr 11, 2015 at 4:25 PM, Thomas Helland
>> <thomashelland90 at gmail.com> wrote:
>> > The same rationale applies here as for the hash table.
>> > Power of two size should give better performance,
>> > and using the algorithm hash = sh + i/2 + i*i/2
>> > should result in only distinct hash values when hitting collisions.
>> >
>> > Difference at 95.0% confidence
>> >      -7.9505 +/- 2.44011
>> >    -5.04357% +/- 1.54794%
>> >
>> > V3: Feedback from Eric Anholt
>> >    - Don't change load factor and starting size.
>> >
>> > V2: Feedback from Connor Abbott
>> >    - Don't set initial hash address before potential rehash
>> >    - Remove hash_sizes table
>> >    - Correct the quadratic hashing algorithm
>> >    - Use correct comment style
>> >
>> >    Feedback from Jason Ekstrand
>> >    - Use unreachable() to detect if we fail to insert
>> >
>> > Signed-off-by: Thomas Helland <thomashelland90 at gmail.com>
>> > ---
>> >  src/util/set.c | 118
>> > ++++++++++++++++++++-------------------------------------
>> >  src/util/set.h |   3 +-
>> >  2 files changed, 43 insertions(+), 78 deletions(-)
>> >
>> > diff --git a/src/util/set.c b/src/util/set.c
>> > index f01f869..7ff9520 100644
>> > --- a/src/util/set.c
>> > +++ b/src/util/set.c
>> > @@ -32,6 +32,17 @@
>> >   *    Keith Packard <keithp at keithp.com>
>> >   */
>> >
>> > +/**
>> > + * Implements an open-addressing, quadratic probing hash-set.
>> > + *
>> > + * We choose set sizes that's a power of two.
>> > + * This is computationally less expensive than primes.
>> > + * As a bonus the size and free space can be calculated instead of
>> > looked up.
>> > + * FNV-1a has good avalanche properties, so collision is not an issue.
>> > + * These sets are sized to have an extra 10% free to avoid
>> > + * exponential performance degradation as the set fills.
>> > + */
>> > +
>> >  #include <stdlib.h>
>> >  #include <assert.h>
>> >
>> > @@ -39,51 +50,9 @@
>> >  #include "ralloc.h"
>> >  #include "set.h"
>> >
>> > -/*
>> > - * From Knuth -- a good choice for hash/rehash values is p, p-2 where
>> > - * p and p-2 are both prime.  These tables are sized to have an extra
>> > 10%
>> > - * free to avoid exponential performance degradation as the hash table
>> > fills
>> > - */
>> > -
>> >  uint32_t deleted_key_value;
>> >  const void *deleted_key = &deleted_key_value;
>> >
>> > -static const struct {
>> > -   uint32_t max_entries, size, rehash;
>> > -} hash_sizes[] = {
>> > -   { 2,            5,            3            },
>> > -   { 4,            7,            5            },
>> > -   { 8,            13,           11           },
>> > -   { 16,           19,           17           },
>> > -   { 32,           43,           41           },
>> > -   { 64,           73,           71           },
>> > -   { 128,          151,          149          },
>> > -   { 256,          283,          281          },
>> > -   { 512,          571,          569          },
>> > -   { 1024,         1153,         1151         },
>> > -   { 2048,         2269,         2267         },
>> > -   { 4096,         4519,         4517         },
>> > -   { 8192,         9013,         9011         },
>> > -   { 16384,        18043,        18041        },
>> > -   { 32768,        36109,        36107        },
>> > -   { 65536,        72091,        72089        },
>> > -   { 131072,       144409,       144407       },
>> > -   { 262144,       288361,       288359       },
>> > -   { 524288,       576883,       576881       },
>> > -   { 1048576,      1153459,      1153457      },
>> > -   { 2097152,      2307163,      2307161      },
>> > -   { 4194304,      4613893,      4613891      },
>> > -   { 8388608,      9227641,      9227639      },
>> > -   { 16777216,     18455029,     18455027     },
>> > -   { 33554432,     36911011,     36911009     },
>> > -   { 67108864,     73819861,     73819859     },
>> > -   { 134217728,    147639589,    147639587    },
>> > -   { 268435456,    295279081,    295279079    },
>> > -   { 536870912,    590559793,    590559791    },
>> > -   { 1073741824,   1181116273,   1181116271   },
>> > -   { 2147483648ul, 2362232233ul, 2362232231ul }
>> > -};
>> > -
>> >  static int
>> >  entry_is_free(struct set_entry *entry)
>> >  {
>> > @@ -114,10 +83,9 @@ _mesa_set_create(void *mem_ctx,
>> >     if (ht == NULL)
>> >        return NULL;
>> >
>> > -   ht->size_index = 0;
>> > -   ht->size = hash_sizes[ht->size_index].size;
>> > -   ht->rehash = hash_sizes[ht->size_index].rehash;
>> > -   ht->max_entries = hash_sizes[ht->size_index].max_entries;
>> > +   ht->size_iteration = 2;
>> > +   ht->size = 1 << ht->size_iteration;
>> > +   ht->max_entries = ht->size * 0.9;
>> >     ht->key_hash_function = key_hash_function;
>> >     ht->key_equals_function = key_equals_function;
>> >     ht->table = rzalloc_array(ht, struct set_entry, ht->size);
>> > @@ -163,12 +131,11 @@ _mesa_set_destroy(struct set *ht, void
>> > (*delete_function)(struct set_entry *entr
>> >  static struct set_entry *
>> >  set_search(const struct set *ht, uint32_t hash, const void *key)
>> >  {
>> > -   uint32_t hash_address;
>> > +   uint32_t start_hash_address = hash & (ht->size - 1);
>> > +   uint32_t hash_address = start_hash_address;
>> > +   uint32_t quad_hash = 1;
>> >
>> > -   hash_address = hash % ht->size;
>> >     do {
>> > -      uint32_t double_hash;
>> > -
>> >        struct set_entry *entry = ht->table + hash_address;
>> >
>> >        if (entry_is_free(entry)) {
>> > @@ -179,10 +146,10 @@ set_search(const struct set *ht, uint32_t hash,
>> > const void *key)
>> >           }
>> >        }
>> >
>> > -      double_hash = 1 + hash % ht->rehash;
>> > -
>> > -      hash_address = (hash_address + double_hash) % ht->size;
>> > -   } while (hash_address != hash % ht->size);
>> > +      hash_address = (start_hash_address +
>> > +                (quad_hash + (quad_hash * quad_hash)) / 2) & (ht->size
>> > - 1);
>> > +      quad_hash++;
>> > +   } while (hash_address != start_hash_address);
>> >
>> >     return NULL;
>> >  }
>> > @@ -207,35 +174,30 @@ static struct set_entry *
>> >  set_add(struct set *ht, uint32_t hash, const void *key);
>> >
>> >  static void
>> > -set_rehash(struct set *ht, unsigned new_size_index)
>> > +set_rehash(struct set *ht, uint32_t new_size_iteration)
>> >  {
>> >     struct set old_ht;
>> >     struct set_entry *table, *entry;
>> >
>> > -   if (new_size_index >= ARRAY_SIZE(hash_sizes))
>> > +   if (new_size_iteration >= 31)
>> >        return;
>> >
>> >     table = rzalloc_array(ht, struct set_entry,
>> > -                         hash_sizes[new_size_index].size);
>> > +                         1 << new_size_iteration);
>> >     if (table == NULL)
>> >        return;
>> >
>> >     old_ht = *ht;
>> >
>> >     ht->table = table;
>> > -   ht->size_index = new_size_index;
>> > -   ht->size = hash_sizes[ht->size_index].size;
>> > -   ht->rehash = hash_sizes[ht->size_index].rehash;
>> > -   ht->max_entries = hash_sizes[ht->size_index].max_entries;
>> > +   ht->size_iteration = new_size_iteration;
>> > +   ht->size = 1 << new_size_iteration;
>> > +   ht->max_entries = ht->size * 0.7;
>> >     ht->entries = 0;
>> >     ht->deleted_entries = 0;
>> >
>> > -   for (entry = old_ht.table;
>> > -        entry != old_ht.table + old_ht.size;
>> > -        entry++) {
>> > -      if (entry_is_present(entry)) {
>> > -         set_add(ht, entry->hash, entry->key);
>> > -      }
>> > +   set_foreach(&old_ht, entry) {
>> > +      set_add(ht, entry->hash, entry->key);
>> >     }
>>
>> This hunk, while I think it's correct, is unrelated.  Please drop it.
>> I think I'm ok with the cleanup, but it should be a separate patch if
>> you want to do that.
>> --Jason
>>
>
> Yup, unrelated change it is.
> I'll split it in two and post both in one of the coming days.
>
> (This is how it is done in the hash table in current master.
> Following the same pattern in the set seemed desirable.)

Yup.  Sounds good to me.  Go ahead and put my r-b on the split-out
hunk assuming it's the same as above
--Jason

> - Thomas
>
>> >     ralloc_free(old_ht.table);
>> > @@ -250,19 +212,21 @@ set_rehash(struct set *ht, unsigned
>> > new_size_index)
>> >  static struct set_entry *
>> >  set_add(struct set *ht, uint32_t hash, const void *key)
>> >  {
>> > -   uint32_t hash_address;
>> > +   uint32_t start_hash_address, hash_address;
>> > +   uint32_t quad_hash = 1;
>> >     struct set_entry *available_entry = NULL;
>> >
>> >     if (ht->entries >= ht->max_entries) {
>> > -      set_rehash(ht, ht->size_index + 1);
>> > +      set_rehash(ht, ht->size_iteration + 1);
>> >     } else if (ht->deleted_entries + ht->entries >= ht->max_entries) {
>> > -      set_rehash(ht, ht->size_index);
>> > +      set_rehash(ht, ht->size_iteration);
>> >     }
>> >
>> > -   hash_address = hash % ht->size;
>> > +   start_hash_address = hash & (ht->size - 1);
>> > +   hash_address = start_hash_address;
>> > +
>> >     do {
>> >        struct set_entry *entry = ht->table + hash_address;
>> > -      uint32_t double_hash;
>> >
>> >        if (!entry_is_present(entry)) {
>> >           /* Stash the first available entry we find */
>> > @@ -288,10 +252,11 @@ set_add(struct set *ht, uint32_t hash, const void
>> > *key)
>> >           return entry;
>> >        }
>> >
>> > -      double_hash = 1 + hash % ht->rehash;
>> > +      hash_address = (start_hash_address +
>> > +                (quad_hash + (quad_hash * quad_hash)) / 2) & (ht->size
>> > - 1);
>> > +      quad_hash++;
>> > +   } while (hash_address != start_hash_address);
>> >
>> > -      hash_address = (hash_address + double_hash) % ht->size;
>> > -   } while (hash_address != hash % ht->size);
>> >
>> >     if (available_entry) {
>> >        if (entry_is_deleted(available_entry))
>> > @@ -305,6 +270,7 @@ set_add(struct set *ht, uint32_t hash, const void
>> > *key)
>> >     /* We could hit here if a required resize failed. An
>> > unchecked-malloc
>> >      * application could ignore this result.
>> >      */
>> > +   unreachable("Failed to insert entry in hash set");
>> >     return NULL;
>> >  }
>> >
>> > @@ -368,7 +334,7 @@ _mesa_set_random_entry(struct set *ht,
>> >                         int (*predicate)(struct set_entry *entry))
>> >  {
>> >     struct set_entry *entry;
>> > -   uint32_t i = rand() % ht->size;
>> > +   uint32_t i = rand() & (ht->size - 1);
>> >
>> >     if (ht->entries == 0)
>> >        return NULL;
>> > diff --git a/src/util/set.h b/src/util/set.h
>> > index 9acd2c2..fa17d3a 100644
>> > --- a/src/util/set.h
>> > +++ b/src/util/set.h
>> > @@ -46,9 +46,8 @@ struct set {
>> >     uint32_t (*key_hash_function)(const void *key);
>> >     bool (*key_equals_function)(const void *a, const void *b);
>> >     uint32_t size;
>> > -   uint32_t rehash;
>> >     uint32_t max_entries;
>> > -   uint32_t size_index;
>> > +   uint32_t size_iteration;
>> >     uint32_t entries;
>> >     uint32_t deleted_entries;
>> >  };
>> > --
>> > 2.3.4
>> >
>> > _______________________________________________
>> > mesa-dev mailing list
>> > mesa-dev at lists.freedesktop.org
>> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev