[Mesa-dev] [PATCH 01/18] util: Add initial pointer map implementation

Thomas Helland thomashelland90 at gmail.com
Wed Apr 11 18:48:10 UTC 2018


The motivation is that for the common case of pointers as keys the
current hash table implementation has multiple disadvantages.
It stores the hash, which means we get more memory usage than
is strictly necessary. It also compares both the hash, and the
pointer against the key when searching, when simply comparing
the pointer is enough and just as cheap. Also, it has a very
cache unfriendly reprobing algorithm.

This implementation adresses all of these issue, plus more.
It uses a table of size 2^n, meaning we can simply do mask of bits
instead of computing an expensive modulo when inserting or searching
the table for entries. It also uses linear probing for cache locality.
It also has the nice effect that the CPU should be more likely to be
able to do speculative execution. To further improve cache locality
it takes a trick from the talk "Designing a Fast, Efficient,
cache-friendly Hash Table, Step by Steap" from the 2017 CppCon
held by Matt Kulundis; it stores the metadata separate from the
stored data. The way this is done is that it allocates one byte
per entry, uses 7 bits to store the lower bits of the hash,
and uses the last bit to indicate if the slot is empty. The net
result is a space saving of 7/24ths, along with a much improved
cache friendliness. This can be further improved by using SSE
instructions for processing a large number of entries at the time
but I found that to be too platform specific, so I left it out.
One can argue if the cache penalty of storing the hash in a
separate array, and having to swap cache lines to acquire the
key is as much a penalty as the gain from reduced memory usage.
I should probably swap this implementation for one that just
removes the storage of the hash, and see how that fares. That
would be similar to what is done for the set later in this series.

V2:  Use bitmask instead of modulo as map is always size 2^n

V3: Use some of the saved space to lower the load factor in map

This will reduce the length of clusters, effectively giving us shorter
probing lengths both on insertion and search. This should not affect
cache locality, as the only potential change would be that we find a
free slot more often, and that means we're done with the loop. The only
case where this hurts us in a negative way is when iterating the hash
table as we will need to iterate more entries.
---
 src/util/meson.build   |   2 +
 src/util/pointer_map.c | 323 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/util/pointer_map.h |  95 +++++++++++++++
 3 files changed, 420 insertions(+)
 create mode 100644 src/util/pointer_map.c
 create mode 100644 src/util/pointer_map.h

diff --git a/src/util/meson.build b/src/util/meson.build
index eece1cefef..9b50647f34 100644
--- a/src/util/meson.build
+++ b/src/util/meson.build
@@ -48,6 +48,8 @@ files_mesa_util = files(
   'mesa-sha1.h',
   'os_time.c',
   'os_time.h',
+  'pointer_map.c',
+  'pointer_map.h',
   'sha1/sha1.c',
   'sha1/sha1.h',
   'ralloc.c',
diff --git a/src/util/pointer_map.c b/src/util/pointer_map.c
new file mode 100644
index 0000000000..8076bd827f
--- /dev/null
+++ b/src/util/pointer_map.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright © 2017 Thomas Helland
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * Implements a linear probing hash table specifically for pointer keys.
+ * It uses a separate metadata array for good cache locality when searching.
+ * The metadata array is an array of bytes, where the seven LSB stores a hash,
+ * and the first bit stores whether the entry is free. An important detail is
+ * that the bit being
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+
+#include "pointer_map.h"
+#include "ralloc.h"
+#include "macros.h"
+
+static inline uint8_t
+get_hash(uint8_t *metadata)
+{
+   return *metadata & 0x7F;
+}
+
+static inline void
+set_hash(uint8_t *metadata, uint32_t hash)
+{
+   *metadata = (*metadata & ~0x7F) | (((uint8_t) hash) & 0x7F);
+}
+
+static inline bool
+entry_is_free(uint8_t *metadata)
+{
+   return !(*metadata >> 7);
+}
+
+static inline void
+set_occupied(uint8_t *metadata, bool occupied)
+{
+   *metadata = occupied ? *metadata | 0x80 : *metadata & 0x7F;
+}
+
+static inline uint32_t
+hash_pointer(const void *pointer)
+{
+   uintptr_t num = (uintptr_t) pointer;
+   return (uint32_t) ((num >> 2) ^ (num >> 6) ^ (num >> 10) ^ (num >> 14));
+}
+
+static bool
+entry_is_deleted(struct pointer_map *map, uint8_t *metadata)
+{
+   if (get_hash(metadata) != 0)
+      return false;
+
+   return map->map[metadata - map->metadata].key == NULL;
+}
+
+struct pointer_map *
+_mesa_pointer_map_create(void *mem_ctx)
+{
+   struct pointer_map *map;
+
+   map = ralloc(mem_ctx, struct pointer_map);
+   if (map == NULL)
+      return NULL;
+
+   map->size = 1 << 4;
+   map->max_entries = map->size * 0.6;
+   map->map = rzalloc_array(map, struct map_entry, map->size);
+   map->metadata = rzalloc_array(map, uint8_t, map->size);
+   map->entries = 0;
+   map->deleted_entries = 0;
+
+   if (map->map == NULL) {
+      ralloc_free(map);
+      return NULL;
+   }
+
+   return map;
+}
+
+/**
+ * Frees the pointer map.
+ */
+void
+_mesa_pointer_map_destroy(struct pointer_map *map,
+                          void (*delete_function)(struct map_entry *entry))
+{
+   if (!map)
+      return;
+
+   if (delete_function) {
+      struct map_entry *entry;
+
+      _mesa_pointer_map_foreach(map, entry) {
+         delete_function(entry);
+      }
+   }
+
+   ralloc_free(map);
+}
+
+/**
+ * Deletes all entries of the given pointer map without deleting the map
+ * itself or changing its structure.
+ */
+void
+_mesa_pointer_map_clear(struct pointer_map *map)
+{
+   memset(map->metadata, 0, map->size * sizeof(uint8_t));
+   memset(map->map, 0, sizeof(struct map_entry) * map->size);
+   map->entries = 0;
+   map->deleted_entries = 0;
+}
+
+/**
+ * Finds a hash table entry with the given key and hash of that key.
+ *
+ * Returns NULL if no entry is found.  Note that the data pointer may be
+ * modified by the user.
+ */
+struct map_entry *
+_mesa_pointer_map_search(struct pointer_map *map, const void *key)
+{
+   uint32_t hash = hash_pointer(key);
+   uint32_t start_hash_address = hash & (map->size - 1);
+   uint32_t hash_address = start_hash_address;
+
+   do {
+      uint8_t *metadata = map->metadata + hash_address;
+
+      if (entry_is_free(metadata)) {
+         return NULL;
+      } else if (get_hash(metadata) == (hash & 0x7F)) {
+         if (map->map[hash_address].key == key) {
+            return &map->map[hash_address];
+         }
+      }
+
+      hash_address = (hash_address + 1) & (map->size - 1);
+   } while (hash_address != start_hash_address);
+
+   return NULL;
+}
+
+static void
+_mesa_pointer_map_rehash(struct pointer_map *map, unsigned new_size)
+{
+   struct pointer_map old_map;
+   struct map_entry *map_entries, *entry;
+   uint8_t *metadatas;
+
+   old_map = *map;
+
+   map->size = new_size;
+   map->max_entries = map->size*0.6;
+   
+   map_entries = rzalloc_array(map, struct map_entry, map->size);
+   if (map_entries == NULL)
+      return;
+
+   metadatas = rzalloc_array(map, uint8_t, map->size);
+   if (metadatas == NULL)
+      return;
+
+   map->map = map_entries;
+   map->metadata = metadatas;
+   map->entries = 0;
+   map->deleted_entries = 0;
+
+   _mesa_pointer_map_foreach(&old_map, entry) {
+      _mesa_pointer_map_insert(map, entry->key, entry->data);
+   }
+
+   ralloc_free(old_map.map);
+   ralloc_free(old_map.metadata);
+}
+
+/**
+ * Inserts the key into the table. Note that insertion may rearrange the
+ * table on a resize or rehash, so previously found hash_entries are no
+ * longer valid after this function.
+ */
+struct map_entry *
+_mesa_pointer_map_insert(struct pointer_map *map, const void *key, void *data)
+{
+   uint32_t start_hash_address, hash_address, hash;
+   uint8_t *available_entry = NULL;
+   assert(key != NULL);
+
+   if (map->entries >= map->max_entries) {
+      _mesa_pointer_map_rehash(map, map->size * 2);
+   } else if (map->deleted_entries + map->entries >= map->max_entries) {
+      _mesa_pointer_map_rehash(map, map->size);
+   }
+
+   hash = hash_pointer(key);
+   start_hash_address = hash & (map->size - 1);
+   hash_address = start_hash_address;
+
+   do {
+      uint8_t *entry = map->metadata + hash_address;
+
+      if (entry_is_free(entry)) {
+         /* If we have not found a deleted entry yet, then we want
+          * to take the free slot at the end of the group
+          */
+         if (available_entry == NULL)
+            available_entry = entry;
+         break;
+      }
+
+      /* Implement replacement when another insert happens
+       * with a matching key.  This is a relatively common
+       * feature of hash tables, with the alternative
+       * generally being "insert the new value as well, and
+       * return it first when the key is searched for".
+       *
+       * Note that the pointer map doesn't have a delete
+       * callback.  If freeing of old data pointers is
+       * required to avoid memory leaks, perform a search
+       * before inserting.
+       */
+      if (get_hash(entry) == (hash & 0x7F) &&
+          map->map[hash_address].key == key) {
+         set_hash(entry, hash);
+         map->map[hash_address].data = data;
+         set_occupied(entry, true);
+         return &map->map[hash_address];
+      }
+
+      // Stash the first deleted entry in the group
+      if (entry_is_deleted(map, entry) &&
+          available_entry == NULL) {
+            available_entry = entry;
+      }
+
+      hash_address = (hash_address + 1) & (map->size - 1);
+   } while (hash_address != start_hash_address);
+
+   if (available_entry) {
+      if (entry_is_deleted(map, available_entry))
+         map->deleted_entries--;
+      set_hash(available_entry, hash);
+      set_occupied(available_entry, true);
+      map->map[hash_address].key = key;
+      map->map[hash_address].data = data;
+      map->entries++;
+      return &map->map[hash_address];
+   }
+
+   /* We could hit here if a required resize failed. An unchecked-malloc
+    * application could ignore this result.
+    */
+   return NULL;
+}
+
+/**
+ * This function deletes the given pointer map entry.
+ *
+ * Note that deletion doesn't otherwise modify the table, so an iteration over
+ * the table deleting entries is safe.
+ */
+void
+_mesa_pointer_map_remove(struct pointer_map *map,
+                        struct map_entry *entry)
+{
+   if (!entry)
+      return;
+
+   entry->key = NULL;
+   set_hash(map->metadata + (entry - map->map), 0);
+   map->entries--;
+   map->deleted_entries++;
+}
+
+/**
+ * This function is an iterator over the pointer map.
+ *
+ * Pass in NULL for the first entry, as in the start of a for loop.  Note that
+ * an iteration over the map is O(table_size) not O(entries).
+ */
+struct map_entry *
+_mesa_pointer_map_next_entry(struct pointer_map *map,
+                             struct map_entry *entry)
+{
+   if (entry == NULL)
+      entry = map->map;
+   else
+      entry = entry + 1;
+
+   for (; entry != map->map + map->size; entry++) {
+      if (entry->key) {
+         return entry;
+      }
+   }
+
+   return NULL;
+}
diff --git a/src/util/pointer_map.h b/src/util/pointer_map.h
new file mode 100644
index 0000000000..e1cef418d8
--- /dev/null
+++ b/src/util/pointer_map.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright © 2017 Thomas Helland
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _POINTER_MAP_H
+#define _POINTER_MAP_H
+
+#include <stdlib.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include "c99_compat.h"
+#include "macros.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct map_entry {
+   const void *key;
+   void *data;
+};
+
+struct pointer_map {
+   struct map_entry *map;
+   uint8_t *metadata;
+
+   const void *deleted_key;
+
+   uint32_t size;
+   uint32_t max_entries;
+   uint32_t entries;
+   uint32_t deleted_entries;
+};
+
+struct pointer_map *
+_mesa_pointer_map_create(void *mem_ctx);
+
+void _mesa_pointer_map_destroy(struct pointer_map *map,
+                               void (*delete_function)(struct map_entry *entry));
+
+void _mesa_pointer_map_clear(struct pointer_map *map);
+
+static inline uint32_t _mesa_pointer_map_num_entries(struct pointer_map *map)
+{
+   return map->entries;
+}
+
+struct map_entry *
+_mesa_pointer_map_insert(struct pointer_map *map, const void *key, void *data);
+
+struct map_entry *
+_mesa_pointer_map_search(struct pointer_map *map, const void *key);
+
+void _mesa_pointer_map_remove(struct pointer_map *map,
+                             struct map_entry *entry);
+
+struct map_entry *
+_mesa_pointer_map_next_entry(struct pointer_map *map,
+                             struct map_entry *entry);
+
+/**
+ * This foreach function is safe against deletion (which just replaces
+ * an entry's data with the deleted marker), but not against insertion
+ * (which may rehash the table, making entry a dangling pointer).
+ */
+#define _mesa_pointer_map_foreach(map, entry)                   \
+   for (entry = _mesa_pointer_map_next_entry(map, NULL);  \
+        entry != NULL;                                  \
+        entry = _mesa_pointer_map_next_entry(map, entry))
+
+#ifdef __cplusplus
+} /* extern C */
+#endif
+
+#endif /* _POINTER_MAP_H */
-- 
2.16.2



More information about the mesa-dev mailing list