[Mesa-dev] [PATCH v2 7/7] vbo: cache/memoize the result of vbo_get_minmax_indices

Nicolai Hähnle nhaehnle at gmail.com
Fri Jan 8 09:02:59 PST 2016


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

Some game developers are unaware that an index buffer in a VBO still needs
to be read by the CPU if some varying data comes from a user pointer (unless
glDrawRangeElements and friends are used). This is particularly bad when
they tell us that the index buffer should live in VRAM.

This cache helps, e.g. lifting This War Of Mine (a particularly bad
offender) from under 10fps to slightly over 20fps on a Carrizo.

Note that there is nothing prohibiting a user from rendering from multiple
threads simultaneously with the same index buffer, hence the locking. (The
internal buffer map taken for the buffer still leads to a race, but at least
the locks are a move in the right direction.)

v2: disable the cache on USAGE_TEXTURE_BUFFER as well (Chris Forbes)
---
This should be correct, if a bit conservative when stores aren't used
(ARB_texture_buffer_object is older than ARB_shader_image_load_store), but
that's not worth losing sleep over.

 src/mesa/main/bufferobj.c       |  10 +++
 src/mesa/main/mtypes.h          |   4 +
 src/mesa/vbo/vbo.h              |   3 +
 src/mesa/vbo/vbo_minmax_index.c | 164 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 181 insertions(+)

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index d88d9e3..a113cac 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -458,6 +458,7 @@ _mesa_delete_buffer_object(struct gl_context *ctx,
 {
    (void) ctx;
 
+   vbo_delete_minmax_cache(bufObj);
    _mesa_align_free(bufObj->Data);
 
    /* assign strange values here to help w/ debugging */
@@ -1528,6 +1529,7 @@ _mesa_buffer_storage(struct gl_context *ctx, struct gl_buffer_object *bufObj,
 
    bufObj->Written = GL_TRUE;
    bufObj->Immutable = GL_TRUE;
+   bufObj->MinMaxCacheDirty = GL_TRUE;
 
    assert(ctx->Driver.BufferData);
    if (!ctx->Driver.BufferData(ctx, target, size, data, GL_DYNAMIC_DRAW,
@@ -1641,6 +1643,7 @@ _mesa_buffer_data(struct gl_context *ctx, struct gl_buffer_object *bufObj,
    FLUSH_VERTICES(ctx, _NEW_BUFFER_OBJECT);
 
    bufObj->Written = GL_TRUE;
+   bufObj->MinMaxCacheDirty = GL_TRUE;
 
 #ifdef VBO_DEBUG
    printf("glBufferDataARB(%u, sz %ld, from %p, usage 0x%x)\n",
@@ -1753,6 +1756,7 @@ _mesa_buffer_sub_data(struct gl_context *ctx, struct gl_buffer_object *bufObj,
    }
 
    bufObj->Written = GL_TRUE;
+   bufObj->MinMaxCacheDirty = GL_TRUE;
 
    assert(ctx->Driver.BufferSubData);
    ctx->Driver.BufferSubData(ctx, offset, size, data, bufObj);
@@ -1872,6 +1876,8 @@ _mesa_clear_buffer_sub_data(struct gl_context *ctx,
    if (size == 0)
       return;
 
+   bufObj->MinMaxCacheDirty = GL_TRUE;
+
    if (data == NULL) {
       /* clear to zeros, per the spec */
       ctx->Driver.ClearBufferSubData(ctx, offset, size,
@@ -2285,6 +2291,8 @@ _mesa_copy_buffer_sub_data(struct gl_context *ctx,
       }
    }
 
+   dst->MinMaxCacheDirty = GL_TRUE;
+
    ctx->Driver.CopyBufferSubData(ctx, src, dst, readOffset, writeOffset, size);
 }
 
@@ -2494,6 +2502,8 @@ _mesa_map_buffer_range(struct gl_context *ctx,
 
       if (access & GL_MAP_PERSISTENT_BIT)
          bufObj->UsageHistory |= USAGE_PERSISTENT_WRITE_MAP;
+
+      bufObj->MinMaxCacheDirty = GL_TRUE;
    }
 
 #ifdef VBO_DEBUG
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 5b87c7c..37a088b 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1283,6 +1283,10 @@ struct gl_buffer_object
    GLuint NumMapBufferWriteCalls;
 
    struct gl_buffer_mapping Mappings[MAP_COUNT];
+
+   /** Memoization of min/max index computations for static index buffers */
+   struct hash_table *MinMaxCache;
+   GLboolean MinMaxCacheDirty;
 };
 
 
diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
index 0b8b6a9..6494aa5 100644
--- a/src/mesa/vbo/vbo.h
+++ b/src/mesa/vbo/vbo.h
@@ -181,6 +181,9 @@ vbo_sizeof_ib_type(GLenum type)
 }
 
 void
+vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj);
+
+void
 vbo_get_minmax_indices(struct gl_context *ctx, const struct _mesa_prim *prim,
                        const struct _mesa_index_buffer *ib,
                        GLuint *min_index, GLuint *max_index, GLuint nr_prims);
diff --git a/src/mesa/vbo/vbo_minmax_index.c b/src/mesa/vbo/vbo_minmax_index.c
index b43ed98..87dc777 100644
--- a/src/mesa/vbo/vbo_minmax_index.c
+++ b/src/mesa/vbo/vbo_minmax_index.c
@@ -32,6 +32,163 @@
 #include "main/macros.h"
 #include "main/sse_minmax.h"
 #include "x86/common_x86_asm.h"
+#include "util/hash_table.h"
+
+
+struct minmax_cache_key {   /* identifies one cached index range in the per-buffer hash table */
+   GLintptr offset;   /* start of the range.  NOTE(review): the lookup call in
+                       * vbo_get_minmax_index passes a byte offset, while the
+                       * store call passes prim->start -- confirm the unit */
+   GLuint count;      /* number of indices in the range */
+   GLenum type;       /* index type (ib->type at the call sites) */
+};
+
+
+struct minmax_cache_entry {   /* heap-allocated payload stored in the hash table */
+   struct minmax_cache_key key;   /* embedded key; the table is handed a
+                                   * pointer to this member on insertion */
+   GLuint min;   /* cached minimum index for the keyed range */
+   GLuint max;   /* cached maximum index for the keyed range */
+};
+
+
+/**
+ * Hash callback for the min/max cache.
+ *
+ * The key is hashed as a flat run of bytes covering the whole structure.
+ */
+static uint32_t
+vbo_minmax_cache_hash(const struct minmax_cache_key *key)
+{
+   return _mesa_hash_data(key, sizeof(struct minmax_cache_key));
+}
+
+
+/**
+ * Equality callback for the min/max cache.
+ *
+ * \return true only when offset, count and type are all identical.
+ */
+static bool
+vbo_minmax_cache_key_equal(const struct minmax_cache_key *a,
+                           const struct minmax_cache_key *b)
+{
+   if (a->offset != b->offset)
+      return false;
+   if (a->count != b->count)
+      return false;
+
+   return a->type == b->type;
+}
+
+
+/**
+ * Per-entry destructor passed to the hash table clear/destroy calls.
+ *
+ * Releases the payload attached to \p entry; the key needs no separate
+ * handling because it is embedded in that payload.
+ */
+static void
+vbo_minmax_cache_delete_entry(struct hash_entry *entry)
+{
+   void *payload = entry->data;
+
+   free(payload);
+}
+
+
+/**
+ * Decide whether min/max results may be cached for \p bufferObj.
+ *
+ * \return GL_FALSE if the buffer's usage history contains any of the usages
+ *         collected in the mask below, GL_TRUE otherwise.
+ */
+static GLboolean
+vbo_use_minmax_cache(struct gl_buffer_object *bufferObj)
+{
+   /* Usages that disable the cache -- presumably because they allow the
+    * buffer contents to change without MinMaxCacheDirty being set; confirm.
+    */
+   const GLuint uncacheable_usage = USAGE_TEXTURE_BUFFER |
+                                    USAGE_ATOMIC_COUNTER_BUFFER |
+                                    USAGE_SHADER_STORAGE_BUFFER |
+                                    USAGE_TRANSFORM_FEEDBACK_BUFFER |
+                                    USAGE_PIXEL_PACK_BUFFER |
+                                    USAGE_PERSISTENT_WRITE_MAP;
+
+   return (bufferObj->UsageHistory & uncacheable_usage) ? GL_FALSE : GL_TRUE;
+}
+
+
+/**
+ * Tear down the min/max cache of \p bufferObj and free every entry in it.
+ *
+ * The buffer's cache pointer is left NULL afterwards.  The cache is only
+ * created lazily, so this may run on a buffer that never had one; that
+ * relies on _mesa_hash_table_destroy() accepting NULL -- TODO confirm.
+ */
+void
+vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)
+{
+   struct hash_table *cache = bufferObj->MinMaxCache;
+
+   bufferObj->MinMaxCache = NULL;
+   _mesa_hash_table_destroy(cache, vbo_minmax_cache_delete_entry);
+}
+
+
+/**
+ * Look up a previously stored min/max result for one index range.
+ *
+ * \param bufferObj  buffer object whose cache is consulted
+ * \param type       index type, part of the cache key
+ * \param offset     start of the index range, part of the cache key
+ * \param count      number of indices, part of the cache key
+ * \param min_index  receives the cached minimum on a hit
+ * \param max_index  receives the cached maximum on a hit
+ *
+ * \return GL_TRUE on a hit (both outputs written), GL_FALSE otherwise (both
+ *         outputs left untouched).
+ *
+ * If the buffer has been flagged dirty, every entry is dropped, the flag is
+ * reset and the lookup reports a miss.
+ */
+static GLboolean
+vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
+                      GLenum type, GLintptr offset, GLuint count,
+                      GLuint *min_index, GLuint *max_index)
+{
+   GLboolean found = GL_FALSE;
+   struct minmax_cache_key key;
+   uint32_t hash;
+   struct hash_entry *result;
+
+   if (!vbo_use_minmax_cache(bufferObj))
+      return GL_FALSE;
+
+   mtx_lock(&bufferObj->Mutex);
+
+   /* The table pointer is written under this mutex when the cache is first
+    * created, so it has to be read under it as well to avoid racing with a
+    * concurrent store from another thread.
+    */
+   if (!bufferObj->MinMaxCache)
+      goto out;
+
+   if (bufferObj->MinMaxCacheDirty) {
+      /* Buffer contents may have changed: discard all stale results. */
+      _mesa_hash_table_clear(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
+      bufferObj->MinMaxCacheDirty = GL_FALSE;
+      goto out;
+   }
+
+   key.type = type;
+   key.offset = offset;
+   key.count = count;
+   hash = vbo_minmax_cache_hash(&key);
+   result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, hash, &key);
+   if (result) {
+      struct minmax_cache_entry *entry = result->data;
+      *min_index = entry->min;
+      *max_index = entry->max;
+      found = GL_TRUE;
+   }
+
+out:
+   mtx_unlock(&bufferObj->Mutex);
+   return found;
+}
+
+
+/**
+ * Remember the min/max computed for one (type, offset, count) index range
+ * of \p bufferObj so that a later lookup with the same key can reuse it.
+ *
+ * Does nothing when vbo_use_minmax_cache() rejects the buffer.  The hash
+ * table is created on first use.  Allocation or insertion failures are not
+ * reported; the result is simply left uncached.  All table accesses happen
+ * with bufferObj->Mutex held.
+ *
+ * \param ctx  only used for the debug message on a duplicate key
+ */
+static void
+vbo_minmax_cache_store(struct gl_context *ctx,
+                       struct gl_buffer_object *bufferObj,
+                       GLenum type, GLintptr offset, GLuint count,
+                       GLuint min, GLuint max)
+{
+   struct minmax_cache_entry *new_entry;
+   uint32_t key_hash;
+
+   if (!vbo_use_minmax_cache(bufferObj))
+      return;
+
+   mtx_lock(&bufferObj->Mutex);
+
+   /* Lazily create the per-buffer table. */
+   if (bufferObj->MinMaxCache == NULL) {
+      bufferObj->MinMaxCache =
+         _mesa_hash_table_create(NULL,
+                                 (uint32_t (*)(const void *))vbo_minmax_cache_hash,
+                                 (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal);
+      if (bufferObj->MinMaxCache == NULL)
+         goto unlock;
+   }
+
+   new_entry = MALLOC_STRUCT(minmax_cache_entry);
+   if (new_entry == NULL)
+      goto unlock;
+
+   new_entry->min = min;
+   new_entry->max = max;
+   new_entry->key.type = type;
+   new_entry->key.count = count;
+   new_entry->key.offset = offset;
+   key_hash = vbo_minmax_cache_hash(&new_entry->key);
+
+   if (_mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
+                                          key_hash, &new_entry->key) != NULL) {
+      /* It seems like this could happen when two contexts are rendering using
+       * the same buffer object from multiple threads.
+       */
+      _mesa_debug(ctx, "duplicate entry in minmax cache\n");
+      free(new_entry);
+   } else if (_mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,
+                                                 key_hash, &new_entry->key,
+                                                 new_entry) == NULL) {
+      /* Insertion failed: the table did not take ownership of the entry. */
+      free(new_entry);
+   }
+
+unlock:
+   mtx_unlock(&bufferObj->Mutex);
+}
 
 
 /**
@@ -56,6 +213,11 @@ vbo_get_minmax_index(struct gl_context *ctx,
    indices = (char *) ib->ptr + prim->start * index_size;
    if (_mesa_is_bufferobj(ib->obj)) {
       GLsizeiptr size = MIN2(count * index_size, ib->obj->Size);
+
+      if (vbo_get_minmax_cached(ib->obj, ib->type, (GLintptr) indices, count,
+                                min_index, max_index))
+         return;
+
       indices = ctx->Driver.MapBufferRange(ctx, (GLintptr) indices, size,
                                            GL_MAP_READ_BIT, ib->obj,
                                            MAP_INTERNAL);
@@ -139,6 +301,8 @@ vbo_get_minmax_index(struct gl_context *ctx,
    }
 
    if (_mesa_is_bufferobj(ib->obj)) {
+      vbo_minmax_cache_store(ctx, ib->obj, ib->type, prim->start, count,
+                             *min_index, *max_index);
       ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
    }
 }
-- 
2.5.0



More information about the mesa-dev mailing list