[Mesa-dev] [PATCH 7/7] vbo: cache/memoize the result of vbo_get_minmax_indices
Nicolai Hähnle
nhaehnle at gmail.com
Fri Jan 8 07:17:15 PST 2016
On 07.01.2016 23:13, Chris Forbes wrote:
> I think this misses the image load/store case. (*samplerBuffer)
Good catch, thanks!
Cheers,
Nicolai
>
> - Chris
>
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> Some game developers are unaware that an index buffer in a VBO still needs
> to be read by the CPU if some varying data comes from a user pointer (unless
> glDrawRangeElements and friends are used). This is particularly bad when
> they tell us that the index buffer should live in VRAM.
>
> This cache helps, e.g. lifting This War Of Mine (a particularly bad
> offender) from under 10fps to slightly over 20fps on a Carrizo.
>
> Note that there is nothing prohibiting a user from rendering from multiple
> threads simultaneously with the same index buffer, hence the locking. (The
> internal buffer map taken for the buffer still leads to a race, but at least
> the locks are a move in the right direction.)
> ---
> src/mesa/main/bufferobj.c | 10 +++
> src/mesa/main/mtypes.h | 4 +
> src/mesa/vbo/vbo.h | 3 +
> src/mesa/vbo/vbo_minmax_index.c | 163
> ++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 180 insertions(+)
>
> diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
> index b06f528..f431bb8 100644
> --- a/src/mesa/main/bufferobj.c
> +++ b/src/mesa/main/bufferobj.c
> @@ -453,6 +453,7 @@ _mesa_delete_buffer_object(struct gl_context *ctx,
> {
> (void) ctx;
>
> + vbo_delete_minmax_cache(bufObj);
> _mesa_align_free(bufObj->Data);
>
> /* assign strange values here to help w/ debugging */
> @@ -1513,6 +1514,7 @@ _mesa_buffer_storage(struct gl_context *ctx,
> struct gl_buffer_object *bufObj,
>
> bufObj->Written = GL_TRUE;
> bufObj->Immutable = GL_TRUE;
> + bufObj->MinMaxCacheDirty = GL_TRUE;
>
> assert(ctx->Driver.BufferData);
> if (!ctx->Driver.BufferData(ctx, target, size, data, GL_DYNAMIC_DRAW,
> @@ -1626,6 +1628,7 @@ _mesa_buffer_data(struct gl_context *ctx, struct
> gl_buffer_object *bufObj,
> FLUSH_VERTICES(ctx, _NEW_BUFFER_OBJECT);
>
> bufObj->Written = GL_TRUE;
> + bufObj->MinMaxCacheDirty = GL_TRUE;
>
> #ifdef VBO_DEBUG
> printf("glBufferDataARB(%u, sz %ld, from %p, usage 0x%x)\n",
> @@ -1738,6 +1741,7 @@ _mesa_buffer_sub_data(struct gl_context *ctx,
> struct gl_buffer_object *bufObj,
> }
>
> bufObj->Written = GL_TRUE;
> + bufObj->MinMaxCacheDirty = GL_TRUE;
>
> assert(ctx->Driver.BufferSubData);
> ctx->Driver.BufferSubData(ctx, offset, size, data, bufObj);
> @@ -1857,6 +1861,8 @@ _mesa_clear_buffer_sub_data(struct gl_context *ctx,
> if (size == 0)
> return;
>
> + bufObj->MinMaxCacheDirty = GL_TRUE;
> +
> if (data == NULL) {
> /* clear to zeros, per the spec */
> ctx->Driver.ClearBufferSubData(ctx, offset, size,
> @@ -2270,6 +2276,8 @@ _mesa_copy_buffer_sub_data(struct gl_context *ctx,
> }
> }
>
> + dst->MinMaxCacheDirty = GL_TRUE;
> +
> ctx->Driver.CopyBufferSubData(ctx, src, dst, readOffset,
> writeOffset, size);
> }
>
> @@ -2479,6 +2487,8 @@ _mesa_map_buffer_range(struct gl_context *ctx,
>
> if (access & GL_MAP_PERSISTENT_BIT)
> bufObj->UsageHistory |= USAGE_PERSISTENT_WRITE_MAP;
> +
> + bufObj->MinMaxCacheDirty = GL_TRUE;
> }
>
> #ifdef VBO_DEBUG
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index 4d625da..d4c41a7 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -1283,6 +1283,10 @@ struct gl_buffer_object
> GLuint NumMapBufferWriteCalls;
>
> struct gl_buffer_mapping Mappings[MAP_COUNT];
> +
> + /** Memoization of min/max index computations for static index
> buffers */
> + struct hash_table *MinMaxCache;
> + GLboolean MinMaxCacheDirty;
> };
>
>
> diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
> index dd9b428..59c7351 100644
> --- a/src/mesa/vbo/vbo.h
> +++ b/src/mesa/vbo/vbo.h
> @@ -169,6 +169,9 @@ vbo_sizeof_ib_type(GLenum type)
> }
>
> void
> +vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj);
> +
> +void
> vbo_get_minmax_indices(struct gl_context *ctx, const struct _mesa_prim
> *prim,
> const struct _mesa_index_buffer *ib,
> GLuint *min_index, GLuint *max_index, GLuint
> nr_prims);
> diff --git a/src/mesa/vbo/vbo_minmax_index.c
> b/src/mesa/vbo/vbo_minmax_index.c
> index b43ed98..9ac0168 100644
> --- a/src/mesa/vbo/vbo_minmax_index.c
> +++ b/src/mesa/vbo/vbo_minmax_index.c
> @@ -32,6 +32,162 @@
> #include "main/macros.h"
> #include "main/sse_minmax.h"
> #include "x86/common_x86_asm.h"
> +#include "util/hash_table.h"
> +
> +
> +struct minmax_cache_key {
> + GLintptr offset;
> + GLuint count;
> + GLenum type;
> +};
> +
> +
> +struct minmax_cache_entry {
> + struct minmax_cache_key key;
> + GLuint min;
> + GLuint max;
> +};
> +
> +
> +static uint32_t
> +vbo_minmax_cache_hash(const struct minmax_cache_key *key)
> +{
> + return _mesa_hash_data(key, sizeof(*key));
> +}
> +
> +
> +static bool
> +vbo_minmax_cache_key_equal(const struct minmax_cache_key *a,
> + const struct minmax_cache_key *b)
> +{
> + return (a->offset == b->offset) && (a->count == b->count) &&
> (a->type == b->type);
> +}
> +
> +
> +static void
> +vbo_minmax_cache_delete_entry(struct hash_entry *entry)
> +{
> + free(entry->data);
> +}
> +
> +
> +static GLboolean
> +vbo_use_minmax_cache(struct gl_buffer_object *bufferObj)
> +{
> + if (bufferObj->UsageHistory & (USAGE_PIXEL_PACK_BUFFER |
> + USAGE_ATOMIC_COUNTER_BUFFER |
> + USAGE_SHADER_STORAGE_BUFFER |
> + USAGE_TRANSFORM_FEEDBACK_BUFFER |
> + USAGE_PERSISTENT_WRITE_MAP))
> + return GL_FALSE;
> +
> + return GL_TRUE;
> +}
> +
> +
> +void
> +vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)
> +{
> + _mesa_hash_table_destroy(bufferObj->MinMaxCache,
> vbo_minmax_cache_delete_entry);
> + bufferObj->MinMaxCache = NULL;
> +}
> +
> +
> +static GLboolean
> +vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
> + GLenum type, GLintptr offset, GLuint count,
> + GLuint *min_index, GLuint *max_index)
> +{
> + GLboolean found = GL_FALSE;
> + struct minmax_cache_key key;
> + uint32_t hash;
> + struct hash_entry *result;
> +
> + if (!bufferObj->MinMaxCache)
> + return GL_FALSE;
> + if (!vbo_use_minmax_cache(bufferObj))
> + return GL_FALSE;
> +
> + mtx_lock(&bufferObj->Mutex);
> +
> + if (bufferObj->MinMaxCacheDirty) {
> + _mesa_hash_table_clear(bufferObj->MinMaxCache,
> vbo_minmax_cache_delete_entry);
> + bufferObj->MinMaxCacheDirty = GL_FALSE;
> + goto out;
> + }
> +
> + key.type = type;
> + key.offset = offset;
> + key.count = count;
> + hash = vbo_minmax_cache_hash(&key);
> + result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
> hash, &key);
> + if (result) {
> + struct minmax_cache_entry *entry = result->data;
> + *min_index = entry->min;
> + *max_index = entry->max;
> + found = GL_TRUE;
> + }
> +
> +out:
> + mtx_unlock(&bufferObj->Mutex);
> + return found;
> +}
> +
> +
> +static void
> +vbo_minmax_cache_store(struct gl_context *ctx,
> + struct gl_buffer_object *bufferObj,
> + GLenum type, GLintptr offset, GLuint count,
> + GLuint min, GLuint max)
> +{
> + struct minmax_cache_entry *entry;
> + struct hash_entry *table_entry;
> + uint32_t hash;
> +
> + if (!vbo_use_minmax_cache(bufferObj))
> + return;
> +
> + mtx_lock(&bufferObj->Mutex);
> +
> + if (!bufferObj->MinMaxCache) {
> + bufferObj->MinMaxCache =
> + _mesa_hash_table_create(NULL,
> + (uint32_t (*)(const void
> *))vbo_minmax_cache_hash,
> + (bool (*)(const void *, const void
> *))vbo_minmax_cache_key_equal);
> + if (!bufferObj->MinMaxCache)
> + goto out;
> + }
> +
> + entry = MALLOC_STRUCT(minmax_cache_entry);
> + if (!entry)
> + goto out;
> +
> + entry->key.offset = offset;
> + entry->key.count = count;
> + entry->key.type = type;
> + entry->min = min;
> + entry->max = max;
> + hash = vbo_minmax_cache_hash(&entry->key);
> +
> + table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
> + hash, &entry->key);
> + if (table_entry) {
> + /* It seems like this could happen when two contexts are
> rendering using
> + * the same buffer object from multiple threads.
> + */
> + _mesa_debug(ctx, "duplicate entry in minmax cache\n");
> + free(entry);
> + goto out;
> + }
> +
> + table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,
> + hash, &entry->key,
> entry);
> + if (!table_entry)
> + free(entry);
> +
> +out:
> + mtx_unlock(&bufferObj->Mutex);
> +}
>
>
> /**
> @@ -56,6 +212,11 @@ vbo_get_minmax_index(struct gl_context *ctx,
> indices = (char *) ib->ptr + prim->start * index_size;
> if (_mesa_is_bufferobj(ib->obj)) {
> GLsizeiptr size = MIN2(count * index_size, ib->obj->Size);
> +
> + if (vbo_get_minmax_cached(ib->obj, ib->type, (GLintptr) indices,
> count,
> + min_index, max_index))
> + return;
> +
> indices = ctx->Driver.MapBufferRange(ctx, (GLintptr) indices, size,
> GL_MAP_READ_BIT, ib->obj,
> MAP_INTERNAL);
> @@ -139,6 +300,8 @@ vbo_get_minmax_index(struct gl_context *ctx,
> }
>
> if (_mesa_is_bufferobj(ib->obj)) {
> + vbo_minmax_cache_store(ctx, ib->obj, ib->type, prim->start, count,
> + *min_index, *max_index);
> ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
> }
> }
> --
> 2.5.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org <mailto:mesa-dev at lists.freedesktop.org>
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list