<p dir="ltr">I think this misses the image load/store case. (*samplerBuffer)</p>
<p dir="ltr">- Chris</p>
<div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">From: Nicolai Hähnle &lt;<a href="mailto:nicolai.haehnle@amd.com">nicolai.haehnle@amd.com</a>&gt;<br>
<br>
Some games developers are unaware that an index buffer in a VBO still needs<br>
to be read by the CPU if some varying data comes from a user pointer (unless<br>
glDrawRangeElements and friends are used). This is particularly bad when<br>
they tell us that the index buffer should live in VRAM.<br>
<br>
This cache helps, e.g. lifting This War Of Mine (a particularly bad<br>
offender) from under 10fps to slightly over 20fps on a Carrizo.<br>
<br>
Note that there is nothing prohibiting a user from rendering from multiple<br>
threads simultaneously with the same index buffer, hence the locking. (The<br>
internal buffer map taken for the buffer still leads to a race, but at least<br>
the locks are a move in the right direction.)<br>
---<br>
src/mesa/main/bufferobj.c | 10 +++<br>
src/mesa/main/mtypes.h | 4 +<br>
src/mesa/vbo/vbo.h | 3 +<br>
src/mesa/vbo/vbo_minmax_index.c | 163 ++++++++++++++++++++++++++++++++++++++++<br>
4 files changed, 180 insertions(+)<br>
<br>
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c<br>
index b06f528..f431bb8 100644<br>
--- a/src/mesa/main/bufferobj.c<br>
+++ b/src/mesa/main/bufferobj.c<br>
@@ -453,6 +453,7 @@ _mesa_delete_buffer_object(struct gl_context *ctx,<br>
{<br>
(void) ctx;<br>
<br>
+ vbo_delete_minmax_cache(bufObj);<br>
_mesa_align_free(bufObj->Data);<br>
<br>
/* assign strange values here to help w/ debugging */<br>
@@ -1513,6 +1514,7 @@ _mesa_buffer_storage(struct gl_context *ctx, struct gl_buffer_object *bufObj,<br>
<br>
bufObj->Written = GL_TRUE;<br>
bufObj->Immutable = GL_TRUE;<br>
+ bufObj->MinMaxCacheDirty = GL_TRUE;<br>
<br>
assert(ctx->Driver.BufferData);<br>
if (!ctx->Driver.BufferData(ctx, target, size, data, GL_DYNAMIC_DRAW,<br>
@@ -1626,6 +1628,7 @@ _mesa_buffer_data(struct gl_context *ctx, struct gl_buffer_object *bufObj,<br>
FLUSH_VERTICES(ctx, _NEW_BUFFER_OBJECT);<br>
<br>
bufObj->Written = GL_TRUE;<br>
+ bufObj->MinMaxCacheDirty = GL_TRUE;<br>
<br>
#ifdef VBO_DEBUG<br>
printf("glBufferDataARB(%u, sz %ld, from %p, usage 0x%x)\n",<br>
@@ -1738,6 +1741,7 @@ _mesa_buffer_sub_data(struct gl_context *ctx, struct gl_buffer_object *bufObj,<br>
}<br>
<br>
bufObj->Written = GL_TRUE;<br>
+ bufObj->MinMaxCacheDirty = GL_TRUE;<br>
<br>
assert(ctx->Driver.BufferSubData);<br>
ctx->Driver.BufferSubData(ctx, offset, size, data, bufObj);<br>
@@ -1857,6 +1861,8 @@ _mesa_clear_buffer_sub_data(struct gl_context *ctx,<br>
if (size == 0)<br>
return;<br>
<br>
+ bufObj->MinMaxCacheDirty = GL_TRUE;<br>
+<br>
if (data == NULL) {<br>
/* clear to zeros, per the spec */<br>
ctx->Driver.ClearBufferSubData(ctx, offset, size,<br>
@@ -2270,6 +2276,8 @@ _mesa_copy_buffer_sub_data(struct gl_context *ctx,<br>
}<br>
}<br>
<br>
+ dst->MinMaxCacheDirty = GL_TRUE;<br>
+<br>
ctx->Driver.CopyBufferSubData(ctx, src, dst, readOffset, writeOffset, size);<br>
}<br>
<br>
@@ -2479,6 +2487,8 @@ _mesa_map_buffer_range(struct gl_context *ctx,<br>
<br>
if (access & GL_MAP_PERSISTENT_BIT)<br>
bufObj->UsageHistory |= USAGE_PERSISTENT_WRITE_MAP;<br>
+<br>
+ bufObj->MinMaxCacheDirty = GL_TRUE;<br>
}<br>
<br>
#ifdef VBO_DEBUG<br>
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h<br>
index 4d625da..d4c41a7 100644<br>
--- a/src/mesa/main/mtypes.h<br>
+++ b/src/mesa/main/mtypes.h<br>
@@ -1283,6 +1283,10 @@ struct gl_buffer_object<br>
GLuint NumMapBufferWriteCalls;<br>
<br>
struct gl_buffer_mapping Mappings[MAP_COUNT];<br>
+<br>
+ /** Memoization of min/max index computations for static index buffers */<br>
+ struct hash_table *MinMaxCache;<br>
+ GLboolean MinMaxCacheDirty;<br>
};<br>
<br>
<br>
diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h<br>
index dd9b428..59c7351 100644<br>
--- a/src/mesa/vbo/vbo.h<br>
+++ b/src/mesa/vbo/vbo.h<br>
@@ -169,6 +169,9 @@ vbo_sizeof_ib_type(GLenum type)<br>
}<br>
<br>
void<br>
+vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj);<br>
+<br>
+void<br>
vbo_get_minmax_indices(struct gl_context *ctx, const struct _mesa_prim *prim,<br>
const struct _mesa_index_buffer *ib,<br>
GLuint *min_index, GLuint *max_index, GLuint nr_prims);<br>
diff --git a/src/mesa/vbo/vbo_minmax_index.c b/src/mesa/vbo/vbo_minmax_index.c<br>
index b43ed98..9ac0168 100644<br>
--- a/src/mesa/vbo/vbo_minmax_index.c<br>
+++ b/src/mesa/vbo/vbo_minmax_index.c<br>
@@ -32,6 +32,162 @@<br>
#include "main/macros.h"<br>
#include "main/sse_minmax.h"<br>
#include "x86/common_x86_asm.h"<br>
+#include "util/hash_table.h"<br>
+<br>
+<br>
+struct minmax_cache_key {<br>
+ GLintptr offset;<br>
+ GLuint count;<br>
+ GLenum type;<br>
+};<br>
+<br>
+<br>
+struct minmax_cache_entry {<br>
+ struct minmax_cache_key key;<br>
+ GLuint min;<br>
+ GLuint max;<br>
+};<br>
+<br>
+<br>
+static uint32_t<br>
+vbo_minmax_cache_hash(const struct minmax_cache_key *key)<br>
+{<br>
+ return _mesa_hash_data(key, sizeof(*key));<br>
+}<br>
+<br>
+<br>
+static bool<br>
+vbo_minmax_cache_key_equal(const struct minmax_cache_key *a,<br>
+ const struct minmax_cache_key *b)<br>
+{<br>
+ return (a->offset == b->offset) && (a->count == b->count) && (a->type == b->type);<br>
+}<br>
+<br>
+<br>
+static void<br>
+vbo_minmax_cache_delete_entry(struct hash_entry *entry)<br>
+{<br>
+ free(entry->data);<br>
+}<br>
+<br>
+<br>
+static GLboolean<br>
+vbo_use_minmax_cache(struct gl_buffer_object *bufferObj)<br>
+{<br>
+ if (bufferObj->UsageHistory & (USAGE_PIXEL_PACK_BUFFER |<br>
+ USAGE_ATOMIC_COUNTER_BUFFER |<br>
+ USAGE_SHADER_STORAGE_BUFFER |<br>
+ USAGE_TRANSFORM_FEEDBACK_BUFFER |<br>
+ USAGE_PERSISTENT_WRITE_MAP))<br>
+ return GL_FALSE;<br>
+<br>
+ return GL_TRUE;<br>
+}<br>
+<br>
+<br>
+void<br>
+vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)<br>
+{<br>
+ _mesa_hash_table_destroy(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);<br>
+ bufferObj->MinMaxCache = NULL;<br>
+}<br>
+<br>
+<br>
+static GLboolean<br>
+vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,<br>
+ GLenum type, GLintptr offset, GLuint count,<br>
+ GLuint *min_index, GLuint *max_index)<br>
+{<br>
+ GLboolean found = GL_FALSE;<br>
+ struct minmax_cache_key key;<br>
+ uint32_t hash;<br>
+ struct hash_entry *result;<br>
+<br>
+ if (!bufferObj->MinMaxCache)<br>
+ return GL_FALSE;<br>
+ if (!vbo_use_minmax_cache(bufferObj))<br>
+ return GL_FALSE;<br>
+<br>
+ mtx_lock(&bufferObj->Mutex);<br>
+<br>
+ if (bufferObj->MinMaxCacheDirty) {<br>
+ _mesa_hash_table_clear(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);<br>
+ bufferObj->MinMaxCacheDirty = GL_FALSE;<br>
+ goto out;<br>
+ }<br>
+<br>
+ key.type = type;<br>
+ key.offset = offset;<br>
+ key.count = count;<br>
+ hash = vbo_minmax_cache_hash(&key);<br>
+ result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, hash, &key);<br>
+ if (result) {<br>
+ struct minmax_cache_entry *entry = result->data;<br>
+ *min_index = entry->min;<br>
+ *max_index = entry->max;<br>
+ found = GL_TRUE;<br>
+ }<br>
+<br>
+out:<br>
+ mtx_unlock(&bufferObj->Mutex);<br>
+ return found;<br>
+}<br>
+<br>
+<br>
+static void<br>
+vbo_minmax_cache_store(struct gl_context *ctx,<br>
+ struct gl_buffer_object *bufferObj,<br>
+ GLenum type, GLintptr offset, GLuint count,<br>
+ GLuint min, GLuint max)<br>
+{<br>
+ struct minmax_cache_entry *entry;<br>
+ struct hash_entry *table_entry;<br>
+ uint32_t hash;<br>
+<br>
+ if (!vbo_use_minmax_cache(bufferObj))<br>
+ return;<br>
+<br>
+ mtx_lock(&bufferObj->Mutex);<br>
+<br>
+ if (!bufferObj->MinMaxCache) {<br>
+ bufferObj->MinMaxCache =<br>
+ _mesa_hash_table_create(NULL,<br>
+ (uint32_t (*)(const void *))vbo_minmax_cache_hash,<br>
+ (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal);<br>
+ if (!bufferObj->MinMaxCache)<br>
+ goto out;<br>
+ }<br>
+<br>
+ entry = MALLOC_STRUCT(minmax_cache_entry);<br>
+ if (!entry)<br>
+ goto out;<br>
+<br>
+ entry->key.offset = offset;<br>
+ entry->key.count = count;<br>
+ entry->key.type = type;<br>
+ entry->min = min;<br>
+ entry->max = max;<br>
+ hash = vbo_minmax_cache_hash(&entry->key);<br>
+<br>
+ table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,<br>
+ hash, &entry->key);<br>
+ if (table_entry) {<br>
+ /* It seems like this could happen when two contexts are rendering using<br>
+ * the same buffer object from multiple threads.<br>
+ */<br>
+ _mesa_debug(ctx, "duplicate entry in minmax cache\n");<br>
+ free(entry);<br>
+ goto out;<br>
+ }<br>
+<br>
+ table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,<br>
+ hash, &entry->key, entry);<br>
+ if (!table_entry)<br>
+ free(entry);<br>
+<br>
+out:<br>
+ mtx_unlock(&bufferObj->Mutex);<br>
+}<br>
<br>
<br>
/**<br>
@@ -56,6 +212,11 @@ vbo_get_minmax_index(struct gl_context *ctx,<br>
indices = (char *) ib->ptr + prim->start * index_size;<br>
if (_mesa_is_bufferobj(ib->obj)) {<br>
GLsizeiptr size = MIN2(count * index_size, ib->obj->Size);<br>
+<br>
+ if (vbo_get_minmax_cached(ib->obj, ib->type, (GLintptr) indices, count,<br>
+ min_index, max_index))<br>
+ return;<br>
+<br>
indices = ctx->Driver.MapBufferRange(ctx, (GLintptr) indices, size,<br>
GL_MAP_READ_BIT, ib->obj,<br>
MAP_INTERNAL);<br>
@@ -139,6 +300,8 @@ vbo_get_minmax_index(struct gl_context *ctx,<br>
}<br>
<br>
if (_mesa_is_bufferobj(ib->obj)) {<br>
+ vbo_minmax_cache_store(ctx, ib->obj, ib->type, prim->start, count,<br>
+ *min_index, *max_index);<br>
ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);<br>
}<br>
}<br>
--<br>
2.5.0<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</div>