Mesa (master): st/mesa: set take_index_buffer_ownership to skip an atomic in u_threaded

Thu Jan 28 00:13:26 UTC 2021

Module: Mesa
Branch: master
Commit: 431cd34cdb18dd6804efeb48f1c08360acd58d98
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=431cd34cdb18dd6804efeb48f1c08360acd58d98

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Sat Jan  2 17:53:13 2021 -0500

st/mesa: set take_index_buffer_ownership to skip an atomic in u_threaded

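This reduces overhead by skipping the atomic increment of the index buffer's reference count when the buffer is added to the threaded batch; that atomic is slow on AMD Zen.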
It uses the same mechanism as vertex buffers.

Reviewed-by: Zoltán Böszörményi <zboszor at gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8298>

---

 src/gallium/auxiliary/util/u_threaded_context.c |  2 +-
 src/gallium/auxiliary/util/u_threaded_context.h |  6 ++++++
 src/mesa/state_tracker/st_draw.c                | 13 ++++++++++++-
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index 39fbd23d413..a690509a140 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -2389,7 +2389,7 @@ tc_call_draw_multi(struct pipe_context *pipe, union tc_payload *payload)
 #define DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX \
    offsetof(struct pipe_draw_info, index)
 
-static void
+void
 tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
             const struct pipe_draw_indirect_info *indirect,
             const struct pipe_draw_start_count *draws,
diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h
index 6838f804481..0a9289abaf9 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.h
+++ b/src/gallium/auxiliary/util/u_threaded_context.h
@@ -396,6 +396,12 @@ threaded_context_flush(struct pipe_context *_pipe,
                        struct tc_unflushed_batch_token *token,
                        bool prefer_async);
 
+void
+tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
+            const struct pipe_draw_indirect_info *indirect,
+            const struct pipe_draw_start_count *draws,
+            unsigned num_draws);
+
 static inline struct threaded_context *
 threaded_context(struct pipe_context *pipe)
 {
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index be0ea865a83..b05de7fa74e 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -65,6 +65,7 @@
 #include "util/u_prim.h"
 #include "util/u_draw.h"
 #include "util/u_upload_mgr.h"
+#include "util/u_threaded_context.h"
 #include "draw/draw_context.h"
 #include "cso_cache/cso_context.h"
 
@@ -247,7 +248,17 @@ prepare_indexed_draw(/* pass both st and ctx to reduce dereferences */
       }
 
       if (!info->has_user_indices) {
-         info->index.resource = st_buffer_object(info->index.gl_bo)->buffer;
+         if (st->pipe->draw_vbo == tc_draw_vbo) {
+            /* Fast path for u_threaded_context. This eliminates the atomic
+             * increment for the index buffer refcount when adding it into
+             * the threaded batch buffer.
+             */
+            info->index.resource =
+               st_get_buffer_reference(ctx, info->index.gl_bo);
+            info->take_index_buffer_ownership = true;
+         } else {
+            info->index.resource = st_buffer_object(info->index.gl_bo)->buffer;
+         }
 
          /* Return if the bound element array buffer doesn't have any backing
           * storage. (nothing to do)