[Mesa-dev] [PATCH 8/8] radeonsi: pin the winsys thread to the requested L3 cache

Marek Olšák maraeo at gmail.com
Thu Sep 6 04:02:29 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeon/radeon_winsys.h        |  8 ++++++++
 src/gallium/drivers/radeonsi/si_pipe.c            |  8 ++++++++
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c     | 10 ++++++++++
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 13 +++++++++++++
 4 files changed, 39 insertions(+)

diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index b75a2331b95..5ac75b70e77 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -251,20 +251,28 @@ struct radeon_winsys {
 
     /**
      * Query an info structure from winsys.
      *
      * \param ws        The winsys this function is called from.
      * \param info      Return structure
      */
     void (*query_info)(struct radeon_winsys *ws,
                        struct radeon_info *info);
 
+    /**
+     * A hint for the winsys that it should pin its execution threads to
+     * a group of cores sharing a specific L3 cache if the CPU has multiple
+     * L3 caches. This is needed for good multithreading performance on
+     * AMD Zen CPUs.
+     */
+    void (*pin_threads_to_L3_cache)(struct radeon_winsys *ws, unsigned cache);
+
     /**************************************************************************
      * Buffer management. Buffer attributes are mostly fixed over its lifetime.
      *
      * Remember that gallium gets to choose the interface it needs, and the
      * window systems must then implement that interface (rather than the
      * other way around...).
      *************************************************************************/
 
     /**
      * Create a buffer object.
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 9156f3e708c..f8e5df9b382 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -345,40 +345,48 @@ static void si_set_debug_callback(struct pipe_context *ctx,
 static void si_set_log_context(struct pipe_context *ctx,
 			       struct u_log_context *log)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	sctx->log = log;
 
 	if (log)
 		u_log_add_auto_logger(log, si_auto_log_cs, sctx);
 }
 
+static void si_pin_threads_to_L3_cache(struct pipe_context *ctx, unsigned cache)
+{
+	struct radeon_winsys *ws = ((struct si_context *)ctx)->ws;
+	/* pipe_context hook: pass the requested L3 cache (`cache`) unchanged to the winsys callback. */
+	ws->pin_threads_to_L3_cache(ws, cache);
+}
+
 static struct pipe_context *si_create_context(struct pipe_screen *screen,
                                               unsigned flags)
 {
 	struct si_context *sctx = CALLOC_STRUCT(si_context);
 	struct si_screen* sscreen = (struct si_screen *)screen;
 	struct radeon_winsys *ws = sscreen->ws;
 	int shader, i;
 
 	if (!sctx)
 		return NULL;
 
 	if (flags & PIPE_CONTEXT_DEBUG)
 		sscreen->record_llvm_ir = true; /* racy but not critical */
 
 	sctx->b.screen = screen; /* this must be set first */
 	sctx->b.priv = NULL;
 	sctx->b.destroy = si_destroy_context;
 	sctx->b.emit_string_marker = si_emit_string_marker;
 	sctx->b.set_debug_callback = si_set_debug_callback;
 	sctx->b.set_log_context = si_set_log_context;
+	sctx->b.pin_threads_to_L3_cache = si_pin_threads_to_L3_cache;
 	sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
 	sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
 
 	slab_create_child(&sctx->pool_transfers, &sscreen->pool_transfers);
 	slab_create_child(&sctx->pool_transfers_unsync, &sscreen->pool_transfers);
 
 	sctx->ws = sscreen->ws;
 	sctx->family = sscreen->info.family;
 	sctx->chip_class = sscreen->info.chip_class;
 
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index dcbc075e3c5..f32bbd9d086 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -23,20 +23,21 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
  */
 
 #include "amdgpu_cs.h"
 #include "amdgpu_public.h"
 
+#include "util/u_cpu_detect.h"
 #include "util/u_hash_table.h"
 #include "util/hash_table.h"
 #include "util/xmlconfig.h"
 #include <amdgpu_drm.h>
 #include <xf86drm.h>
 #include <stdio.h>
 #include <sys/stat.h>
 #include "amd/common/sid.h"
 #include "amd/common/gfx9d.h"
 
@@ -228,20 +229,28 @@ static bool amdgpu_winsys_unref(struct radeon_winsys *rws)
    simple_mtx_unlock(&dev_tab_mutex);
    return destroy;
 }
 
 static const char* amdgpu_get_chip_name(struct radeon_winsys *ws)
 {
    amdgpu_device_handle dev = ((struct amdgpu_winsys *)ws)->dev;
    return amdgpu_get_marketing_name(dev);
 }
 
+static void amdgpu_pin_threads_to_L3_cache(struct radeon_winsys *rws,
+                                           unsigned cache)
+{
+   struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
+   /* Hands only cs_queue.threads[0] to util_pin_thread_to_L3(). NOTE(review): the radeon winsys guards this with util_queue_is_initialized(); confirm cs_queue always exists here. */
+   util_pin_thread_to_L3(ws->cs_queue.threads[0], cache,
+                         util_cpu_caps.cores_per_L3);
+}
 
 PUBLIC struct radeon_winsys *
 amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
 		     radeon_screen_create_t screen_create)
 {
    struct amdgpu_winsys *ws;
    drmVersionPtr version = drmGetVersion(fd);
    amdgpu_device_handle dev;
    uint32_t drm_major, drm_minor, r;
 
@@ -307,20 +316,21 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
    pipe_reference_init(&ws->reference, 1);
 
    /* Set functions. */
    ws->base.unref = amdgpu_winsys_unref;
    ws->base.destroy = amdgpu_winsys_destroy;
    ws->base.query_info = amdgpu_winsys_query_info;
    ws->base.cs_request_feature = amdgpu_cs_request_feature;
    ws->base.query_value = amdgpu_query_value;
    ws->base.read_registers = amdgpu_read_registers;
    ws->base.get_chip_name = amdgpu_get_chip_name;
+   ws->base.pin_threads_to_L3_cache = amdgpu_pin_threads_to_L3_cache;
 
    amdgpu_bo_init_functions(ws);
    amdgpu_cs_init_functions(ws);
    amdgpu_surface_init_functions(ws);
 
    LIST_INITHEAD(&ws->global_bo_list);
    ws->bo_export_table = util_hash_table_create(hash_pointer, compare_pointers);
 
    (void) simple_mtx_init(&ws->global_bo_list_lock, mtx_plain);
    (void) simple_mtx_init(&ws->bo_fence_lock, mtx_plain);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 0c41e1397c7..cf07a8d8e26 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -22,20 +22,21 @@
  *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
  */
 
 #include "radeon_drm_bo.h"
 #include "radeon_drm_cs.h"
 #include "radeon_drm_public.h"
 
+#include "util/u_cpu_detect.h"
 #include "util/u_memory.h"
 #include "util/u_hash_table.h"
 
 #include <xf86drm.h>
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
 #include <fcntl.h>
 #include <radeon_surface.h>
@@ -796,20 +797,31 @@ static bool radeon_winsys_unref(struct radeon_winsys *ws)
 static unsigned handle_hash(void *key)
 {
     return PTR_TO_UINT(key);
 }
 
 static int handle_compare(void *key1, void *key2)
 {
     return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
 }
 
+static void radeon_pin_threads_to_L3_cache(struct radeon_winsys *ws,
+                                           unsigned cache)
+{
+    struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws;
+    /* Only cs_queue.threads[0] is passed on, and only when util_queue_is_initialized() reports the queue as set up; otherwise this hint does nothing. */
+    if (util_queue_is_initialized(&rws->cs_queue)) {
+        util_pin_thread_to_L3(rws->cs_queue.threads[0], cache,
+                              util_cpu_caps.cores_per_L3);
+    }
+}
+
 PUBLIC struct radeon_winsys *
 radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
 			 radeon_screen_create_t screen_create)
 {
     struct radeon_drm_winsys *ws;
 
     mtx_lock(&fd_tab_mutex);
     if (!fd_tab) {
         fd_tab = util_hash_table_create(hash_fd, compare_fd);
     }
@@ -863,20 +875,21 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
             goto fail_slab;
     }
 
     /* init reference */
     pipe_reference_init(&ws->reference, 1);
 
     /* Set functions. */
     ws->base.unref = radeon_winsys_unref;
     ws->base.destroy = radeon_winsys_destroy;
     ws->base.query_info = radeon_query_info;
+    ws->base.pin_threads_to_L3_cache = radeon_pin_threads_to_L3_cache;
     ws->base.cs_request_feature = radeon_cs_request_feature;
     ws->base.query_value = radeon_query_value;
     ws->base.read_registers = radeon_read_registers;
 
     radeon_drm_bo_init_functions(ws);
     radeon_drm_cs_init_functions(ws);
     radeon_surface_init_functions(ws);
 
     (void) mtx_init(&ws->hyperz_owner_mutex, mtx_plain);
     (void) mtx_init(&ws->cmask_owner_mutex, mtx_plain);
-- 
2.17.1



More information about the mesa-dev mailing list