[Mesa-dev] [PATCH 2/2] st/mesa: pin driver threads to a fixed CCX when glthread is enabled

Marek Olšák maraeo at gmail.com
Tue Nov 13 00:21:17 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

radeonsi has 3 driver threads (glthread, gallium, winsys); other drivers
may have 2 (glthread, gallium). It therefore makes sense to pin all of them
to one randomly chosen CCX and keep them there, irrespective of where the
app thread runs.
---
 src/gallium/auxiliary/util/u_helpers.c | 63 ++++----------------------
 src/gallium/auxiliary/util/u_helpers.h |  3 +-
 src/mesa/state_tracker/st_manager.c    | 11 +++++
 3 files changed, 21 insertions(+), 56 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_helpers.c b/src/gallium/auxiliary/util/u_helpers.c
index 4c70c004178..821242c0181 100644
--- a/src/gallium/auxiliary/util/u_helpers.c
+++ b/src/gallium/auxiliary/util/u_helpers.c
@@ -114,102 +114,55 @@ util_upload_index_buffer(struct pipe_context *pipe,
 
    u_upload_data(pipe->stream_uploader, start_offset,
                  info->count * info->index_size, 4,
                  (char*)info->index.user + start_offset,
                  out_offset, out_buffer);
    u_upload_unmap(pipe->stream_uploader);
    *out_offset -= start_offset;
    return *out_buffer != NULL;
 }
 
-#ifdef HAVE_PTHREAD_SETAFFINITY
-
-static unsigned L3_cache_number;
-static once_flag thread_pinning_once_flag = ONCE_FLAG_INIT;
-
-static void
-util_set_full_cpu_affinity(void)
-{
-   cpu_set_t cpuset;
-
-   CPU_ZERO(&cpuset);
-   for (unsigned i = 0; i < CPU_SETSIZE; i++)
-      CPU_SET(i, &cpuset);
-
-   pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
-}
-
-static void
-util_init_thread_pinning(void)
-{
-   /* Get a semi-random number. */
-   int64_t t = os_time_get_nano();
-   L3_cache_number = (t ^ (t >> 8) ^ (t >> 16));
-
-   /* Reset thread affinity for all child processes to prevent them from
-    * inheriting the current thread's affinity.
-    *
-    * XXX: If the driver is unloaded after this, and the app later calls
-    * fork(), the child process will likely crash before fork() returns,
-    * because the address where util_set_full_cpu_affinity was located
-    * will either be unmapped or point to random other contents.
-    */
-   pthread_atfork(NULL, NULL, util_set_full_cpu_affinity);
-}
-
-#endif
-
 /**
  * Called by MakeCurrent. Used to notify the driver that the application
  * thread may have been changed.
  *
  * The function pins the current thread and driver threads to a group of
  * CPU cores that share the same L3 cache. This is needed for good multi-
  * threading performance on AMD Zen CPUs.
  *
  * \param upper_thread  thread in the state tracker that also needs to be
  *                      pinned.
  */
 void
-util_context_thread_changed(struct pipe_context *ctx, thrd_t *upper_thread)
+util_pin_driver_threads_to_random_L3(struct pipe_context *ctx,
+                                     thrd_t *upper_thread)
 {
-#ifdef HAVE_PTHREAD_SETAFFINITY
    /* If pinning has no effect, don't do anything. */
    if (util_cpu_caps.nr_cpus == util_cpu_caps.cores_per_L3)
       return;
 
-   thrd_t current = thrd_current();
-   int cache = util_get_L3_for_pinned_thread(current,
-                                             util_cpu_caps.cores_per_L3);
-
-   call_once(&thread_pinning_once_flag, util_init_thread_pinning);
+   unsigned num_L3_caches = util_cpu_caps.nr_cpus /
+                            util_cpu_caps.cores_per_L3;
 
-   /* If the main thread is not pinned, choose the L3 cache. */
-   if (cache == -1) {
-      unsigned num_L3_caches = util_cpu_caps.nr_cpus /
-                               util_cpu_caps.cores_per_L3;
-
-      /* Choose a different L3 cache for each subsequent MakeCurrent. */
-      cache = p_atomic_inc_return(&L3_cache_number) % num_L3_caches;
-      util_pin_thread_to_L3(current, cache, util_cpu_caps.cores_per_L3);
-   }
+   /* Get a semi-random number. */
+   int64_t t = os_time_get_nano();
+   unsigned cache = (t ^ (t >> 8) ^ (t >> 16)) % num_L3_caches;
 
-   /* Tell the driver to pin its threads to the same L3 cache. */
+   /* Tell the driver to pin its threads to the selected L3 cache. */
    if (ctx->set_context_param) {
       ctx->set_context_param(ctx, PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE,
                              cache);
    }
 
    /* Do the same for the upper level thread if there is any (e.g. glthread) */
    if (upper_thread)
       util_pin_thread_to_L3(*upper_thread, cache, util_cpu_caps.cores_per_L3);
-#endif
 }
 
 /* This is a helper for hardware bring-up. Don't remove. */
 struct pipe_query *
 util_begin_pipestat_query(struct pipe_context *ctx)
 {
    struct pipe_query *q =
       ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0);
    if (!q)
       return NULL;
diff --git a/src/gallium/auxiliary/util/u_helpers.h b/src/gallium/auxiliary/util/u_helpers.h
index 38c47c1cc98..ed8467291ba 100644
--- a/src/gallium/auxiliary/util/u_helpers.h
+++ b/src/gallium/auxiliary/util/u_helpers.h
@@ -45,21 +45,22 @@ void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
                                    unsigned *dst_count,
                                    const struct pipe_vertex_buffer *src,
                                    unsigned start_slot, unsigned count);
 
 bool util_upload_index_buffer(struct pipe_context *pipe,
                               const struct pipe_draw_info *info,
                               struct pipe_resource **out_buffer,
                               unsigned *out_offset);
 
 void
-util_context_thread_changed(struct pipe_context *ctx, thrd_t *upper_thread);
+util_pin_driver_threads_to_random_L3(struct pipe_context *ctx,
+                                     thrd_t *upper_thread);
 
 struct pipe_query *
 util_begin_pipestat_query(struct pipe_context *ctx);
 
 void
 util_end_pipestat_query(struct pipe_context *ctx, struct pipe_query *q,
                         FILE *f);
 
 void
 util_wait_for_idle(struct pipe_context *ctx);
diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
index 076ad42646d..73729d74545 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -793,20 +793,31 @@ st_context_destroy(struct st_context_iface *stctxi)
    st_destroy_context(st);
 }
 
 
 static void
 st_start_thread(struct st_context_iface *stctxi)
 {
    struct st_context *st = (struct st_context *) stctxi;
 
    _mesa_glthread_init(st->ctx);
+
+   /* Pin all driver threads to one L3 cache for optimal performance
+    * on AMD Zen. This is only done if glthread is enabled.
+    *
+    * If glthread is disabled, st_draw.c re-pins driver threads regularly
+    * based on the location of the app thread.
+    */
+   struct glthread_state *glthread = st->ctx->GLThread;
+   if (glthread && st->pipe->set_context_param) {
+      util_pin_driver_threads_to_random_L3(st->pipe, &glthread->queue.threads[0]);
+   }
 }
 
 
 static void
 st_thread_finish(struct st_context_iface *stctxi)
 {
    struct st_context *st = (struct st_context *) stctxi;
 
    _mesa_glthread_finish(st->ctx);
 }
-- 
2.17.1



More information about the mesa-dev mailing list