Mesa (master): util: replace UTIL_MAX_CPUS by util_cpu_caps.num_cpu_mask_bits

Tue Jan 5 04:11:34 UTC 2021

Module: Mesa
Branch: master
Commit: a0467b7fa16e6c447a644f1d4c1953800c4265fe
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a0467b7fa16e6c447a644f1d4c1953800c4265fe

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Sat Nov 28 04:18:32 2020 -0500

util: replace UTIL_MAX_CPUS by util_cpu_caps.num_cpu_mask_bits

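This reduces overhead when setting thread affinity: callers now pass
util_cpu_caps.num_cpu_mask_bits (the detected CPU count rounded up to a
multiple of 32) as the mask size instead of always passing UTIL_MAX_CPUS.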

Reviewed-by: Eric Anholt <eric at anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8017>

---

 src/gallium/auxiliary/util/u_threaded_context.c   | 2 +-
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c     | 2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 2 +-
 src/mesa/main/glthread.c                          | 2 +-
 src/util/u_cpu_detect.c                           | 7 +++++--
 src/util/u_cpu_detect.h                           | 1 +
 src/util/u_queue.c                                | 5 +++--
 7 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index c654d633b19..bdd1ac99fb0 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -2173,7 +2173,7 @@ tc_set_context_param(struct pipe_context *_pipe,
       /* Pin the gallium thread as requested. */
       util_set_thread_affinity(tc->queue.threads[0],
                                util_cpu_caps.L3_affinity_mask[value],
-                               NULL, UTIL_MAX_CPUS);
+                               NULL, util_cpu_caps.num_cpu_mask_bits);
 
       /* Execute this immediately (without enqueuing).
        * It's required to be thread-safe.
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 94ade923611..6b87601f1be 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -328,7 +328,7 @@ static void amdgpu_pin_threads_to_L3_cache(struct radeon_winsys *rws,
 
    util_set_thread_affinity(ws->cs_queue.threads[0],
                             util_cpu_caps.L3_affinity_mask[cache],
-                            NULL, UTIL_MAX_CPUS);
+                            NULL, util_cpu_caps.num_cpu_mask_bits);
 }
 
 static uint32_t kms_handle_hash(const void *key)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 2c5e3968cf8..6b306a6ce7b 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -805,7 +805,7 @@ static void radeon_pin_threads_to_L3_cache(struct radeon_winsys *ws,
    if (util_queue_is_initialized(&rws->cs_queue)) {
       util_set_thread_affinity(rws->cs_queue.threads[0],
                                util_cpu_caps.L3_affinity_mask[cache],
-                               NULL, UTIL_MAX_CPUS);
+                               NULL, util_cpu_caps.num_cpu_mask_bits);
    }
 }
 
diff --git a/src/mesa/main/glthread.c b/src/mesa/main/glthread.c
index 15714283025..6316cad4e32 100644
--- a/src/mesa/main/glthread.c
+++ b/src/mesa/main/glthread.c
@@ -225,7 +225,7 @@ _mesa_glthread_flush_batch(struct gl_context *ctx)
 
          util_set_thread_affinity(glthread->queue.threads[0],
                                   util_cpu_caps.L3_affinity_mask[L3_cache],
-                                  NULL, UTIL_MAX_CPUS);
+                                  NULL, util_cpu_caps.num_cpu_mask_bits);
          ctx->Driver.PinDriverToL3Cache(ctx, L3_cache);
       }
    }
diff --git a/src/util/u_cpu_detect.c b/src/util/u_cpu_detect.c
index 025f2f30156..8cfe3286b1f 100644
--- a/src/util/u_cpu_detect.c
+++ b/src/util/u_cpu_detect.c
@@ -482,7 +482,7 @@ get_cpu_topology(void)
 
          if (util_set_current_thread_affinity(mask,
                                               !saved ? saved_mask : NULL,
-                                              UTIL_MAX_CPUS)) {
+                                              util_cpu_caps.num_cpu_mask_bits)) {
             saved = true;
             allowed_mask[i / 32] |= cpu_bit;
 
@@ -528,7 +528,8 @@ get_cpu_topology(void)
          }
 
          /* Restore the original affinity mask. */
-         util_set_current_thread_affinity(saved_mask, NULL, UTIL_MAX_CPUS);
+         util_set_current_thread_affinity(saved_mask, NULL,
+                                          util_cpu_caps.num_cpu_mask_bits);
       } else {
          if (debug_get_option_dump_cpu())
             fprintf(stderr, "Cannot set thread affinity for any thread.\n");
@@ -569,6 +570,8 @@ util_cpu_detect_once(void)
    util_cpu_caps.nr_cpus = 1;
 #endif
 
+   util_cpu_caps.num_cpu_mask_bits = align(util_cpu_caps.nr_cpus, 32);
+
    /* Make the fallback cacheline size nonzero so that it can be
     * safely passed to align().
     */
diff --git a/src/util/u_cpu_detect.h b/src/util/u_cpu_detect.h
index a76fd912910..5a9a139c990 100644
--- a/src/util/u_cpu_detect.h
+++ b/src/util/u_cpu_detect.h
@@ -98,6 +98,7 @@ struct util_cpu_caps {
 
    unsigned num_L3_caches;
    unsigned cores_per_L3;
+   unsigned num_cpu_mask_bits;
 
    uint16_t cpu_to_L3[UTIL_MAX_CPUS];
    /* Affinity masks for each L3 cache. */
diff --git a/src/util/u_queue.c b/src/util/u_queue.c
index b11b297a45c..6161260634e 100644
--- a/src/util/u_queue.c
+++ b/src/util/u_queue.c
@@ -27,7 +27,7 @@
 #include "u_queue.h"
 
 #include "c11/threads.h"
-
+#include "util/u_cpu_detect.h"
 #include "util/os_time.h"
 #include "util/u_string.h"
 #include "util/u_thread.h"
@@ -258,7 +258,8 @@ util_queue_thread_func(void *input)
       uint32_t mask[UTIL_MAX_CPUS / 32];
 
       memset(mask, 0xff, sizeof(mask));
-      util_set_current_thread_affinity(mask, NULL, UTIL_MAX_CPUS);
+      util_set_current_thread_affinity(mask, NULL,
+                                       util_cpu_caps.num_cpu_mask_bits);
    }
 
 #if defined(__linux__)