Mesa (master): util: add util_set_thread_affinity helpers including Windows support

Fri Oct 30 05:44:39 UTC 2020

Module: Mesa
Branch: master
Commit: 9758b1d416a109f92e911d7bac6f00f9419affab
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9758b1d416a109f92e911d7bac6f00f9419affab

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Tue Oct  6 18:44:08 2020 -0400

util: add util_set_thread_affinity helpers including Windows support

Acked-by: Jose Fonseca <jfonseca at vmware.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7054>

---

 src/util/u_queue.c  |  10 ++---
 src/util/u_thread.h | 104 ++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 99 insertions(+), 15 deletions(-)

diff --git a/src/util/u_queue.c b/src/util/u_queue.c
index b1478bdf483..b11b297a45c 100644
--- a/src/util/u_queue.c
+++ b/src/util/u_queue.c
@@ -251,19 +251,15 @@ util_queue_thread_func(void *input)
 
    free(input);
 
-#ifdef HAVE_PTHREAD_SETAFFINITY
    if (queue->flags & UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY) {
       /* Don't inherit the thread affinity from the parent thread.
        * Set the full mask.
        */
-      cpu_set_t cpuset;
-      CPU_ZERO(&cpuset);
-      for (unsigned i = 0; i < CPU_SETSIZE; i++)
-         CPU_SET(i, &cpuset);
+      uint32_t mask[UTIL_MAX_CPUS / 32];
 
-      pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
+      memset(mask, 0xff, sizeof(mask));
+      util_set_current_thread_affinity(mask, NULL, UTIL_MAX_CPUS);
    }
-#endif
 
 #if defined(__linux__)
    if (queue->flags & UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY) {
diff --git a/src/util/u_thread.h b/src/util/u_thread.h
index d1fb95e3de1..3f4733592ab 100644
--- a/src/util/u_thread.h
+++ b/src/util/u_thread.h
@@ -29,9 +29,11 @@
 
 #include <stdint.h>
 #include <stdbool.h>
+#include <string.h>
 
 #include "c11/threads.h"
 #include "detect_os.h"
+#include "macros.h"
 
 #ifdef HAVE_PTHREAD
 #include <signal.h>
@@ -52,6 +54,9 @@
 #define cpu_set_t cpuset_t
 #endif
 
+/* Maximum CPU count covered by the uint32_t bit masks given to util_set_thread_affinity. */
+#define UTIL_MAX_CPUS               1024  /* this should be enough */
+
 static inline thrd_t u_thread_create(int (*routine)(void *), void *param)
 {
    thrd_t thread;
@@ -94,6 +99,85 @@ static inline void u_thread_setname( const char *name )
    (void)name;
 }
 
+/**
+ * Set the CPU affinity of a thread. Bit (i % 32) of mask[i / 32] selects CPU i.
+ *
+ * \param thread         Thread whose affinity is changed
+ * \param mask           Affinity mask to apply
+ * \param old_mask       If not NULL, receives the previous affinity mask
+ * \param num_mask_bits  Number of bits in both masks (stored in whole dwords)
+ * \return  true on success, false on failure or on unsupported platforms
+ */
+static inline bool
+util_set_thread_affinity(thrd_t thread,
+                         const uint32_t *mask,
+                         uint32_t *old_mask,
+                         unsigned num_mask_bits)
+{
+#if defined(HAVE_PTHREAD_SETAFFINITY)
+   cpu_set_t cpuset;
+
+   if (old_mask) {
+      if (pthread_getaffinity_np(thread, sizeof(cpuset), &cpuset) != 0)
+         return false;
+      /* memset counts bytes: clear every dword the loop below may OR into. */
+      memset(old_mask, 0, DIV_ROUND_UP(num_mask_bits, 32) * sizeof(*old_mask));
+      for (unsigned i = 0; i < num_mask_bits && i < CPU_SETSIZE; i++) {
+         if (CPU_ISSET(i, &cpuset))
+            old_mask[i / 32] |= 1u << (i % 32);
+      }
+   }
+
+   CPU_ZERO(&cpuset);
+   for (unsigned i = 0; i < num_mask_bits && i < CPU_SETSIZE; i++) {
+      if (mask[i / 32] & (1u << (i % 32)))
+         CPU_SET(i, &cpuset);
+   }
+   return pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset) == 0;
+
+#elif defined(_WIN32) && !defined(__CYGWIN__)
+   DWORD_PTR m = mask[0];
+   /* The API takes a single DWORD_PTR, so only mask[0] (and mask[1] if 64-bit) is used. */
+   if (sizeof(m) > 4 && num_mask_bits > 32)
+      m |= (uint64_t)mask[1] << 32;
+
+   m = SetThreadAffinityMask(thread, m);
+   if (!m)
+      return false;
+
+   if (old_mask) {
+      memset(old_mask, 0, DIV_ROUND_UP(num_mask_bits, 32) * sizeof(*old_mask));
+      /* Store the high dword only if the caller's mask is wide enough to hold it. */
+      old_mask[0] = (uint32_t)m;
+      if (sizeof(m) > 4 && num_mask_bits > 32)
+         old_mask[1] = (uint32_t)((uint64_t)m >> 32);
+   }
+
+   return true;
+#else
+   return false;
+#endif
+}
+
+/** Apply util_set_thread_affinity to the calling thread; arguments and result are the same. */
+static inline bool
+util_set_current_thread_affinity(const uint32_t *mask,
+                                 uint32_t *old_mask,
+                                 unsigned num_mask_bits)
+{
+#if defined(HAVE_PTHREAD_SETAFFINITY) || (defined(_WIN32) && !defined(__CYGWIN__))
+#  if defined(HAVE_PTHREAD_SETAFFINITY)
+   const thrd_t self = pthread_self();
+#  else
+   /* The GetCurrentThread() handle is only valid within the current thread. */
+   const thrd_t self = GetCurrentThread();
+#  endif
+   return util_set_thread_affinity(self, mask, old_mask, num_mask_bits);
+#else
+   return false;
+#endif
+}
+
 /**
  * An AMD Zen CPU consists of multiple modules where each module has its own L3
  * cache. Inter-thread communication such as locks and atomics between modules
@@ -104,17 +188,21 @@ static inline void u_thread_setname( const char *name )
  * \param L3_index      index of the L3 cache
  * \param cores_per_L3  number of CPU cores shared by one L3
  */
-static inline void
+static inline bool
 util_pin_thread_to_L3(thrd_t thread, unsigned L3_index, unsigned cores_per_L3)
 {
-#if defined(HAVE_PTHREAD_SETAFFINITY)
-   cpu_set_t cpuset;
+   unsigned num_mask_bits = (L3_index + 1) * cores_per_L3; /* width in bits, not dwords */
+   uint32_t mask[UTIL_MAX_CPUS / 32] = {0}; /* zeroed: bits are OR'ed in below */
 
-   CPU_ZERO(&cpuset);
-   for (unsigned i = 0; i < cores_per_L3; i++)
-      CPU_SET(L3_index * cores_per_L3 + i, &cpuset);
-   pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
-#endif
+   assert(num_mask_bits <= UTIL_MAX_CPUS);
+   if (num_mask_bits > UTIL_MAX_CPUS) /* keep mask[] writes in bounds when asserts are off */
+      return false;
+   for (unsigned i = 0; i < cores_per_L3; i++) {
+      unsigned core = L3_index * cores_per_L3 + i;
+      mask[core / 32] |= 1u << (core % 32);
+   }
+   /* Returns true if the affinity mask selecting this L3's cores was applied. */
+   return util_set_thread_affinity(thread, mask, NULL, num_mask_bits);
 }