[Mesa-dev] [PATCH 2/4] st/mesa: pin driver threads to a specific L3 cache on AMD Zen (v2)

Brian Paul brianp at vmware.com
Fri Sep 7 03:51:03 UTC 2018


The revised series looks OK to me.  One whitespace issue below.

For the series,
Reviewed-by: Brian Paul <brianp at vmware.com>


On 09/06/2018 07:04 PM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> v2: use set_context_param
> ---
>   src/gallium/auxiliary/util/u_helpers.c | 42 +++++++++++++++++++
>   src/gallium/auxiliary/util/u_helpers.h |  4 ++
>   src/mesa/state_tracker/st_context.c    |  3 ++
>   src/mesa/state_tracker/st_manager.c    |  9 ++++
>   src/util/u_thread.h                    | 57 ++++++++++++++++++++++++++
>   5 files changed, 115 insertions(+)
> 

[...]

> diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
> index 69286b57916..7a37f9850f8 100644
> --- a/src/mesa/state_tracker/st_manager.c
> +++ b/src/mesa/state_tracker/st_manager.c
> @@ -1056,20 +1056,29 @@ st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi,
>            ret = _mesa_make_current(st->ctx, incomplete, incomplete);
>         }
>   
>         st_framebuffer_reference(&stdraw, NULL);
>         st_framebuffer_reference(&stread, NULL);
>   
>         /* Purge the context's winsys_buffers list in case any
>          * of the referenced drawables no longer exist.
>          */
>         st_framebuffers_purge(st);
> +
> +      /* Notify the driver that the context thread may have been changed.
> +       * This should pin all driver threads to a specific L3 cache for optimal
> +       * performance on AMD Zen CPUs.
> +       */
> +      struct glthread_state *glthread = st->ctx->GLThread;
> +      thrd_t *upper_thread = glthread ? &glthread->queue.threads[0] : NULL;
> +
> +      util_context_thread_changed(st->pipe, upper_thread);
>      }
>      else {
>         ret = _mesa_make_current(NULL, NULL, NULL);
>      }
>   
>      return ret;
>   }
>   
>   
>   static void
> diff --git a/src/util/u_thread.h b/src/util/u_thread.h
> index 8c6e0bdc59e..0555ba61111 100644
> --- a/src/util/u_thread.h
> +++ b/src/util/u_thread.h
> @@ -63,20 +63,77 @@ static inline void u_thread_setname( const char *name )
>   #if defined(HAVE_PTHREAD)
>   #  if defined(__GNU_LIBRARY__) && defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \
>         (__GLIBC__ >= 3 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 12)) && \
>         defined(__linux__)
>      pthread_setname_np(pthread_self(), name);
>   #  endif
>   #endif
>      (void)name;
>   }
>   
> +/**
> + * An AMD Zen CPU consists of multiple modules where each module has its own L3
> + * cache. Inter-thread communication such as locks and atomics between modules
> + * is very expensive. It's desirable to pin a group of closely cooperating
> + * threads to one group of cores sharing L3.
> + *
> + * \param thread        thread
> + * \param L3_index      index of the L3 cache
> + * \param cores_per_L3  number of CPU cores shared by one L3
> + */
> +static inline void
> +util_pin_thread_to_L3(thrd_t thread, unsigned L3_index, unsigned cores_per_L3)
> +{
> +#if defined(HAVE_PTHREAD)
> +   cpu_set_t cpuset;
> +
> +   CPU_ZERO(&cpuset);
> +   for (unsigned i = 0; i < cores_per_L3; i++)
> +	   CPU_SET(L3_index * cores_per_L3 + i, &cpuset);

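Indentation: the CPU_SET line above is indented with a tab. Please use spaces (one more three-space level than the enclosing `for`) to match the rest of the file.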


> +   pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
> +#endif
> +}
> +
> +/**
> + * Return the index of L3 that the thread is pinned to. If the thread is
> + * pinned to multiple L3 caches, return -1.
> + *
> + * \param thread        thread
> + * \param cores_per_L3  number of CPU cores shared by one L3
> + */
> +static inline int
> +util_get_L3_for_pinned_thread(thrd_t thread, unsigned cores_per_L3)
> +{
> +#if defined(HAVE_PTHREAD)
> +   cpu_set_t cpuset;
> +
> +   if (pthread_getaffinity_np(thread, sizeof(cpuset), &cpuset) == 0) {
> +      int L3_index = -1;
> +
> +      for (unsigned i = 0; i < CPU_SETSIZE; i++) {
> +         if (CPU_ISSET(i, &cpuset)) {
> +            int x = i / cores_per_L3;
> +
> +            if (L3_index != x) {
> +               if (L3_index == -1)
> +                  L3_index = x;
> +               else
> +                  return -1; /* multiple L3s are set */
> +            }
> +         }
> +      }
> +      return L3_index;
> +   }
> +#endif
> +   return -1;
> +}
> +
>   /*
>    * Thread statistics.
>    */
>   
>   /* Return the time of a thread's CPU time clock. */
>   static inline int64_t
>   u_thread_get_time_nano(thrd_t thread)
>   {
>   #if defined(__linux__) && defined(HAVE_PTHREAD)
>      struct timespec ts;
> 



More information about the mesa-dev mailing list