[Mesa-dev] [PATCH 5/8] gallium: add pipe_context::pin_threads_to_L3_cache for AMD Zen

Marek Olšák maraeo at gmail.com
Fri Sep 7 00:35:21 UTC 2018


On Thu, Sep 6, 2018 at 2:37 PM, Brian Paul <brianp at vmware.com> wrote:
> On 09/05/2018 10:02 PM, Marek Olšák wrote:
>>
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> State trackers will not use this directly, but will instead use a helper
>> in MakeCurrent that does the right thing.
>> ---
>>   .../auxiliary/driver_ddebug/dd_context.c       | 10 ++++++++++
>>   src/gallium/auxiliary/driver_noop/noop_pipe.c  |  6 ++++++
>>   .../auxiliary/driver_trace/tr_context.c        | 18 ++++++++++++++++++
>>   src/gallium/include/pipe/p_context.h           |  9 +++++++++
>>   4 files changed, 43 insertions(+)
>>
>> diff --git a/src/gallium/auxiliary/driver_ddebug/dd_context.c
>> b/src/gallium/auxiliary/driver_ddebug/dd_context.c
>> index a1b6c971e89..51719b6679b 100644
>> --- a/src/gallium/auxiliary/driver_ddebug/dd_context.c
>> +++ b/src/gallium/auxiliary/driver_ddebug/dd_context.c
>> @@ -752,20 +752,29 @@ dd_context_delete_image_handle(struct pipe_context
>> *_pipe, uint64_t handle)
>>   static void
>>   dd_context_make_image_handle_resident(struct pipe_context *_pipe,
>>                                         uint64_t handle, unsigned access,
>>                                         bool resident)
>>   {
>>      struct pipe_context *pipe = dd_context(_pipe)->pipe;
>>        pipe->make_image_handle_resident(pipe, handle, access, resident);
>>   }
>>   +static void
>> +dd_context_pin_threads_to_L3_cache(struct pipe_context *_pipe,
>> +                                   unsigned cache)
>> +{
>> +   struct pipe_context *pipe = dd_context(_pipe)->pipe;
>> +
>> +   pipe->pin_threads_to_L3_cache(pipe, cache);
>> +}
>> +
>>   struct pipe_context *
>>   dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe)
>>   {
>>      struct dd_context *dctx;
>>        if (!pipe)
>>         return NULL;
>>        dctx = CALLOC_STRUCT(dd_context);
>>      if (!dctx)
>> @@ -855,20 +864,21 @@ dd_context_create(struct dd_screen *dscreen, struct
>> pipe_context *pipe)
>>      CTX_INIT(get_device_reset_status);
>>      CTX_INIT(set_device_reset_callback);
>>      CTX_INIT(dump_debug_state);
>>      CTX_INIT(emit_string_marker);
>>      CTX_INIT(create_texture_handle);
>>      CTX_INIT(delete_texture_handle);
>>      CTX_INIT(make_texture_handle_resident);
>>      CTX_INIT(create_image_handle);
>>      CTX_INIT(delete_image_handle);
>>      CTX_INIT(make_image_handle_resident);
>> +   CTX_INIT(pin_threads_to_L3_cache);
>>        dd_init_draw_functions(dctx);
>>        u_log_context_init(&dctx->log);
>>      if (pipe->set_log_context)
>>         pipe->set_log_context(pipe, &dctx->log);
>>        dctx->draw_state.sample_mask = ~0;
>>        list_inithead(&dctx->records);
>> diff --git a/src/gallium/auxiliary/driver_noop/noop_pipe.c
>> b/src/gallium/auxiliary/driver_noop/noop_pipe.c
>> index 7de3e882398..a8d7d16a696 100644
>> --- a/src/gallium/auxiliary/driver_noop/noop_pipe.c
>> +++ b/src/gallium/auxiliary/driver_noop/noop_pipe.c
>> @@ -305,20 +305,25 @@ static boolean noop_generate_mipmap(struct
>> pipe_context *ctx,
>>                                       unsigned last_layer)
>>   {
>>      return true;
>>   }
>>     static void noop_invalidate_resource(struct pipe_context *ctx,
>>                                        struct pipe_resource *resource)
>>   {
>>   }
>>   +static void noop_pin_threads_to_L3_cache(struct pipe_context *ctx,
>> +                                         unsigned L3_cache_index)
>> +{
>> +}
>> +
>>   static struct pipe_context *noop_create_context(struct pipe_screen
>> *screen,
>>                                                   void *priv, unsigned
>> flags)
>>   {
>>      struct pipe_context *ctx = CALLOC_STRUCT(pipe_context);
>>        if (!ctx)
>>         return NULL;
>>        ctx->screen = screen;
>>      ctx->priv = priv;
>> @@ -344,20 +349,21 @@ static struct pipe_context
>> *noop_create_context(struct pipe_screen *screen,
>>      ctx->begin_query = noop_begin_query;
>>      ctx->end_query = noop_end_query;
>>      ctx->get_query_result = noop_get_query_result;
>>      ctx->set_active_query_state = noop_set_active_query_state;
>>      ctx->transfer_map = noop_transfer_map;
>>      ctx->transfer_flush_region = noop_transfer_flush_region;
>>      ctx->transfer_unmap = noop_transfer_unmap;
>>      ctx->buffer_subdata = noop_buffer_subdata;
>>      ctx->texture_subdata = noop_texture_subdata;
>>      ctx->invalidate_resource = noop_invalidate_resource;
>> +   ctx->pin_threads_to_L3_cache = noop_pin_threads_to_L3_cache;
>>      noop_init_state_functions(ctx);
>>        return ctx;
>>   }
>>       /*
>>    * pipe_screen
>>    */
>>   static void noop_flush_frontbuffer(struct pipe_screen *_screen,
>> diff --git a/src/gallium/auxiliary/driver_trace/tr_context.c
>> b/src/gallium/auxiliary/driver_trace/tr_context.c
>> index dc091aee2e9..13f147316f3 100644
>> --- a/src/gallium/auxiliary/driver_trace/tr_context.c
>> +++ b/src/gallium/auxiliary/driver_trace/tr_context.c
>> @@ -1569,20 +1569,37 @@ trace_context_invalidate_resource(struct
>> pipe_context *_context,
>>      trace_dump_call_begin("pipe_context", "invalidate_resource");
>>        trace_dump_arg(ptr, context);
>>      trace_dump_arg(ptr, resource);
>>        trace_dump_call_end();
>>        context->invalidate_resource(context, resource);
>>   }
>>   +static void
>> +trace_context_pin_threads_to_L3_cache(struct pipe_context *_context,
>> +                                      unsigned cache)
>> +{
>> +   struct trace_context *tr_context = trace_context(_context);
>> +   struct pipe_context *context = tr_context->pipe;
>> +
>> +   trace_dump_call_begin("pipe_context", "pin_threads_to_L3_cache");
>> +
>> +   trace_dump_arg(ptr, context);
>> +   trace_dump_arg(uint, cache);
>> +
>> +   trace_dump_call_end();
>> +
>> +   context->pin_threads_to_L3_cache(context, cache);
>> +}
>> +
>>   static void
>>   trace_context_render_condition(struct pipe_context *_context,
>>                                  struct pipe_query *query,
>>                                  boolean condition,
>>                                  enum pipe_render_cond_flag mode)
>>   {
>>      struct trace_context *tr_context = trace_context(_context);
>>      struct pipe_context *context = tr_context->pipe;
>>        query = trace_query_unwrap(query);
>> @@ -1941,20 +1958,21 @@ trace_context_create(struct trace_screen *tr_scr,
>>      TR_CTX_INIT(create_image_handle);
>>      TR_CTX_INIT(delete_image_handle);
>>      TR_CTX_INIT(make_image_handle_resident);
>>        TR_CTX_INIT(transfer_map);
>>      TR_CTX_INIT(transfer_unmap);
>>      TR_CTX_INIT(transfer_flush_region);
>>      TR_CTX_INIT(buffer_subdata);
>>      TR_CTX_INIT(texture_subdata);
>>      TR_CTX_INIT(invalidate_resource);
>> +   TR_CTX_INIT(pin_threads_to_L3_cache);
>>     #undef TR_CTX_INIT
>>        tr_ctx->pipe = pipe;
>>        return &tr_ctx->base;
>>     error1:
>>      return pipe;
>>   }
>> diff --git a/src/gallium/include/pipe/p_context.h
>> b/src/gallium/include/pipe/p_context.h
>> index 7cf037f1abd..211812fe1a6 100644
>> --- a/src/gallium/include/pipe/p_context.h
>> +++ b/src/gallium/include/pipe/p_context.h
>> @@ -920,18 +920,27 @@ struct pipe_context {
>>      /**
>>       * Call the given function from the driver thread.
>>       *
>>       * This is set by threaded contexts for use by debugging wrappers.
>>       *
>>       * \param asap if true, run the callback immediately if there are no pending
>>       *             commands to be processed by the driver thread
>>       */
>>      void (*callback)(struct pipe_context *ctx, void (*fn)(void *), void *data,
>>                       bool asap);
>> +
>> +   /**
>> +    * A hint for the driver that it should pin its execution threads to
>> +    * a group of cores sharing a specific L3 cache if the CPU has multiple
>> +    * L3 caches. This is needed for good multithreading performance on
>> +    * AMD Zen CPUs.
>> +    */
>> +   void (*pin_threads_to_L3_cache)(struct pipe_context *ctx,
>> +                                   unsigned L3_cache_index);
>>   };
>
>
> I wonder if we could use a more generic interface here.  Perhaps there are
> other thread/core affinity policies that might be useful (now or in the
> future).
>
> My first thought is something like this:
>
> void (*set_cpu_param)(struct pipe_context *ctx,
>                       enum pipe_cpu_param param,
>                       unsigned value);
>
> enum pipe_cpu_param {
>    PIPE_CPU_PARAM_PIN_THREADS_TO_L3_CACHE,  // value is L3 cache index
> };

Thanks. I'll call it set_context_param.

Marek


More information about the mesa-dev mailing list