[Mesa-dev] [RFC PATCH 48/65] radeonsi: add a slab allocator for resident descriptors

Samuel Pitoiset samuel.pitoiset at gmail.com
Fri May 26 14:43:16 UTC 2017



On 05/26/2017 04:39 PM, Marek Olšák wrote:
> FYI, I've replied on some radeonsi patches and skimmed through the
> rest without Rbs. I'll do another review once there is version 2.

Looks good to me.

> 
> Marek
> 
> On Fri, May 19, 2017 at 6:52 PM, Samuel Pitoiset
> <samuel.pitoiset at gmail.com> wrote:
>> For each texture/image handle, we need to allocate a new
>> buffer for the resident descriptor. But when the number of
>> buffers added to the current CS becomes high, the overhead
>> in the winsys (and in the kernel) is significant.
>>
>> To reduce this bottleneck, the idea is to suballocate the
>> resident descriptors using a slab similar to the one used
>> in the winsys.
>>
>> Currently, a buffer can hold 1024 resident descriptors, but
>> this limit is arbitrary and could be changed in the future
>> if needed. Once a slab is allocated, the "base" buffer
>> is added to a per-context residency list.
>>
>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
>> ---
>>   src/gallium/drivers/radeonsi/si_descriptors.c | 150 ++++++++++++++++++++++++++
>>   src/gallium/drivers/radeonsi/si_pipe.c        |  10 ++
>>   src/gallium/drivers/radeonsi/si_pipe.h        |  15 +++
>>   src/gallium/drivers/radeonsi/si_state.h       |   8 ++
>>   4 files changed, 183 insertions(+)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
>> index 61eb2f10be..d337fc3f11 100644
>> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
>> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
>> @@ -2005,6 +2005,156 @@ void si_emit_compute_shader_userdata(struct si_context *sctx)
>>          sctx->shader_pointers_dirty &= ~compute_mask;
>>   }
>>
>> +/* BINDLESS */
>> +
>> +static int si_add_resident_descriptor(struct si_context *sctx,
>> +                                     struct r600_resource *desc)
>> +{
>> +       int idx;
>> +
>> +       /* New resident descriptor, check if the backing array is large enough. */
>> +       if (sctx->num_resident_descriptors >= sctx->max_resident_descriptors) {
>> +               unsigned new_max_descriptors =
>> +                       MAX2(1, sctx->max_resident_descriptors * 2);
>> +               struct r600_resource **new_descriptors =
>> +                       REALLOC(sctx->resident_descriptors,
>> +                               sctx->num_resident_descriptors * (sizeof(*new_descriptors)),
>> +                               new_max_descriptors * sizeof(*new_descriptors));
>> +
>> +               if (new_descriptors) {
>> +                       sctx->resident_descriptors = new_descriptors;
>> +                       sctx->max_resident_descriptors = new_max_descriptors;
>> +               } else {
>> +                       fprintf(stderr, "si_add_resident_descriptor: "
>> +                               "allocation failed\n");
>> +                       return -1;
>> +               }
>> +       }
>> +
>> +       idx = sctx->num_resident_descriptors;
>> +       sctx->resident_descriptors[idx] = desc;
>> +       sctx->num_resident_descriptors++;
>> +
>> +       return 0;
>> +}
>> +
>> +static void si_del_resident_descriptor(struct si_context *sctx,
>> +                                      struct r600_resource *desc)
>> +{
>> +       unsigned i;
>> +       int size;
>> +
>> +       for (i = 0; i < sctx->num_resident_descriptors; i++) {
>> +               if (sctx->resident_descriptors[i] != desc)
>> +                       continue;
>> +
>> +               if (i < sctx->num_resident_descriptors - 1) {
>> +                       size = sizeof(*sctx->resident_descriptors) *
>> +                               (sctx->num_resident_descriptors - 1 - i);
>> +
>> +                       memmove(&sctx->resident_descriptors[i],
>> +                               &sctx->resident_descriptors[i + 1], size);
>> +               }
>> +
>> +               sctx->num_resident_descriptors--;
>> +               return;
>> +       }
>> +}
>> +
>> +struct si_resident_descriptor_slab
>> +{
>> +       struct pb_slab base;
>> +       struct r600_resource *buffer;
>> +       struct si_resident_descriptor *entries;
>> +};
>> +
>> +bool si_resident_descriptor_can_reclaim_slab(void *priv,
>> +                                            struct pb_slab_entry *entry)
>> +{
>> +       struct si_context *sctx = priv;
>> +       struct radeon_winsys *ws = sctx->b.ws;
>> +       struct si_resident_descriptor *desc = NULL; /* fix container_of */
>> +
>> +       desc = container_of(entry, desc, entry);
>> +
>> +       if (ws->cs_is_buffer_referenced(sctx->b.gfx.cs, desc->buffer->buf,
>> +                                       RADEON_USAGE_READ)) {
>> +               /* Do not allow to reclaim the buffer if the resident
>> +                * descriptor is currently used.
>> +                */
>> +               return false;
>> +       }
>> +
>> +       return true;
>> +}
>> +
>> +struct pb_slab *si_resident_descriptor_slab_alloc(void *priv, unsigned heap,
>> +                                                 unsigned entry_size,
>> +                                                 unsigned group_index)
>> +{
>> +       struct si_context *sctx = priv;
>> +       struct si_screen *sscreen = sctx->screen;
>> +       struct si_resident_descriptor_slab *slab;
>> +
>> +       slab = CALLOC_STRUCT(si_resident_descriptor_slab);
>> +       if (!slab)
>> +               return NULL;
>> +
>> +       /* Create a buffer in VRAM for 1024 resident descriptors. */
>> +       slab->buffer = (struct r600_resource *)
>> +               pipe_buffer_create(&sscreen->b.b, 0,
>> +                                  PIPE_USAGE_IMMUTABLE, 64 * 1024);
>> +       if (!slab->buffer)
>> +               goto fail;
>> +
>> +       slab->base.num_entries = slab->buffer->bo_size / entry_size;
>> +       slab->base.num_free = slab->base.num_entries;
>> +       slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
>> +       if (!slab->entries)
>> +               goto fail_buffer;
>> +
>> +       LIST_INITHEAD(&slab->base.free);
>> +
>> +       for (unsigned i = 0; i < slab->base.num_entries; ++i) {
>> +               struct si_resident_descriptor *desc = &slab->entries[i];
>> +
>> +               desc->entry.slab = &slab->base;
>> +               desc->entry.group_index = group_index;
>> +               desc->buffer = slab->buffer;
>> +               desc->offset = i * entry_size;
>> +
>> +               LIST_ADDTAIL(&desc->entry.head, &slab->base.free);
>> +       }
>> +
>> +       /* Add the descriptor to the per-context residency list. */
>> +       if (si_add_resident_descriptor(sctx, slab->buffer))
>> +               goto fail_desc;
>> +
>> +       return &slab->base;
>> +
>> +fail_desc:
>> +       FREE(slab->entries);
>> +fail_buffer:
>> +       r600_resource_reference(&slab->buffer, NULL);
>> +fail:
>> +       FREE(slab);
>> +       return NULL;
>> +}
>> +
>> +void si_resident_descriptor_slab_free(void *priv, struct pb_slab *pslab)
>> +{
>> +       struct si_context *sctx = priv;
>> +       struct si_resident_descriptor_slab *slab =
>> +               (struct si_resident_descriptor_slab *)pslab;
>> +
>> +       /* Remove the descriptor from the per-context residency list. */
>> +       si_del_resident_descriptor(sctx, slab->buffer);
>> +
>> +       r600_resource_reference(&slab->buffer, NULL);
>> +       FREE(slab->entries);
>> +       FREE(slab);
>> +}
>> +
>>   /* INIT/DEINIT/UPLOAD */
>>
>>   /* GFX9 has only 4KB of CE, while previous chips had 32KB. In order
>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
>> index 8e55b807ce..5b1ddda321 100644
>> --- a/src/gallium/drivers/radeonsi/si_pipe.c
>> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
>> @@ -96,6 +96,9 @@ static void si_destroy_context(struct pipe_context *context)
>>          r600_resource_reference(&sctx->last_trace_buf, NULL);
>>          radeon_clear_saved_cs(&sctx->last_gfx);
>>
>> +       pb_slabs_deinit(&sctx->resident_descriptor_slabs);
>> +
>> +       FREE(sctx->resident_descriptors);
>>          FREE(sctx);
>>   }
>>
>> @@ -314,6 +317,13 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
>>
>>          sctx->tm = si_create_llvm_target_machine(sscreen);
>>
>> +       /* Create a slab allocator for all resident descriptors. */
>> +       if (!pb_slabs_init(&sctx->resident_descriptor_slabs, 6, 6, 1, sctx,
>> +                          si_resident_descriptor_can_reclaim_slab,
>> +                          si_resident_descriptor_slab_alloc,
>> +                          si_resident_descriptor_slab_free))
>> +               goto fail;
>> +
>>          return &sctx->b.b;
>>   fail:
>>          fprintf(stderr, "radeonsi: Failed to create a context.\n");
>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
>> index 13ec0729b1..41b0a2a79f 100644
>> --- a/src/gallium/drivers/radeonsi/si_pipe.h
>> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
>> @@ -224,6 +224,13 @@ union si_vgt_param_key {
>>          uint32_t index;
>>   };
>>
>> +struct si_resident_descriptor
>> +{
>> +       struct pb_slab_entry            entry;
>> +       struct r600_resource            *buffer;
>> +       unsigned                        offset;
>> +};
>> +
>>   struct si_context {
>>          struct r600_common_context      b;
>>          struct blitter_context          *blitter;
>> @@ -384,6 +391,14 @@ struct si_context {
>>          /* Precomputed IA_MULTI_VGT_PARAM */
>>          union si_vgt_param_key  ia_multi_vgt_param_key;
>>          unsigned                ia_multi_vgt_param[SI_NUM_VGT_PARAM_STATES];
>> +
>> +       /* Slab allocator for resident descriptors. */
>> +       struct pb_slabs         resident_descriptor_slabs;
>> +
>> +       /* Resident descriptors. */
>> +       struct r600_resource    **resident_descriptors;
>> +       unsigned                num_resident_descriptors;
>> +       unsigned                max_resident_descriptors;
>>   };
>>
>>   /* cik_sdma.c */
>> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
>> index 275f830613..3e9016c84a 100644
>> --- a/src/gallium/drivers/radeonsi/si_state.h
>> +++ b/src/gallium/drivers/radeonsi/si_state.h
>> @@ -30,6 +30,8 @@
>>   #include "si_pm4.h"
>>   #include "radeon/r600_pipe_common.h"
>>
>> +#include "pipebuffer/pb_slab.h"
>> +
>>   #define SI_NUM_GRAPHICS_SHADERS (PIPE_SHADER_TESS_EVAL+1)
>>   #define SI_NUM_SHADERS (PIPE_SHADER_COMPUTE+1)
>>
>> @@ -335,6 +337,12 @@ void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx,
>>                                 uint64_t new_active_mask);
>>   void si_set_active_descriptors_for_shader(struct si_context *sctx,
>>                                            struct si_shader_selector *sel);
>> +bool si_resident_descriptor_can_reclaim_slab(void *priv,
>> +                                            struct pb_slab_entry *entry);
>> +struct pb_slab *si_resident_descriptor_slab_alloc(void *priv, unsigned heap,
>> +                                                 unsigned entry_size,
>> +                                                 unsigned group_index);
>> +void si_resident_descriptor_slab_free(void *priv, struct pb_slab *pslab);
>>
>>   /* si_state.c */
>>   struct si_shader_selector;
>> --
>> 2.13.0
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list