[Mesa-dev] [RFC PATCH 48/65] radeonsi: add a slab allocator for resident descriptors
Samuel Pitoiset
samuel.pitoiset at gmail.com
Fri May 26 14:43:16 UTC 2017
On 05/26/2017 04:39 PM, Marek Olšák wrote:
> FYI, I've replied on some radeonsi patches and skimmed through the
> rest without Rbs. I'll do another review once there is version 2.
Looks good to me.
>
> Marek
>
> On Fri, May 19, 2017 at 6:52 PM, Samuel Pitoiset
> <samuel.pitoiset at gmail.com> wrote:
>> For each texture/image handle, we need to allocate a new
>> buffer for the resident descriptor. But when the number of
>> buffers added to the current CS becomes high, the overhead
>> in the winsys (and in the kernel) becomes significant.
>>
>> To reduce this bottleneck, the idea is to suballocate the
>> resident descriptors using a slab similar to the one used
>> in the winsys.
>>
>> Currently, a buffer can hold 1024 resident descriptors, but
>> this limit is arbitrary and could be changed in the future
>> if needed. Once a slab is allocated, the "base" buffer
>> is added to a per-context residency list.
>>
>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
>> ---
>> src/gallium/drivers/radeonsi/si_descriptors.c | 150 ++++++++++++++++++++++++++
>> src/gallium/drivers/radeonsi/si_pipe.c | 10 ++
>> src/gallium/drivers/radeonsi/si_pipe.h | 15 +++
>> src/gallium/drivers/radeonsi/si_state.h | 8 ++
>> 4 files changed, 183 insertions(+)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
>> index 61eb2f10be..d337fc3f11 100644
>> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
>> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
>> @@ -2005,6 +2005,156 @@ void si_emit_compute_shader_userdata(struct si_context *sctx)
>> sctx->shader_pointers_dirty &= ~compute_mask;
>> }
>>
>> +/* BINDLESS */
>> +
>> +static int si_add_resident_descriptor(struct si_context *sctx,
>> + struct r600_resource *desc)
>> +{
>> + int idx;
>> +
>> + /* New resident descriptor, check if the backing array is large enough. */
>> + if (sctx->num_resident_descriptors >= sctx->max_resident_descriptors) {
>> + unsigned new_max_descriptors =
>> + MAX2(1, sctx->max_resident_descriptors * 2);
>> + struct r600_resource **new_descriptors =
>> + REALLOC(sctx->resident_descriptors,
>> + sctx->num_resident_descriptors * (sizeof(*new_descriptors)),
>> + new_max_descriptors * sizeof(*new_descriptors));
>> +
>> + if (new_descriptors) {
>> + sctx->resident_descriptors = new_descriptors;
>> + sctx->max_resident_descriptors = new_max_descriptors;
>> + } else {
>> + fprintf(stderr, "si_add_resident_descriptor: "
>> + "allocation failed\n");
>> + return -1;
>> + }
>> + }
>> +
>> + idx = sctx->num_resident_descriptors;
>> + sctx->resident_descriptors[idx] = desc;
>> + sctx->num_resident_descriptors++;
>> +
>> + return 0;
>> +}
>> +
>> +static void si_del_resident_descriptor(struct si_context *sctx,
>> + struct r600_resource *desc)
>> +{
>> + unsigned i;
>> + int size;
>> +
>> + for (i = 0; i < sctx->num_resident_descriptors; i++) {
>> + if (sctx->resident_descriptors[i] != desc)
>> + continue;
>> +
>> + if (i < sctx->num_resident_descriptors - 1) {
>> + size = sizeof(*sctx->resident_descriptors) *
>> + (sctx->num_resident_descriptors - 1 - i);
>> +
>> + memmove(&sctx->resident_descriptors[i],
>> + &sctx->resident_descriptors[i + 1], size);
>> + }
>> +
>> + sctx->num_resident_descriptors--;
>> + return;
>> + }
>> +}
>> +
>> +struct si_resident_descriptor_slab
>> +{
>> + struct pb_slab base;
>> + struct r600_resource *buffer;
>> + struct si_resident_descriptor *entries;
>> +};
>> +
>> +bool si_resident_descriptor_can_reclaim_slab(void *priv,
>> + struct pb_slab_entry *entry)
>> +{
>> + struct si_context *sctx = priv;
>> + struct radeon_winsys *ws = sctx->b.ws;
>> + struct si_resident_descriptor *desc = NULL; /* fix container_of */
>> +
>> + desc = container_of(entry, desc, entry);
>> +
>> + if (ws->cs_is_buffer_referenced(sctx->b.gfx.cs, desc->buffer->buf,
>> + RADEON_USAGE_READ)) {
>> + /* Do not allow to reclaim the buffer if the resident
>> + * descriptor is currently used.
>> + */
>> + return false;
>> + }
>> +
>> + return true;
>> +}
>> +
>> +struct pb_slab *si_resident_descriptor_slab_alloc(void *priv, unsigned heap,
>> + unsigned entry_size,
>> + unsigned group_index)
>> +{
>> + struct si_context *sctx = priv;
>> + struct si_screen *sscreen = sctx->screen;
>> + struct si_resident_descriptor_slab *slab;
>> +
>> + slab = CALLOC_STRUCT(si_resident_descriptor_slab);
>> + if (!slab)
>> + return NULL;
>> +
>> + /* Create a buffer in VRAM for 1024 resident descriptors. */
>> + slab->buffer = (struct r600_resource *)
>> + pipe_buffer_create(&sscreen->b.b, 0,
>> + PIPE_USAGE_IMMUTABLE, 64 * 1024);
>> + if (!slab->buffer)
>> + goto fail;
>> +
>> + slab->base.num_entries = slab->buffer->bo_size / entry_size;
>> + slab->base.num_free = slab->base.num_entries;
>> + slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
>> + if (!slab->entries)
>> + goto fail_buffer;
>> +
>> + LIST_INITHEAD(&slab->base.free);
>> +
>> + for (unsigned i = 0; i < slab->base.num_entries; ++i) {
>> + struct si_resident_descriptor *desc = &slab->entries[i];
>> +
>> + desc->entry.slab = &slab->base;
>> + desc->entry.group_index = group_index;
>> + desc->buffer = slab->buffer;
>> + desc->offset = i * entry_size;
>> +
>> + LIST_ADDTAIL(&desc->entry.head, &slab->base.free);
>> + }
>> +
>> + /* Add the descriptor to the per-context residency list. */
>> + if (si_add_resident_descriptor(sctx, slab->buffer))
>> + goto fail_desc;
>> +
>> + return &slab->base;
>> +
>> +fail_desc:
>> + FREE(slab->entries);
>> +fail_buffer:
>> + r600_resource_reference(&slab->buffer, NULL);
>> +fail:
>> + FREE(slab);
>> + return NULL;
>> +}
>> +
>> +void si_resident_descriptor_slab_free(void *priv, struct pb_slab *pslab)
>> +{
>> + struct si_context *sctx = priv;
>> + struct si_resident_descriptor_slab *slab =
>> + (struct si_resident_descriptor_slab *)pslab;
>> +
>> + /* Remove the descriptor from the per-context residency list. */
>> + si_del_resident_descriptor(sctx, slab->buffer);
>> +
>> + r600_resource_reference(&slab->buffer, NULL);
>> + FREE(slab->entries);
>> + FREE(slab);
>> +}
>> +
>> /* INIT/DEINIT/UPLOAD */
>>
>> /* GFX9 has only 4KB of CE, while previous chips had 32KB. In order
>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
>> index 8e55b807ce..5b1ddda321 100644
>> --- a/src/gallium/drivers/radeonsi/si_pipe.c
>> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
>> @@ -96,6 +96,9 @@ static void si_destroy_context(struct pipe_context *context)
>> r600_resource_reference(&sctx->last_trace_buf, NULL);
>> radeon_clear_saved_cs(&sctx->last_gfx);
>>
>> + pb_slabs_deinit(&sctx->resident_descriptor_slabs);
>> +
>> + FREE(sctx->resident_descriptors);
>> FREE(sctx);
>> }
>>
>> @@ -314,6 +317,13 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
>>
>> sctx->tm = si_create_llvm_target_machine(sscreen);
>>
>> + /* Create a slab allocator for all resident descriptors. */
>> + if (!pb_slabs_init(&sctx->resident_descriptor_slabs, 6, 6, 1, sctx,
>> + si_resident_descriptor_can_reclaim_slab,
>> + si_resident_descriptor_slab_alloc,
>> + si_resident_descriptor_slab_free))
>> + goto fail;
>> +
>> return &sctx->b.b;
>> fail:
>> fprintf(stderr, "radeonsi: Failed to create a context.\n");
>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
>> index 13ec0729b1..41b0a2a79f 100644
>> --- a/src/gallium/drivers/radeonsi/si_pipe.h
>> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
>> @@ -224,6 +224,13 @@ union si_vgt_param_key {
>> uint32_t index;
>> };
>>
>> +struct si_resident_descriptor
>> +{
>> + struct pb_slab_entry entry;
>> + struct r600_resource *buffer;
>> + unsigned offset;
>> +};
>> +
>> struct si_context {
>> struct r600_common_context b;
>> struct blitter_context *blitter;
>> @@ -384,6 +391,14 @@ struct si_context {
>> /* Precomputed IA_MULTI_VGT_PARAM */
>> union si_vgt_param_key ia_multi_vgt_param_key;
>> unsigned ia_multi_vgt_param[SI_NUM_VGT_PARAM_STATES];
>> +
>> + /* Slab allocator for resident descriptors. */
>> + struct pb_slabs resident_descriptor_slabs;
>> +
>> + /* Resident descriptors. */
>> + struct r600_resource **resident_descriptors;
>> + unsigned num_resident_descriptors;
>> + unsigned max_resident_descriptors;
>> };
>>
>> /* cik_sdma.c */
>> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
>> index 275f830613..3e9016c84a 100644
>> --- a/src/gallium/drivers/radeonsi/si_state.h
>> +++ b/src/gallium/drivers/radeonsi/si_state.h
>> @@ -30,6 +30,8 @@
>> #include "si_pm4.h"
>> #include "radeon/r600_pipe_common.h"
>>
>> +#include "pipebuffer/pb_slab.h"
>> +
>> #define SI_NUM_GRAPHICS_SHADERS (PIPE_SHADER_TESS_EVAL+1)
>> #define SI_NUM_SHADERS (PIPE_SHADER_COMPUTE+1)
>>
>> @@ -335,6 +337,12 @@ void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx,
>> uint64_t new_active_mask);
>> void si_set_active_descriptors_for_shader(struct si_context *sctx,
>> struct si_shader_selector *sel);
>> +bool si_resident_descriptor_can_reclaim_slab(void *priv,
>> + struct pb_slab_entry *entry);
>> +struct pb_slab *si_resident_descriptor_slab_alloc(void *priv, unsigned heap,
>> + unsigned entry_size,
>> + unsigned group_index);
>> +void si_resident_descriptor_slab_free(void *priv, struct pb_slab *pslab);
>>
>> /* si_state.c */
>> struct si_shader_selector;
>> --
>> 2.13.0
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list