[Mesa-dev] [RFC PATCH 48/65] radeonsi: add a slab allocator for resident descriptors
Marek Olšák
maraeo at gmail.com
Fri May 26 14:39:30 UTC 2017
FYI, I've replied to some of the radeonsi patches and skimmed through the
rest without giving Reviewed-by tags. I'll do another review once there is a version 2.
Marek
On Fri, May 19, 2017 at 6:52 PM, Samuel Pitoiset
<samuel.pitoiset at gmail.com> wrote:
> For each texture/image handle, we need to allocate a new
> buffer for the resident descriptor. But when the number of
> buffers added to the current CS becomes high, the overhead
> in the winsys (and in the kernel) becomes significant.
>
> To reduce this bottleneck, the idea is to suballocate the
> resident descriptors using a slab similar to the one used
> in the winsys.
>
> Currently, a buffer can hold 1024 resident descriptors, but
> this limit is arbitrary and could be changed in the future
> if needed. Once a slab is allocated, the "base" buffer
> is added to a per-context residency list.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
> src/gallium/drivers/radeonsi/si_descriptors.c | 150 ++++++++++++++++++++++++++
> src/gallium/drivers/radeonsi/si_pipe.c | 10 ++
> src/gallium/drivers/radeonsi/si_pipe.h | 15 +++
> src/gallium/drivers/radeonsi/si_state.h | 8 ++
> 4 files changed, 183 insertions(+)
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 61eb2f10be..d337fc3f11 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -2005,6 +2005,156 @@ void si_emit_compute_shader_userdata(struct si_context *sctx)
> sctx->shader_pointers_dirty &= ~compute_mask;
> }
>
> +/* BINDLESS */
> +
> +static int si_add_resident_descriptor(struct si_context *sctx,
> + struct r600_resource *desc)
> +{
> + int idx;
> +
> + /* New resident descriptor, check if the backing array is large enough. */
> + if (sctx->num_resident_descriptors >= sctx->max_resident_descriptors) {
> + unsigned new_max_descriptors =
> + MAX2(1, sctx->max_resident_descriptors * 2);
> + struct r600_resource **new_descriptors =
> + REALLOC(sctx->resident_descriptors,
> + sctx->num_resident_descriptors * (sizeof(*new_descriptors)),
> + new_max_descriptors * sizeof(*new_descriptors));
> +
> + if (new_descriptors) {
> + sctx->resident_descriptors = new_descriptors;
> + sctx->max_resident_descriptors = new_max_descriptors;
> + } else {
> + fprintf(stderr, "si_add_resident_descriptor: "
> + "allocation failed\n");
> + return -1;
> + }
> + }
> +
> + idx = sctx->num_resident_descriptors;
> + sctx->resident_descriptors[idx] = desc;
> + sctx->num_resident_descriptors++;
> +
> + return 0;
> +}
> +
> +static void si_del_resident_descriptor(struct si_context *sctx,
> + struct r600_resource *desc)
> +{
> + unsigned i;
> + int size;
> +
> + for (i = 0; i < sctx->num_resident_descriptors; i++) {
> + if (sctx->resident_descriptors[i] != desc)
> + continue;
> +
> + if (i < sctx->num_resident_descriptors - 1) {
> + size = sizeof(*sctx->resident_descriptors) *
> + (sctx->num_resident_descriptors - 1 - i);
> +
> + memmove(&sctx->resident_descriptors[i],
> + &sctx->resident_descriptors[i + 1], size);
> + }
> +
> + sctx->num_resident_descriptors--;
> + return;
> + }
> +}
> +
> +struct si_resident_descriptor_slab
> +{
> + struct pb_slab base;
> + struct r600_resource *buffer;
> + struct si_resident_descriptor *entries;
> +};
> +
> +bool si_resident_descriptor_can_reclaim_slab(void *priv,
> + struct pb_slab_entry *entry)
> +{
> + struct si_context *sctx = priv;
> + struct radeon_winsys *ws = sctx->b.ws;
> + struct si_resident_descriptor *desc = NULL; /* fix container_of */
> +
> + desc = container_of(entry, desc, entry);
> +
> + if (ws->cs_is_buffer_referenced(sctx->b.gfx.cs, desc->buffer->buf,
> + RADEON_USAGE_READ)) {
> + /* Do not allow to reclaim the buffer if the resident
> + * descriptor is currently used.
> + */
> + return false;
> + }
> +
> + return true;
> +}
> +
> +struct pb_slab *si_resident_descriptor_slab_alloc(void *priv, unsigned heap,
> + unsigned entry_size,
> + unsigned group_index)
> +{
> + struct si_context *sctx = priv;
> + struct si_screen *sscreen = sctx->screen;
> + struct si_resident_descriptor_slab *slab;
> +
> + slab = CALLOC_STRUCT(si_resident_descriptor_slab);
> + if (!slab)
> + return NULL;
> +
> + /* Create a buffer in VRAM for 1024 resident descriptors. */
> + slab->buffer = (struct r600_resource *)
> + pipe_buffer_create(&sscreen->b.b, 0,
> + PIPE_USAGE_IMMUTABLE, 64 * 1024);
> + if (!slab->buffer)
> + goto fail;
> +
> + slab->base.num_entries = slab->buffer->bo_size / entry_size;
> + slab->base.num_free = slab->base.num_entries;
> + slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
> + if (!slab->entries)
> + goto fail_buffer;
> +
> + LIST_INITHEAD(&slab->base.free);
> +
> + for (unsigned i = 0; i < slab->base.num_entries; ++i) {
> + struct si_resident_descriptor *desc = &slab->entries[i];
> +
> + desc->entry.slab = &slab->base;
> + desc->entry.group_index = group_index;
> + desc->buffer = slab->buffer;
> + desc->offset = i * entry_size;
> +
> + LIST_ADDTAIL(&desc->entry.head, &slab->base.free);
> + }
> +
> + /* Add the descriptor to the per-context residency list. */
> + if (si_add_resident_descriptor(sctx, slab->buffer))
> + goto fail_desc;
> +
> + return &slab->base;
> +
> +fail_desc:
> + FREE(slab->entries);
> +fail_buffer:
> + r600_resource_reference(&slab->buffer, NULL);
> +fail:
> + FREE(slab);
> + return NULL;
> +}
> +
> +void si_resident_descriptor_slab_free(void *priv, struct pb_slab *pslab)
> +{
> + struct si_context *sctx = priv;
> + struct si_resident_descriptor_slab *slab =
> + (struct si_resident_descriptor_slab *)pslab;
> +
> + /* Remove the descriptor from the per-context residency list. */
> + si_del_resident_descriptor(sctx, slab->buffer);
> +
> + r600_resource_reference(&slab->buffer, NULL);
> + FREE(slab->entries);
> + FREE(slab);
> +}
> +
> /* INIT/DEINIT/UPLOAD */
>
> /* GFX9 has only 4KB of CE, while previous chips had 32KB. In order
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index 8e55b807ce..5b1ddda321 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -96,6 +96,9 @@ static void si_destroy_context(struct pipe_context *context)
> r600_resource_reference(&sctx->last_trace_buf, NULL);
> radeon_clear_saved_cs(&sctx->last_gfx);
>
> + pb_slabs_deinit(&sctx->resident_descriptor_slabs);
> +
> + FREE(sctx->resident_descriptors);
> FREE(sctx);
> }
>
> @@ -314,6 +317,13 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
>
> sctx->tm = si_create_llvm_target_machine(sscreen);
>
> + /* Create a slab allocator for all resident descriptors. */
> + if (!pb_slabs_init(&sctx->resident_descriptor_slabs, 6, 6, 1, sctx,
> + si_resident_descriptor_can_reclaim_slab,
> + si_resident_descriptor_slab_alloc,
> + si_resident_descriptor_slab_free))
> + goto fail;
> +
> return &sctx->b.b;
> fail:
> fprintf(stderr, "radeonsi: Failed to create a context.\n");
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index 13ec0729b1..41b0a2a79f 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -224,6 +224,13 @@ union si_vgt_param_key {
> uint32_t index;
> };
>
> +struct si_resident_descriptor
> +{
> + struct pb_slab_entry entry;
> + struct r600_resource *buffer;
> + unsigned offset;
> +};
> +
> struct si_context {
> struct r600_common_context b;
> struct blitter_context *blitter;
> @@ -384,6 +391,14 @@ struct si_context {
> /* Precomputed IA_MULTI_VGT_PARAM */
> union si_vgt_param_key ia_multi_vgt_param_key;
> unsigned ia_multi_vgt_param[SI_NUM_VGT_PARAM_STATES];
> +
> + /* Slab allocator for resident descriptors. */
> + struct pb_slabs resident_descriptor_slabs;
> +
> + /* Resident descriptors. */
> + struct r600_resource **resident_descriptors;
> + unsigned num_resident_descriptors;
> + unsigned max_resident_descriptors;
> };
>
> /* cik_sdma.c */
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index 275f830613..3e9016c84a 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -30,6 +30,8 @@
> #include "si_pm4.h"
> #include "radeon/r600_pipe_common.h"
>
> +#include "pipebuffer/pb_slab.h"
> +
> #define SI_NUM_GRAPHICS_SHADERS (PIPE_SHADER_TESS_EVAL+1)
> #define SI_NUM_SHADERS (PIPE_SHADER_COMPUTE+1)
>
> @@ -335,6 +337,12 @@ void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx,
> uint64_t new_active_mask);
> void si_set_active_descriptors_for_shader(struct si_context *sctx,
> struct si_shader_selector *sel);
> +bool si_resident_descriptor_can_reclaim_slab(void *priv,
> + struct pb_slab_entry *entry);
> +struct pb_slab *si_resident_descriptor_slab_alloc(void *priv, unsigned heap,
> + unsigned entry_size,
> + unsigned group_index);
> +void si_resident_descriptor_slab_free(void *priv, struct pb_slab *pslab);
>
> /* si_state.c */
> struct si_shader_selector;
> --
> 2.13.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list