[Mesa-dev] [PATCH 01/10] radeonsi: merge constant and shader buffers descriptor lists into one

Marek Olšák maraeo at gmail.com
Thu May 18 10:30:52 UTC 2017


On Thu, May 18, 2017 at 11:28 AM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> On 17.05.2017 21:38, Marek Olšák wrote:
>>
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> Constant buffers: slot[16], .. slot[31] (ascending)
>> Shader buffers: slot[15], .. slot[0] (descending)
>>
>> The idea is that if we have 4 constant buffers and 2 shader buffers, we
>> only
>> have to upload 6 slots. That optimization is left for a later commit.
>> ---
>>  src/gallium/drivers/radeonsi/si_debug.c           |  44 ++++---
>>  src/gallium/drivers/radeonsi/si_descriptors.c     | 141
>> +++++++++++-----------
>>  src/gallium/drivers/radeonsi/si_pipe.h            |   3 +-
>>  src/gallium/drivers/radeonsi/si_shader.c          |  32 ++---
>>  src/gallium/drivers/radeonsi/si_shader.h          |  20 ++-
>>  src/gallium/drivers/radeonsi/si_shader_internal.h |   3 +-
>>  src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c |  13 +-
>>  src/gallium/drivers/radeonsi/si_state.h           |  25 +++-
>>  8 files changed, 150 insertions(+), 131 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_debug.c
>> b/src/gallium/drivers/radeonsi/si_debug.c
>> index d1159ad..25c3882 100644
>> --- a/src/gallium/drivers/radeonsi/si_debug.c
>> +++ b/src/gallium/drivers/radeonsi/si_debug.c
>> @@ -373,37 +373,38 @@ static void si_dump_framebuffer(struct si_context
>> *sctx, FILE *f)
>>         }
>>
>>         if (state->zsbuf) {
>>                 rtex = (struct r600_texture*)state->zsbuf->texture;
>>                 fprintf(f, COLOR_YELLOW "Depth-stencil buffer:"
>> COLOR_RESET "\n");
>>                 r600_print_texture_info(sctx->b.screen, rtex, f);
>>                 fprintf(f, "\n");
>>         }
>>  }
>>
>> +typedef unsigned (*slot_remap_func)(unsigned);
>> +
>>  static void si_dump_descriptor_list(struct si_descriptors *desc,
>>                                     const char *shader_name,
>>                                     const char *elem_name,
>>                                     unsigned num_elements,
>> +                                   slot_remap_func slot_remap,
>>                                     FILE *f)
>>  {
>>         unsigned i, j;
>> -       uint32_t *cpu_list = desc->list;
>> -       uint32_t *gpu_list = desc->gpu_list;
>> -       const char *list_note = "GPU list";
>> -
>> -       if (!gpu_list) {
>> -               gpu_list = cpu_list;
>> -               list_note = "CPU list";
>> -       }
>>
>>         for (i = 0; i < num_elements; i++) {
>> +               unsigned dw_offset = slot_remap(i) *
>> desc->element_dw_size;
>> +               uint32_t *gpu_ptr = desc->gpu_list ? desc->gpu_list :
>> desc->list;
>> +               const char *list_note = desc->gpu_list ? "GPU list" : "CPU
>> list";
>> +               uint32_t *cpu_list = desc->list + dw_offset;
>> +               uint32_t *gpu_list = gpu_ptr + dw_offset;
>> +
>>                 fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET
>> "\n",
>>                         shader_name, elem_name, i, list_note);
>>
>>                 switch (desc->element_dw_size) {
>>                 case 4:
>>                         for (j = 0; j < 4; j++)
>>                                 ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0
>> + j*4,
>>                                             gpu_list[j], 0xffffffff);
>>                         break;
>>                 case 8:
>> @@ -437,63 +438,75 @@ static void si_dump_descriptor_list(struct
>> si_descriptors *desc,
>>                                             gpu_list[12+j], 0xffffffff);
>>                         break;
>>                 }
>>
>>                 if (memcmp(gpu_list, cpu_list, desc->element_dw_size * 4)
>> != 0) {
>>                         fprintf(f, COLOR_RED "!!!!! This slot was
>> corrupted in GPU memory !!!!!"
>>                                 COLOR_RESET "\n");
>>                 }
>>
>>                 fprintf(f, "\n");
>> -               gpu_list += desc->element_dw_size;
>> -               cpu_list += desc->element_dw_size;
>>         }
>>  }
>>
>> +static unsigned si_identity(unsigned slot)
>> +{
>> +       return slot;
>> +}
>> +
>>  static void si_dump_descriptors(struct si_context *sctx,
>>                                 enum pipe_shader_type processor,
>>                                 const struct tgsi_shader_info *info, FILE
>> *f)
>>  {
>>         struct si_descriptors *descs =
>>                 &sctx->descriptors[SI_DESCS_FIRST_SHADER +
>>                                    processor * SI_NUM_SHADER_DESCS];
>>         static const char *shader_name[] = {"VS", "PS", "GS", "TCS",
>> "TES", "CS"};
>>
>>         static const char *elem_name[] = {
>>                 " - Constant buffer",
>>                 " - Shader buffer",
>>                 " - Sampler",
>>                 " - Image",
>>         };
>> +       static const slot_remap_func remap_func[] = {
>> +               si_get_constbuf_slot,
>> +               si_get_shaderbuf_slot,
>> +               si_identity,
>> +               si_identity,
>> +       };
>>         unsigned enabled_slots[] = {
>> -               sctx->const_buffers[processor].enabled_mask,
>> -               sctx->shader_buffers[processor].enabled_mask,
>> +               sctx->const_and_shader_buffers[processor].enabled_mask >>
>> SI_NUM_SHADER_BUFFERS,
>> +
>> util_bitreverse(sctx->const_and_shader_buffers[processor].enabled_mask &
>> +                               u_bit_consecutive(0,
>> SI_NUM_SHADER_BUFFERS)),
>>                 sctx->samplers[processor].views.enabled_mask,
>>                 sctx->images[processor].enabled_mask,
>>         };
>>         unsigned required_slots[] = {
>>                 info ? info->const_buffers_declared : 0,
>>                 info ? info->shader_buffers_declared : 0,
>>                 info ? info->samplers_declared : 0,
>>                 info ? info->images_declared : 0,
>>         };
>>
>>         if (processor == PIPE_SHADER_VERTEX) {
>>                 assert(info); /* only CS may not have an info struct */
>>
>>                 si_dump_descriptor_list(&sctx->vertex_buffers,
>> shader_name[processor],
>> -                                       " - Vertex buffer",
>> info->num_inputs, f);
>> +                                       " - Vertex buffer",
>> info->num_inputs,
>> +                                       si_identity, f);
>>         }
>>
>>         for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
>>                 si_dump_descriptor_list(descs, shader_name[processor],
>> elem_name[i],
>> -                                       util_last_bit(enabled_slots[i] |
>> required_slots[i]), f);
>> +                                       util_last_bit(enabled_slots[i] |
>> required_slots[i]),
>> +                                       remap_func[i], f);
>>  }
>>
>>  static void si_dump_gfx_descriptors(struct si_context *sctx,
>>                                     const struct si_shader_ctx_state
>> *state,
>>                                     FILE *f)
>>  {
>>         if (!state->cso || !state->current)
>>                 return;
>>
>>         si_dump_descriptors(sctx, state->cso->type, &state->cso->info, f);
>> @@ -798,21 +811,22 @@ static void si_dump_debug_state(struct pipe_context
>> *ctx, FILE *f,
>>                 si_dump_gfx_shader(sctx->screen, &sctx->ps_shader, f);
>>                 si_dump_compute_shader(sctx->screen,
>> &sctx->cs_shader_state, f);
>>
>>                 if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
>>                         si_dump_annotated_shaders(sctx, f);
>>                         si_dump_command("Active waves (raw data)", "umr
>> -wa | column -t", f);
>>                         si_dump_command("Wave information", "umr -O bits
>> -wa", f);
>>                 }
>>
>>
>> si_dump_descriptor_list(&sctx->descriptors[SI_DESCS_RW_BUFFERS],
>> -                                       "", "RW buffers",
>> SI_NUM_RW_BUFFERS, f);
>> +                                       "", "RW buffers",
>> SI_NUM_RW_BUFFERS,
>> +                                       si_identity, f);
>>                 si_dump_gfx_descriptors(sctx, &sctx->vs_shader, f);
>>                 si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, f);
>>                 si_dump_gfx_descriptors(sctx, &sctx->tes_shader, f);
>>                 si_dump_gfx_descriptors(sctx, &sctx->gs_shader, f);
>>                 si_dump_gfx_descriptors(sctx, &sctx->ps_shader, f);
>>                 si_dump_compute_descriptors(sctx, f);
>>         }
>>
>>         if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) {
>>                 si_dump_bo_list(sctx, &sctx->last_gfx, f);
>> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c
>> b/src/gallium/drivers/radeonsi/si_descriptors.c
>> index c92a657..5dc7068 100644
>> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
>> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
>> @@ -929,25 +929,29 @@ static void si_bind_sampler_states(struct
>> pipe_context *ctx,
>>         }
>>  }
>>
>>  /* BUFFER RESOURCES */
>>
>>  static void si_init_buffer_resources(struct si_buffer_resources *buffers,
>>                                      struct si_descriptors *descs,
>>                                      unsigned num_buffers,
>>                                      unsigned shader_userdata_index,
>>                                      enum radeon_bo_usage shader_usage,
>> +                                    enum radeon_bo_usage
>> shader_usage_constbuf,
>>                                      enum radeon_bo_priority priority,
>> +                                    enum radeon_bo_priority
>> priority_constbuf,
>>                                      unsigned *ce_offset)
>>  {
>>         buffers->shader_usage = shader_usage;
>> +       buffers->shader_usage_constbuf = shader_usage_constbuf;
>>         buffers->priority = priority;
>> +       buffers->priority_constbuf = priority_constbuf;
>>         buffers->buffers = CALLOC(num_buffers, sizeof(struct
>> pipe_resource*));
>>
>>         si_init_descriptors(descs, shader_userdata_index, 4,
>>                             num_buffers, NULL, ce_offset);
>>  }
>>
>>  static void si_release_buffer_resources(struct si_buffer_resources
>> *buffers,
>>                                         struct si_descriptors *descs)
>>  {
>>         int i;
>> @@ -962,22 +966,25 @@ static void si_release_buffer_resources(struct
>> si_buffer_resources *buffers,
>>  static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
>>                                              struct si_buffer_resources
>> *buffers)
>>  {
>>         unsigned mask = buffers->enabled_mask;
>>
>>         /* Add buffers to the CS. */
>>         while (mask) {
>>                 int i = u_bit_scan(&mask);
>>
>>                 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
>> -                                     (struct
>> r600_resource*)buffers->buffers[i],
>> -                                     buffers->shader_usage,
>> buffers->priority);
>> +                       r600_resource(buffers->buffers[i]),
>> +                       i < SI_NUM_SHADER_BUFFERS ? buffers->shader_usage
>> :
>> +
>> buffers->shader_usage_constbuf,
>> +                       i < SI_NUM_SHADER_BUFFERS ? buffers->priority :
>> +
>> buffers->priority_constbuf);
>>         }
>>  }
>>
>>  static void si_get_buffer_from_descriptors(struct si_buffer_resources
>> *buffers,
>>                                            struct si_descriptors *descs,
>>                                            unsigned idx, struct
>> pipe_resource **buf,
>>                                            unsigned *offset, unsigned
>> *size)
>>  {
>>         pipe_resource_reference(buf, buffers->buffers[idx]);
>>         if (*buf) {
>> @@ -1112,30 +1119,30 @@ bool si_upload_vertex_buffer_descriptors(struct
>> si_context *sctx)
>>                 si_mark_atom_dirty(sctx, &sctx->prefetch_L2);
>>         sctx->vertex_buffers_dirty = false;
>>         sctx->vertex_buffer_pointer_dirty = true;
>>         return true;
>>  }
>>
>>
>>  /* CONSTANT BUFFERS */
>>
>>  static unsigned
>> -si_const_buffer_descriptors_idx(unsigned shader)
>> +si_const_and_shader_buffer_descriptors_idx(unsigned shader)
>>  {
>>         return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
>> -              SI_SHADER_DESCS_CONST_BUFFERS;
>> +              SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS;
>>  }
>>
>>  static struct si_descriptors *
>> -si_const_buffer_descriptors(struct si_context *sctx, unsigned shader)
>> +si_const_and_shader_buffer_descriptors(struct si_context *sctx, unsigned
>> shader)
>>  {
>> -       return
>> &sctx->descriptors[si_const_buffer_descriptors_idx(shader)];
>> +       return
>> &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)];
>>  }
>>
>>  void si_upload_const_buffer(struct si_context *sctx, struct r600_resource
>> **rbuffer,
>>                             const uint8_t *ptr, unsigned size, uint32_t
>> *const_offset)
>>  {
>>         void *tmp;
>>
>>         u_upload_alloc(sctx->b.b.const_uploader, 0, size,
>>                        si_optimal_tcc_alignment(sctx, size),
>>                        const_offset,
>> @@ -1192,22 +1199,22 @@ static void si_set_constant_buffer(struct
>> si_context *sctx,
>>                 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
>>                           S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
>>                           S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
>>                           S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
>>
>> S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
>>
>> S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
>>
>>                 buffers->buffers[slot] = buffer;
>>                 radeon_add_to_buffer_list_check_mem(&sctx->b,
>> &sctx->b.gfx,
>>                                                     (struct
>> r600_resource*)buffer,
>> -                                                   buffers->shader_usage,
>> -                                                   buffers->priority,
>> true);
>> +
>> buffers->shader_usage_constbuf,
>> +
>> buffers->priority_constbuf, true);
>>                 buffers->enabled_mask |= 1u << slot;
>>         } else {
>>                 /* Clear the descriptor. */
>>                 memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
>>                 buffers->enabled_mask &= ~(1u << slot);
>>         }
>>
>>         descs->dirty_mask |= 1u << slot;
>>         sctx->descriptors_dirty |= 1u << descriptors_idx;
>>  }
>> @@ -1221,77 +1228,64 @@ void si_set_rw_buffer(struct si_context *sctx,
>>
>>  static void si_pipe_set_constant_buffer(struct pipe_context *ctx,
>>                                         enum pipe_shader_type shader, uint
>> slot,
>>                                         const struct pipe_constant_buffer
>> *input)
>>  {
>>         struct si_context *sctx = (struct si_context *)ctx;
>>
>>         if (shader >= SI_NUM_SHADERS)
>>                 return;
>>
>> -       si_set_constant_buffer(sctx, &sctx->const_buffers[shader],
>> -                              si_const_buffer_descriptors_idx(shader),
>> +       slot = si_get_constbuf_slot(slot);
>> +       si_set_constant_buffer(sctx,
>> &sctx->const_and_shader_buffers[shader],
>> +
>> si_const_and_shader_buffer_descriptors_idx(shader),
>>                                slot, input);
>>  }
>>
>>  void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader,
>>                                  uint slot, struct pipe_constant_buffer
>> *cbuf)
>>  {
>>         cbuf->user_buffer = NULL;
>>         si_get_buffer_from_descriptors(
>> -               &sctx->const_buffers[shader],
>> -               si_const_buffer_descriptors(sctx, shader),
>> +               &sctx->const_and_shader_buffers[shader],
>> +               si_const_and_shader_buffer_descriptors(sctx, shader),
>>                 slot, &cbuf->buffer, &cbuf->buffer_offset,
>> &cbuf->buffer_size);
>
>
> slot needs to be remapped here as well, doesn't it?

Yes, I just need to use si_get_constbuf_slot(slot).

>
> With that fixed, the patch is:
>
> Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

Thanks.

Marek


More information about the mesa-dev mailing list