[Mesa-dev] [PATCH 01/10] radeonsi: merge constant and shader buffers descriptor lists into one
Marek Olšák
maraeo at gmail.com
Thu May 18 10:30:52 UTC 2017
On Thu, May 18, 2017 at 11:28 AM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> On 17.05.2017 21:38, Marek Olšák wrote:
>>
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> Constant buffers: slot[16], .. slot[31] (ascending)
>> Shader buffers: slot[15], .. slot[0] (descending)
>>
>> The idea is that if we have 4 constant buffers and 2 shader buffers, we
>> only
>> have to upload 6 slots. That optimization is left for a later commit.
>> ---
>> src/gallium/drivers/radeonsi/si_debug.c | 44 ++++---
>> src/gallium/drivers/radeonsi/si_descriptors.c | 141
>> +++++++++++-----------
>> src/gallium/drivers/radeonsi/si_pipe.h | 3 +-
>> src/gallium/drivers/radeonsi/si_shader.c | 32 ++---
>> src/gallium/drivers/radeonsi/si_shader.h | 20 ++-
>> src/gallium/drivers/radeonsi/si_shader_internal.h | 3 +-
>> src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 13 +-
>> src/gallium/drivers/radeonsi/si_state.h | 25 +++-
>> 8 files changed, 150 insertions(+), 131 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_debug.c
>> b/src/gallium/drivers/radeonsi/si_debug.c
>> index d1159ad..25c3882 100644
>> --- a/src/gallium/drivers/radeonsi/si_debug.c
>> +++ b/src/gallium/drivers/radeonsi/si_debug.c
>> @@ -373,37 +373,38 @@ static void si_dump_framebuffer(struct si_context
>> *sctx, FILE *f)
>> }
>>
>> if (state->zsbuf) {
>> rtex = (struct r600_texture*)state->zsbuf->texture;
>> fprintf(f, COLOR_YELLOW "Depth-stencil buffer:"
>> COLOR_RESET "\n");
>> r600_print_texture_info(sctx->b.screen, rtex, f);
>> fprintf(f, "\n");
>> }
>> }
>>
>> +typedef unsigned (*slot_remap_func)(unsigned);
>> +
>> static void si_dump_descriptor_list(struct si_descriptors *desc,
>> const char *shader_name,
>> const char *elem_name,
>> unsigned num_elements,
>> + slot_remap_func slot_remap,
>> FILE *f)
>> {
>> unsigned i, j;
>> - uint32_t *cpu_list = desc->list;
>> - uint32_t *gpu_list = desc->gpu_list;
>> - const char *list_note = "GPU list";
>> -
>> - if (!gpu_list) {
>> - gpu_list = cpu_list;
>> - list_note = "CPU list";
>> - }
>>
>> for (i = 0; i < num_elements; i++) {
>> + unsigned dw_offset = slot_remap(i) *
>> desc->element_dw_size;
>> + uint32_t *gpu_ptr = desc->gpu_list ? desc->gpu_list :
>> desc->list;
>> + const char *list_note = desc->gpu_list ? "GPU list" : "CPU
>> list";
>> + uint32_t *cpu_list = desc->list + dw_offset;
>> + uint32_t *gpu_list = gpu_ptr + dw_offset;
>> +
>> fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET
>> "\n",
>> shader_name, elem_name, i, list_note);
>>
>> switch (desc->element_dw_size) {
>> case 4:
>> for (j = 0; j < 4; j++)
>> ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0
>> + j*4,
>> gpu_list[j], 0xffffffff);
>> break;
>> case 8:
>> @@ -437,63 +438,75 @@ static void si_dump_descriptor_list(struct
>> si_descriptors *desc,
>> gpu_list[12+j], 0xffffffff);
>> break;
>> }
>>
>> if (memcmp(gpu_list, cpu_list, desc->element_dw_size * 4)
>> != 0) {
>> fprintf(f, COLOR_RED "!!!!! This slot was
>> corrupted in GPU memory !!!!!"
>> COLOR_RESET "\n");
>> }
>>
>> fprintf(f, "\n");
>> - gpu_list += desc->element_dw_size;
>> - cpu_list += desc->element_dw_size;
>> }
>> }
>>
>> +static unsigned si_identity(unsigned slot)
>> +{
>> + return slot;
>> +}
>> +
>> static void si_dump_descriptors(struct si_context *sctx,
>> enum pipe_shader_type processor,
>> const struct tgsi_shader_info *info, FILE
>> *f)
>> {
>> struct si_descriptors *descs =
>> &sctx->descriptors[SI_DESCS_FIRST_SHADER +
>> processor * SI_NUM_SHADER_DESCS];
>> static const char *shader_name[] = {"VS", "PS", "GS", "TCS",
>> "TES", "CS"};
>>
>> static const char *elem_name[] = {
>> " - Constant buffer",
>> " - Shader buffer",
>> " - Sampler",
>> " - Image",
>> };
>> + static const slot_remap_func remap_func[] = {
>> + si_get_constbuf_slot,
>> + si_get_shaderbuf_slot,
>> + si_identity,
>> + si_identity,
>> + };
>> unsigned enabled_slots[] = {
>> - sctx->const_buffers[processor].enabled_mask,
>> - sctx->shader_buffers[processor].enabled_mask,
>> + sctx->const_and_shader_buffers[processor].enabled_mask >>
>> SI_NUM_SHADER_BUFFERS,
>> +
>> util_bitreverse(sctx->const_and_shader_buffers[processor].enabled_mask &
>> + u_bit_consecutive(0,
>> SI_NUM_SHADER_BUFFERS)),
>> sctx->samplers[processor].views.enabled_mask,
>> sctx->images[processor].enabled_mask,
>> };
>> unsigned required_slots[] = {
>> info ? info->const_buffers_declared : 0,
>> info ? info->shader_buffers_declared : 0,
>> info ? info->samplers_declared : 0,
>> info ? info->images_declared : 0,
>> };
>>
>> if (processor == PIPE_SHADER_VERTEX) {
>> assert(info); /* only CS may not have an info struct */
>>
>> si_dump_descriptor_list(&sctx->vertex_buffers,
>> shader_name[processor],
>> - " - Vertex buffer",
>> info->num_inputs, f);
>> + " - Vertex buffer",
>> info->num_inputs,
>> + si_identity, f);
>> }
>>
>> for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
>> si_dump_descriptor_list(descs, shader_name[processor],
>> elem_name[i],
>> - util_last_bit(enabled_slots[i] |
>> required_slots[i]), f);
>> + util_last_bit(enabled_slots[i] |
>> required_slots[i]),
>> + remap_func[i], f);
>> }
>>
>> static void si_dump_gfx_descriptors(struct si_context *sctx,
>> const struct si_shader_ctx_state
>> *state,
>> FILE *f)
>> {
>> if (!state->cso || !state->current)
>> return;
>>
>> si_dump_descriptors(sctx, state->cso->type, &state->cso->info, f);
>> @@ -798,21 +811,22 @@ static void si_dump_debug_state(struct pipe_context
>> *ctx, FILE *f,
>> si_dump_gfx_shader(sctx->screen, &sctx->ps_shader, f);
>> si_dump_compute_shader(sctx->screen,
>> &sctx->cs_shader_state, f);
>>
>> if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
>> si_dump_annotated_shaders(sctx, f);
>> si_dump_command("Active waves (raw data)", "umr
>> -wa | column -t", f);
>> si_dump_command("Wave information", "umr -O bits
>> -wa", f);
>> }
>>
>>
>> si_dump_descriptor_list(&sctx->descriptors[SI_DESCS_RW_BUFFERS],
>> - "", "RW buffers",
>> SI_NUM_RW_BUFFERS, f);
>> + "", "RW buffers",
>> SI_NUM_RW_BUFFERS,
>> + si_identity, f);
>> si_dump_gfx_descriptors(sctx, &sctx->vs_shader, f);
>> si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, f);
>> si_dump_gfx_descriptors(sctx, &sctx->tes_shader, f);
>> si_dump_gfx_descriptors(sctx, &sctx->gs_shader, f);
>> si_dump_gfx_descriptors(sctx, &sctx->ps_shader, f);
>> si_dump_compute_descriptors(sctx, f);
>> }
>>
>> if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) {
>> si_dump_bo_list(sctx, &sctx->last_gfx, f);
>> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c
>> b/src/gallium/drivers/radeonsi/si_descriptors.c
>> index c92a657..5dc7068 100644
>> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
>> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
>> @@ -929,25 +929,29 @@ static void si_bind_sampler_states(struct
>> pipe_context *ctx,
>> }
>> }
>>
>> /* BUFFER RESOURCES */
>>
>> static void si_init_buffer_resources(struct si_buffer_resources *buffers,
>> struct si_descriptors *descs,
>> unsigned num_buffers,
>> unsigned shader_userdata_index,
>> enum radeon_bo_usage shader_usage,
>> + enum radeon_bo_usage
>> shader_usage_constbuf,
>> enum radeon_bo_priority priority,
>> + enum radeon_bo_priority
>> priority_constbuf,
>> unsigned *ce_offset)
>> {
>> buffers->shader_usage = shader_usage;
>> + buffers->shader_usage_constbuf = shader_usage_constbuf;
>> buffers->priority = priority;
>> + buffers->priority_constbuf = priority_constbuf;
>> buffers->buffers = CALLOC(num_buffers, sizeof(struct
>> pipe_resource*));
>>
>> si_init_descriptors(descs, shader_userdata_index, 4,
>> num_buffers, NULL, ce_offset);
>> }
>>
>> static void si_release_buffer_resources(struct si_buffer_resources
>> *buffers,
>> struct si_descriptors *descs)
>> {
>> int i;
>> @@ -962,22 +966,25 @@ static void si_release_buffer_resources(struct
>> si_buffer_resources *buffers,
>> static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
>> struct si_buffer_resources
>> *buffers)
>> {
>> unsigned mask = buffers->enabled_mask;
>>
>> /* Add buffers to the CS. */
>> while (mask) {
>> int i = u_bit_scan(&mask);
>>
>> radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
>> - (struct
>> r600_resource*)buffers->buffers[i],
>> - buffers->shader_usage,
>> buffers->priority);
>> + r600_resource(buffers->buffers[i]),
>> + i < SI_NUM_SHADER_BUFFERS ? buffers->shader_usage
>> :
>> +
>> buffers->shader_usage_constbuf,
>> + i < SI_NUM_SHADER_BUFFERS ? buffers->priority :
>> +
>> buffers->priority_constbuf);
>> }
>> }
>>
>> static void si_get_buffer_from_descriptors(struct si_buffer_resources
>> *buffers,
>> struct si_descriptors *descs,
>> unsigned idx, struct
>> pipe_resource **buf,
>> unsigned *offset, unsigned
>> *size)
>> {
>> pipe_resource_reference(buf, buffers->buffers[idx]);
>> if (*buf) {
>> @@ -1112,30 +1119,30 @@ bool si_upload_vertex_buffer_descriptors(struct
>> si_context *sctx)
>> si_mark_atom_dirty(sctx, &sctx->prefetch_L2);
>> sctx->vertex_buffers_dirty = false;
>> sctx->vertex_buffer_pointer_dirty = true;
>> return true;
>> }
>>
>>
>> /* CONSTANT BUFFERS */
>>
>> static unsigned
>> -si_const_buffer_descriptors_idx(unsigned shader)
>> +si_const_and_shader_buffer_descriptors_idx(unsigned shader)
>> {
>> return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
>> - SI_SHADER_DESCS_CONST_BUFFERS;
>> + SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS;
>> }
>>
>> static struct si_descriptors *
>> -si_const_buffer_descriptors(struct si_context *sctx, unsigned shader)
>> +si_const_and_shader_buffer_descriptors(struct si_context *sctx, unsigned
>> shader)
>> {
>> - return
>> &sctx->descriptors[si_const_buffer_descriptors_idx(shader)];
>> + return
>> &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)];
>> }
>>
>> void si_upload_const_buffer(struct si_context *sctx, struct r600_resource
>> **rbuffer,
>> const uint8_t *ptr, unsigned size, uint32_t
>> *const_offset)
>> {
>> void *tmp;
>>
>> u_upload_alloc(sctx->b.b.const_uploader, 0, size,
>> si_optimal_tcc_alignment(sctx, size),
>> const_offset,
>> @@ -1192,22 +1199,22 @@ static void si_set_constant_buffer(struct
>> si_context *sctx,
>> desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
>> S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
>> S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
>> S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
>>
>> S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
>>
>> S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
>>
>> buffers->buffers[slot] = buffer;
>> radeon_add_to_buffer_list_check_mem(&sctx->b,
>> &sctx->b.gfx,
>> (struct
>> r600_resource*)buffer,
>> - buffers->shader_usage,
>> - buffers->priority,
>> true);
>> +
>> buffers->shader_usage_constbuf,
>> +
>> buffers->priority_constbuf, true);
>> buffers->enabled_mask |= 1u << slot;
>> } else {
>> /* Clear the descriptor. */
>> memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
>> buffers->enabled_mask &= ~(1u << slot);
>> }
>>
>> descs->dirty_mask |= 1u << slot;
>> sctx->descriptors_dirty |= 1u << descriptors_idx;
>> }
>> @@ -1221,77 +1228,64 @@ void si_set_rw_buffer(struct si_context *sctx,
>>
>> static void si_pipe_set_constant_buffer(struct pipe_context *ctx,
>> enum pipe_shader_type shader, uint
>> slot,
>> const struct pipe_constant_buffer
>> *input)
>> {
>> struct si_context *sctx = (struct si_context *)ctx;
>>
>> if (shader >= SI_NUM_SHADERS)
>> return;
>>
>> - si_set_constant_buffer(sctx, &sctx->const_buffers[shader],
>> - si_const_buffer_descriptors_idx(shader),
>> + slot = si_get_constbuf_slot(slot);
>> + si_set_constant_buffer(sctx,
>> &sctx->const_and_shader_buffers[shader],
>> +
>> si_const_and_shader_buffer_descriptors_idx(shader),
>> slot, input);
>> }
>>
>> void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader,
>> uint slot, struct pipe_constant_buffer
>> *cbuf)
>> {
>> cbuf->user_buffer = NULL;
>> si_get_buffer_from_descriptors(
>> - &sctx->const_buffers[shader],
>> - si_const_buffer_descriptors(sctx, shader),
>> + &sctx->const_and_shader_buffers[shader],
>> + si_const_and_shader_buffer_descriptors(sctx, shader),
>> slot, &cbuf->buffer, &cbuf->buffer_offset,
>> &cbuf->buffer_size);
>
>
> slot needs to be remapped here as well, doesn't it?
Yes, I just need to use si_get_constbuf_slot(slot).
>
> With that fixed, the patch is:
>
> Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
Thanks.
Marek
More information about the mesa-dev
mailing list