[Mesa-dev] [PATCH 01/10] radeonsi: merge constant and shader buffers descriptor lists into one
Nicolai Hähnle
nhaehnle at gmail.com
Thu May 18 09:28:35 UTC 2017
On 17.05.2017 21:38, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> Constant buffers: slot[16], .. slot[31] (ascending)
> Shader buffers: slot[15], .. slot[0] (descending)
>
> The idea is that if we have 4 constant buffers and 2 shader buffers, we only
> have to upload 6 slots. That optimization is left for a later commit.
> ---
> src/gallium/drivers/radeonsi/si_debug.c | 44 ++++---
> src/gallium/drivers/radeonsi/si_descriptors.c | 141 +++++++++++-----------
> src/gallium/drivers/radeonsi/si_pipe.h | 3 +-
> src/gallium/drivers/radeonsi/si_shader.c | 32 ++---
> src/gallium/drivers/radeonsi/si_shader.h | 20 ++-
> src/gallium/drivers/radeonsi/si_shader_internal.h | 3 +-
> src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 13 +-
> src/gallium/drivers/radeonsi/si_state.h | 25 +++-
> 8 files changed, 150 insertions(+), 131 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
> index d1159ad..25c3882 100644
> --- a/src/gallium/drivers/radeonsi/si_debug.c
> +++ b/src/gallium/drivers/radeonsi/si_debug.c
> @@ -373,37 +373,38 @@ static void si_dump_framebuffer(struct si_context *sctx, FILE *f)
> }
>
> if (state->zsbuf) {
> rtex = (struct r600_texture*)state->zsbuf->texture;
> fprintf(f, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
> r600_print_texture_info(sctx->b.screen, rtex, f);
> fprintf(f, "\n");
> }
> }
>
> +typedef unsigned (*slot_remap_func)(unsigned);
> +
> static void si_dump_descriptor_list(struct si_descriptors *desc,
> const char *shader_name,
> const char *elem_name,
> unsigned num_elements,
> + slot_remap_func slot_remap,
> FILE *f)
> {
> unsigned i, j;
> - uint32_t *cpu_list = desc->list;
> - uint32_t *gpu_list = desc->gpu_list;
> - const char *list_note = "GPU list";
> -
> - if (!gpu_list) {
> - gpu_list = cpu_list;
> - list_note = "CPU list";
> - }
>
> for (i = 0; i < num_elements; i++) {
> + unsigned dw_offset = slot_remap(i) * desc->element_dw_size;
> + uint32_t *gpu_ptr = desc->gpu_list ? desc->gpu_list : desc->list;
> + const char *list_note = desc->gpu_list ? "GPU list" : "CPU list";
> + uint32_t *cpu_list = desc->list + dw_offset;
> + uint32_t *gpu_list = gpu_ptr + dw_offset;
> +
> fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
> shader_name, elem_name, i, list_note);
>
> switch (desc->element_dw_size) {
> case 4:
> for (j = 0; j < 4; j++)
> ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
> gpu_list[j], 0xffffffff);
> break;
> case 8:
> @@ -437,63 +438,75 @@ static void si_dump_descriptor_list(struct si_descriptors *desc,
> gpu_list[12+j], 0xffffffff);
> break;
> }
>
> if (memcmp(gpu_list, cpu_list, desc->element_dw_size * 4) != 0) {
> fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!"
> COLOR_RESET "\n");
> }
>
> fprintf(f, "\n");
> - gpu_list += desc->element_dw_size;
> - cpu_list += desc->element_dw_size;
> }
> }
>
> +static unsigned si_identity(unsigned slot)
> +{
> + return slot;
> +}
> +
> static void si_dump_descriptors(struct si_context *sctx,
> enum pipe_shader_type processor,
> const struct tgsi_shader_info *info, FILE *f)
> {
> struct si_descriptors *descs =
> &sctx->descriptors[SI_DESCS_FIRST_SHADER +
> processor * SI_NUM_SHADER_DESCS];
> static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
>
> static const char *elem_name[] = {
> " - Constant buffer",
> " - Shader buffer",
> " - Sampler",
> " - Image",
> };
> + static const slot_remap_func remap_func[] = {
> + si_get_constbuf_slot,
> + si_get_shaderbuf_slot,
> + si_identity,
> + si_identity,
> + };
> unsigned enabled_slots[] = {
> - sctx->const_buffers[processor].enabled_mask,
> - sctx->shader_buffers[processor].enabled_mask,
> + sctx->const_and_shader_buffers[processor].enabled_mask >> SI_NUM_SHADER_BUFFERS,
> + util_bitreverse(sctx->const_and_shader_buffers[processor].enabled_mask &
> + u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS)),
> sctx->samplers[processor].views.enabled_mask,
> sctx->images[processor].enabled_mask,
> };
> unsigned required_slots[] = {
> info ? info->const_buffers_declared : 0,
> info ? info->shader_buffers_declared : 0,
> info ? info->samplers_declared : 0,
> info ? info->images_declared : 0,
> };
>
> if (processor == PIPE_SHADER_VERTEX) {
> assert(info); /* only CS may not have an info struct */
>
> si_dump_descriptor_list(&sctx->vertex_buffers, shader_name[processor],
> - " - Vertex buffer", info->num_inputs, f);
> + " - Vertex buffer", info->num_inputs,
> + si_identity, f);
> }
>
> for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
> si_dump_descriptor_list(descs, shader_name[processor], elem_name[i],
> - util_last_bit(enabled_slots[i] | required_slots[i]), f);
> + util_last_bit(enabled_slots[i] | required_slots[i]),
> + remap_func[i], f);
> }
>
> static void si_dump_gfx_descriptors(struct si_context *sctx,
> const struct si_shader_ctx_state *state,
> FILE *f)
> {
> if (!state->cso || !state->current)
> return;
>
> si_dump_descriptors(sctx, state->cso->type, &state->cso->info, f);
> @@ -798,21 +811,22 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
> si_dump_gfx_shader(sctx->screen, &sctx->ps_shader, f);
> si_dump_compute_shader(sctx->screen, &sctx->cs_shader_state, f);
>
> if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
> si_dump_annotated_shaders(sctx, f);
> si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
> si_dump_command("Wave information", "umr -O bits -wa", f);
> }
>
> si_dump_descriptor_list(&sctx->descriptors[SI_DESCS_RW_BUFFERS],
> - "", "RW buffers", SI_NUM_RW_BUFFERS, f);
> + "", "RW buffers", SI_NUM_RW_BUFFERS,
> + si_identity, f);
> si_dump_gfx_descriptors(sctx, &sctx->vs_shader, f);
> si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, f);
> si_dump_gfx_descriptors(sctx, &sctx->tes_shader, f);
> si_dump_gfx_descriptors(sctx, &sctx->gs_shader, f);
> si_dump_gfx_descriptors(sctx, &sctx->ps_shader, f);
> si_dump_compute_descriptors(sctx, f);
> }
>
> if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) {
> si_dump_bo_list(sctx, &sctx->last_gfx, f);
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> index c92a657..5dc7068 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -929,25 +929,29 @@ static void si_bind_sampler_states(struct pipe_context *ctx,
> }
> }
>
> /* BUFFER RESOURCES */
>
> static void si_init_buffer_resources(struct si_buffer_resources *buffers,
> struct si_descriptors *descs,
> unsigned num_buffers,
> unsigned shader_userdata_index,
> enum radeon_bo_usage shader_usage,
> + enum radeon_bo_usage shader_usage_constbuf,
> enum radeon_bo_priority priority,
> + enum radeon_bo_priority priority_constbuf,
> unsigned *ce_offset)
> {
> buffers->shader_usage = shader_usage;
> + buffers->shader_usage_constbuf = shader_usage_constbuf;
> buffers->priority = priority;
> + buffers->priority_constbuf = priority_constbuf;
> buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
>
> si_init_descriptors(descs, shader_userdata_index, 4,
> num_buffers, NULL, ce_offset);
> }
>
> static void si_release_buffer_resources(struct si_buffer_resources *buffers,
> struct si_descriptors *descs)
> {
> int i;
> @@ -962,22 +966,25 @@ static void si_release_buffer_resources(struct si_buffer_resources *buffers,
> static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
> struct si_buffer_resources *buffers)
> {
> unsigned mask = buffers->enabled_mask;
>
> /* Add buffers to the CS. */
> while (mask) {
> int i = u_bit_scan(&mask);
>
> radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
> - (struct r600_resource*)buffers->buffers[i],
> - buffers->shader_usage, buffers->priority);
> + r600_resource(buffers->buffers[i]),
> + i < SI_NUM_SHADER_BUFFERS ? buffers->shader_usage :
> + buffers->shader_usage_constbuf,
> + i < SI_NUM_SHADER_BUFFERS ? buffers->priority :
> + buffers->priority_constbuf);
> }
> }
>
> static void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers,
> struct si_descriptors *descs,
> unsigned idx, struct pipe_resource **buf,
> unsigned *offset, unsigned *size)
> {
> pipe_resource_reference(buf, buffers->buffers[idx]);
> if (*buf) {
> @@ -1112,30 +1119,30 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
> si_mark_atom_dirty(sctx, &sctx->prefetch_L2);
> sctx->vertex_buffers_dirty = false;
> sctx->vertex_buffer_pointer_dirty = true;
> return true;
> }
>
>
> /* CONSTANT BUFFERS */
>
> static unsigned
> -si_const_buffer_descriptors_idx(unsigned shader)
> +si_const_and_shader_buffer_descriptors_idx(unsigned shader)
> {
> return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
> - SI_SHADER_DESCS_CONST_BUFFERS;
> + SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS;
> }
>
> static struct si_descriptors *
> -si_const_buffer_descriptors(struct si_context *sctx, unsigned shader)
> +si_const_and_shader_buffer_descriptors(struct si_context *sctx, unsigned shader)
> {
> - return &sctx->descriptors[si_const_buffer_descriptors_idx(shader)];
> + return &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)];
> }
>
> void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
> const uint8_t *ptr, unsigned size, uint32_t *const_offset)
> {
> void *tmp;
>
> u_upload_alloc(sctx->b.b.const_uploader, 0, size,
> si_optimal_tcc_alignment(sctx, size),
> const_offset,
> @@ -1192,22 +1199,22 @@ static void si_set_constant_buffer(struct si_context *sctx,
> desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
> S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
> S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
> S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
> S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
> S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
>
> buffers->buffers[slot] = buffer;
> radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
> (struct r600_resource*)buffer,
> - buffers->shader_usage,
> - buffers->priority, true);
> + buffers->shader_usage_constbuf,
> + buffers->priority_constbuf, true);
> buffers->enabled_mask |= 1u << slot;
> } else {
> /* Clear the descriptor. */
> memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
> buffers->enabled_mask &= ~(1u << slot);
> }
>
> descs->dirty_mask |= 1u << slot;
> sctx->descriptors_dirty |= 1u << descriptors_idx;
> }
> @@ -1221,77 +1228,64 @@ void si_set_rw_buffer(struct si_context *sctx,
>
> static void si_pipe_set_constant_buffer(struct pipe_context *ctx,
> enum pipe_shader_type shader, uint slot,
> const struct pipe_constant_buffer *input)
> {
> struct si_context *sctx = (struct si_context *)ctx;
>
> if (shader >= SI_NUM_SHADERS)
> return;
>
> - si_set_constant_buffer(sctx, &sctx->const_buffers[shader],
> - si_const_buffer_descriptors_idx(shader),
> + slot = si_get_constbuf_slot(slot);
> + si_set_constant_buffer(sctx, &sctx->const_and_shader_buffers[shader],
> + si_const_and_shader_buffer_descriptors_idx(shader),
> slot, input);
> }
>
> void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader,
> uint slot, struct pipe_constant_buffer *cbuf)
> {
> cbuf->user_buffer = NULL;
> si_get_buffer_from_descriptors(
> - &sctx->const_buffers[shader],
> - si_const_buffer_descriptors(sctx, shader),
> + &sctx->const_and_shader_buffers[shader],
> + si_const_and_shader_buffer_descriptors(sctx, shader),
> slot, &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size);
slot needs to be remapped here as well, doesn't it?
With that fixed, the patch is:
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
> }
>
> /* SHADER BUFFERS */
>
> -static unsigned
> -si_shader_buffer_descriptors_idx(enum pipe_shader_type shader)
> -{
> - return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
> - SI_SHADER_DESCS_SHADER_BUFFERS;
> -}
> -
> -static struct si_descriptors *
> -si_shader_buffer_descriptors(struct si_context *sctx,
> - enum pipe_shader_type shader)
> -{
> - return &sctx->descriptors[si_shader_buffer_descriptors_idx(shader)];
> -}
> -
> static void si_set_shader_buffers(struct pipe_context *ctx,
> enum pipe_shader_type shader,
> unsigned start_slot, unsigned count,
> const struct pipe_shader_buffer *sbuffers)
> {
> struct si_context *sctx = (struct si_context *)ctx;
> - struct si_buffer_resources *buffers = &sctx->shader_buffers[shader];
> - struct si_descriptors *descs = si_shader_buffer_descriptors(sctx, shader);
> + struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
> + struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);
> unsigned i;
>
> assert(start_slot + count <= SI_NUM_SHADER_BUFFERS);
>
> for (i = 0; i < count; ++i) {
> const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL;
> struct r600_resource *buf;
> - unsigned slot = start_slot + i;
> + unsigned slot = si_get_shaderbuf_slot(start_slot + i);
> uint32_t *desc = descs->list + slot * 4;
> uint64_t va;
>
> if (!sbuffer || !sbuffer->buffer) {
> pipe_resource_reference(&buffers->buffers[slot], NULL);
> memset(desc, 0, sizeof(uint32_t) * 4);
> buffers->enabled_mask &= ~(1u << slot);
> descs->dirty_mask |= 1u << slot;
> sctx->descriptors_dirty |=
> - 1u << si_shader_buffer_descriptors_idx(shader);
> + 1u << si_const_and_shader_buffer_descriptors_idx(shader);
> continue;
> }
>
> buf = (struct r600_resource *)sbuffer->buffer;
> va = buf->gpu_address + sbuffer->buffer_offset;
>
> desc[0] = va;
> desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
> S_008F04_STRIDE(0);
> desc[2] = sbuffer->buffer_size;
> @@ -1304,38 +1298,39 @@ static void si_set_shader_buffers(struct pipe_context *ctx,
>
> pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
> radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, buf,
> buffers->shader_usage,
> buffers->priority, true);
> buf->bind_history |= PIPE_BIND_SHADER_BUFFER;
>
> buffers->enabled_mask |= 1u << slot;
> descs->dirty_mask |= 1u << slot;
> sctx->descriptors_dirty |=
> - 1u << si_shader_buffer_descriptors_idx(shader);
> + 1u << si_const_and_shader_buffer_descriptors_idx(shader);
>
> util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset,
> sbuffer->buffer_offset + sbuffer->buffer_size);
> }
> }
>
> void si_get_shader_buffers(struct si_context *sctx,
> enum pipe_shader_type shader,
> uint start_slot, uint count,
> struct pipe_shader_buffer *sbuf)
> {
> - struct si_buffer_resources *buffers = &sctx->shader_buffers[shader];
> - struct si_descriptors *descs = si_shader_buffer_descriptors(sctx, shader);
> + struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
> + struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);
>
> for (unsigned i = 0; i < count; ++i) {
> si_get_buffer_from_descriptors(
> - buffers, descs, start_slot + i,
> + buffers, descs,
> + si_get_shaderbuf_slot(start_slot + i),
> &sbuf[i].buffer, &sbuf[i].buffer_offset,
> &sbuf[i].buffer_size);
> }
> }
>
> /* RING BUFFERS */
>
> void si_set_ring_buffer(struct pipe_context *ctx, uint slot,
> struct pipe_resource *buffer,
> unsigned stride, unsigned num_records,
> @@ -1596,39 +1591,41 @@ void si_update_compressed_colortex_masks(struct si_context *sctx)
> si_update_compressed_tex_shader_mask(sctx, i);
> }
> }
>
> /* BUFFER DISCARD/INVALIDATION */
>
> /** Reset descriptors of buffer resources after \p buf has been invalidated. */
> static void si_reset_buffer_resources(struct si_context *sctx,
> struct si_buffer_resources *buffers,
> unsigned descriptors_idx,
> + unsigned slot_mask,
> struct pipe_resource *buf,
> - uint64_t old_va)
> + uint64_t old_va,
> + enum radeon_bo_usage usage,
> + enum radeon_bo_priority priority)
> {
> struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
> - unsigned mask = buffers->enabled_mask;
> + unsigned mask = buffers->enabled_mask & slot_mask;
>
> while (mask) {
> unsigned i = u_bit_scan(&mask);
> if (buffers->buffers[i] == buf) {
> si_desc_reset_buffer_offset(&sctx->b.b,
> descs->list + i*4,
> old_va, buf);
> descs->dirty_mask |= 1u << i;
> sctx->descriptors_dirty |= 1u << descriptors_idx;
>
> radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
> (struct r600_resource *)buf,
> - buffers->shader_usage,
> - buffers->priority, true);
> + usage, priority, true);
> }
> }
> }
>
> static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf,
> uint64_t old_va)
> {
> struct si_context *sctx = (struct si_context*)ctx;
> struct r600_resource *rbuffer = r600_resource(buf);
> unsigned i, shader;
> @@ -1683,30 +1680,36 @@ static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf
> r600_emit_streamout_end(&sctx->b);
> sctx->b.streamout.append_bitmask =
> sctx->b.streamout.enabled_mask;
> r600_streamout_buffers_dirty(&sctx->b);
> }
> }
>
> /* Constant and shader buffers. */
> if (rbuffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
> for (shader = 0; shader < SI_NUM_SHADERS; shader++)
> - si_reset_buffer_resources(sctx, &sctx->const_buffers[shader],
> - si_const_buffer_descriptors_idx(shader),
> - buf, old_va);
> + si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
> + si_const_and_shader_buffer_descriptors_idx(shader),
> + u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
> + buf, old_va,
> + sctx->const_and_shader_buffers[shader].shader_usage_constbuf,
> + sctx->const_and_shader_buffers[shader].priority_constbuf);
> }
>
> if (rbuffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
> for (shader = 0; shader < SI_NUM_SHADERS; shader++)
> - si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader],
> - si_shader_buffer_descriptors_idx(shader),
> - buf, old_va);
> + si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
> + si_const_and_shader_buffer_descriptors_idx(shader),
> + u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS),
> + buf, old_va,
> + sctx->const_and_shader_buffers[shader].shader_usage,
> + sctx->const_and_shader_buffers[shader].priority);
> }
>
> if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
> /* Texture buffers - update bindings. */
> for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
> struct si_sampler_views *views = &sctx->samplers[shader].views;
> struct si_descriptors *descs =
> si_sampler_descriptors(sctx, shader);
> unsigned mask = views->enabled_mask;
>
> @@ -1993,54 +1996,50 @@ void si_emit_compute_shader_userdata(struct si_context *sctx)
> sctx->shader_pointers_dirty &= ~compute_mask;
> }
>
> /* INIT/DEINIT/UPLOAD */
>
> void si_init_all_descriptors(struct si_context *sctx)
> {
> int i;
> unsigned ce_offset = 0;
>
> - STATIC_ASSERT(GFX9_SGPR_TCS_CONST_BUFFERS % 2 == 0);
> - STATIC_ASSERT(GFX9_SGPR_GS_CONST_BUFFERS % 2 == 0);
> + STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0);
> + STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0);
>
> for (i = 0; i < SI_NUM_SHADERS; i++) {
> bool gfx9_tcs = sctx->b.chip_class == GFX9 &&
> i == PIPE_SHADER_TESS_CTRL;
> bool gfx9_gs = sctx->b.chip_class == GFX9 &&
> i == PIPE_SHADER_GEOMETRY;
> /* GFX9 has only 4KB of CE, while previous chips had 32KB.
> * Rarely used descriptors don't use CE RAM.
> */
> bool big_ce = sctx->b.chip_class <= VI;
> bool images_use_ce = big_ce;
> - bool shaderbufs_use_ce = big_ce ||
> - i == PIPE_SHADER_COMPUTE;
> + bool const_and_shaderbufs_use_ce = big_ce ||
> + i == PIPE_SHADER_VERTEX ||
> + i == PIPE_SHADER_FRAGMENT;
> bool samplers_use_ce = big_ce ||
> i == PIPE_SHADER_FRAGMENT;
>
> - si_init_buffer_resources(&sctx->const_buffers[i],
> - si_const_buffer_descriptors(sctx, i),
> - SI_NUM_CONST_BUFFERS,
> - gfx9_tcs ? GFX9_SGPR_TCS_CONST_BUFFERS :
> - gfx9_gs ? GFX9_SGPR_GS_CONST_BUFFERS :
> - SI_SGPR_CONST_BUFFERS,
> - RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER,
> - &ce_offset);
> - si_init_buffer_resources(&sctx->shader_buffers[i],
> - si_shader_buffer_descriptors(sctx, i),
> - SI_NUM_SHADER_BUFFERS,
> - gfx9_tcs ? GFX9_SGPR_TCS_SHADER_BUFFERS :
> - gfx9_gs ? GFX9_SGPR_GS_SHADER_BUFFERS :
> - SI_SGPR_SHADER_BUFFERS,
> - RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER,
> - shaderbufs_use_ce ? &ce_offset : NULL);
> + si_init_buffer_resources(&sctx->const_and_shader_buffers[i],
> + si_const_and_shader_buffer_descriptors(sctx, i),
> + SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS,
> + gfx9_tcs ? GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS :
> + gfx9_gs ? GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS :
> + SI_SGPR_CONST_AND_SHADER_BUFFERS,
> + RADEON_USAGE_READWRITE,
> + RADEON_USAGE_READ,
> + RADEON_PRIO_SHADER_RW_BUFFER,
> + RADEON_PRIO_CONST_BUFFER,
> + const_and_shaderbufs_use_ce ? &ce_offset : NULL);
>
> si_init_descriptors(si_sampler_descriptors(sctx, i),
> gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS :
> gfx9_gs ? GFX9_SGPR_GS_SAMPLERS :
> SI_SGPR_SAMPLERS,
> 16, SI_NUM_SAMPLERS,
> null_texture_descriptor,
> samplers_use_ce ? &ce_offset : NULL);
>
> si_init_descriptors(si_image_descriptors(sctx, i),
> @@ -2048,21 +2047,24 @@ void si_init_all_descriptors(struct si_context *sctx)
> gfx9_gs ? GFX9_SGPR_GS_IMAGES :
> SI_SGPR_IMAGES,
> 8, SI_NUM_IMAGES,
> null_image_descriptor,
> images_use_ce ? &ce_offset : NULL);
> }
>
> si_init_buffer_resources(&sctx->rw_buffers,
> &sctx->descriptors[SI_DESCS_RW_BUFFERS],
> SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
> - RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS,
> + /* The second set of usage/priority is used by
> + * const buffers in RW buffer slots. */
> + RADEON_USAGE_READWRITE, RADEON_USAGE_READ,
> + RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER,
> &ce_offset);
> si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
> 4, SI_NUM_VERTEX_BUFFERS, NULL, NULL);
>
> sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
>
> if (sctx->b.chip_class >= GFX9)
> assert(ce_offset <= 4096);
> else
> assert(ce_offset <= 32768);
> @@ -2141,42 +2143,39 @@ bool si_upload_compute_shader_descriptors(struct si_context *sctx)
> sctx->descriptors_dirty &= ~mask;
>
> return true;
> }
>
> void si_release_all_descriptors(struct si_context *sctx)
> {
> int i;
>
> for (i = 0; i < SI_NUM_SHADERS; i++) {
> - si_release_buffer_resources(&sctx->const_buffers[i],
> - si_const_buffer_descriptors(sctx, i));
> - si_release_buffer_resources(&sctx->shader_buffers[i],
> - si_shader_buffer_descriptors(sctx, i));
> + si_release_buffer_resources(&sctx->const_and_shader_buffers[i],
> + si_const_and_shader_buffer_descriptors(sctx, i));
> si_release_sampler_views(&sctx->samplers[i].views);
> si_release_image_views(&sctx->images[i]);
> }
> si_release_buffer_resources(&sctx->rw_buffers,
> &sctx->descriptors[SI_DESCS_RW_BUFFERS]);
>
> for (i = 0; i < SI_NUM_DESCS; ++i)
> si_release_descriptors(&sctx->descriptors[i]);
> si_release_descriptors(&sctx->vertex_buffers);
> }
>
> void si_all_descriptors_begin_new_cs(struct si_context *sctx)
> {
> int i;
>
> for (i = 0; i < SI_NUM_SHADERS; i++) {
> - si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
> - si_buffer_resources_begin_new_cs(sctx, &sctx->shader_buffers[i]);
> + si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[i]);
> si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
> si_image_views_begin_new_cs(sctx, &sctx->images[i]);
> }
> si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers);
> si_vertex_buffers_begin_new_cs(sctx);
>
> for (i = 0; i < SI_NUM_DESCS; ++i)
> si_descriptors_begin_new_cs(sctx, &sctx->descriptors[i]);
>
> si_shader_userdata_begin_new_cs(sctx);
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index 431d8a3..449a802 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -288,22 +288,21 @@ struct si_context {
> bool flatshade;
> bool do_update_shaders;
>
> /* shader descriptors */
> struct si_descriptors vertex_buffers;
> struct si_descriptors descriptors[SI_NUM_DESCS];
> unsigned descriptors_dirty;
> unsigned shader_pointers_dirty;
> unsigned compressed_tex_shader_mask;
> struct si_buffer_resources rw_buffers;
> - struct si_buffer_resources const_buffers[SI_NUM_SHADERS];
> - struct si_buffer_resources shader_buffers[SI_NUM_SHADERS];
> + struct si_buffer_resources const_and_shader_buffers[SI_NUM_SHADERS];
> struct si_textures_info samplers[SI_NUM_SHADERS];
> struct si_images_info images[SI_NUM_SHADERS];
>
> /* other shader resources */
> struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
> struct pipe_resource *esgs_ring;
> struct pipe_resource *gsvs_ring;
> struct pipe_resource *tf_ring;
> struct pipe_resource *tess_offchip_ring;
> union pipe_color_union *border_color_table; /* in CPU memory, any endian */
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index a49449b..8c5bcb9 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -1719,24 +1719,24 @@ static void declare_compute_memory(struct si_shader_context *ctx,
> "compute_lds",
> LOCAL_ADDR_SPACE);
> LLVMSetAlignment(var, 4);
>
> ctx->shared_memory = LLVMBuildBitCast(gallivm->builder, var, i8p, "");
> }
>
> static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
> {
> LLVMValueRef list_ptr = LLVMGetParam(ctx->main_fn,
> - ctx->param_const_buffers);
> + ctx->param_const_and_shader_buffers);
>
> return ac_build_indexed_load_const(&ctx->ac, list_ptr,
> - LLVMConstInt(ctx->i32, i, 0));
> + LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0));
> }
>
> static LLVMValueRef fetch_constant(
> struct lp_build_tgsi_context *bld_base,
> const struct tgsi_full_src_register *reg,
> enum tgsi_opcode_type type,
> unsigned swizzle)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> struct lp_build_context *base = &bld_base->base;
> @@ -1752,25 +1752,27 @@ static LLVMValueRef fetch_constant(
> for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
> values[chan] = fetch_constant(bld_base, reg, type, chan);
>
> return lp_build_gather_values(&ctx->gallivm, values, 4);
> }
>
> buf = reg->Register.Dimension ? reg->Dimension.Index : 0;
> idx = reg->Register.Index * 4 + swizzle;
>
> if (reg->Register.Dimension && reg->Dimension.Indirect) {
> - LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_buffers);
> + LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
> LLVMValueRef index;
> index = si_get_bounded_indirect_index(ctx, ®->DimIndirect,
> reg->Dimension.Index,
> SI_NUM_CONST_BUFFERS);
> + index = LLVMBuildAdd(ctx->gallivm.builder, index,
> + LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
> bufp = ac_build_indexed_load_const(&ctx->ac, ptr, index);
> } else
> bufp = load_const_buffer_desc(ctx, buf);
>
> if (reg->Register.Indirect) {
> addr = ctx->addrs[ireg->Index][ireg->Swizzle];
> addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg");
> addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16);
> addr = lp_build_add(&bld_base->uint_bld, addr,
> LLVMConstInt(ctx->i32, idx * 4, 0));
> @@ -2789,27 +2791,25 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
> 8 + GFX9_SGPR_TCS_OUT_OFFSETS);
> ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_layout,
> 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
> ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_addr_base64k,
> 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K);
> ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_addr_base64k,
> 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K);
>
> unsigned desc_param = ctx->param_tcs_factor_addr_base64k + 2;
> ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
> - 8 + GFX9_SGPR_TCS_CONST_BUFFERS);
> + 8 + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS);
> ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
> 8 + GFX9_SGPR_TCS_SAMPLERS);
> ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2,
> 8 + GFX9_SGPR_TCS_IMAGES);
> - ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 3,
> - 8 + GFX9_SGPR_TCS_SHADER_BUFFERS);
>
> unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
> ret = si_insert_input_ret_float(ctx, ret,
> ctx->param_tcs_patch_id, vgpr++);
> ret = si_insert_input_ret_float(ctx, ret,
> ctx->param_tcs_rel_ids, vgpr++);
> ctx->return_value = ret;
> }
>
> /* Pass GS inputs from ES to GS on GFX9. */
> @@ -2818,27 +2818,25 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
> LLVMValueRef ret = ctx->return_value;
>
> ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, 0);
> ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2);
> ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
>
> ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
>
> unsigned desc_param = ctx->param_vs_state_bits + 1;
> ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
> - 8 + GFX9_SGPR_GS_CONST_BUFFERS);
> + 8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS);
> ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
> 8 + GFX9_SGPR_GS_SAMPLERS);
> ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2,
> 8 + GFX9_SGPR_GS_IMAGES);
> - ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 3,
> - 8 + GFX9_SGPR_GS_SHADER_BUFFERS);
>
> unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR;
> for (unsigned i = 0; i < 5; i++) {
> unsigned param = ctx->param_gs_vtx01_offset + i;
> ret = si_insert_input_ret_float(ctx, ret, param, vgpr++);
> }
> ctx->return_value = ret;
> }
>
> static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
> @@ -4054,30 +4052,29 @@ static unsigned si_get_max_workgroup_size(const struct si_shader *shader)
> max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
> }
> return max_work_group_size;
> }
>
> static void declare_per_stage_desc_pointers(struct si_shader_context *ctx,
> LLVMTypeRef *params,
> unsigned *num_params,
> bool assign_params)
> {
> - params[(*num_params)++] = si_const_array(ctx->v4i32, SI_NUM_CONST_BUFFERS);
> + params[(*num_params)++] = si_const_array(ctx->v4i32,
> + SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS);
> params[(*num_params)++] = si_const_array(ctx->v8i32, SI_NUM_SAMPLERS);
> params[(*num_params)++] = si_const_array(ctx->v8i32, SI_NUM_IMAGES);
> - params[(*num_params)++] = si_const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
>
> if (assign_params) {
> - ctx->param_const_buffers = *num_params - 4;
> - ctx->param_samplers = *num_params - 3;
> - ctx->param_images = *num_params - 2;
> - ctx->param_shader_buffers = *num_params - 1;
> + ctx->param_const_and_shader_buffers = *num_params - 3;
> + ctx->param_samplers = *num_params - 2;
> + ctx->param_images = *num_params - 1;
> }
> }
>
> static void declare_default_desc_pointers(struct si_shader_context *ctx,
> LLVMTypeRef *params,
> unsigned *num_params)
> {
> params[ctx->param_rw_buffers = (*num_params)++] =
> si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
> declare_per_stage_desc_pointers(ctx, params, num_params, true);
> @@ -6663,36 +6660,34 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
> params[num_params++] = ctx->i32; /* wave info */
> params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
> params[num_params++] = ctx->i32;
> params[num_params++] = ctx->i32;
> params[num_params++] = ctx->i32;
> params[num_params++] = ctx->i64;
> params[num_params++] = ctx->i64;
> params[num_params++] = ctx->i64;
> params[num_params++] = ctx->i64;
> params[num_params++] = ctx->i64;
> - params[num_params++] = ctx->i64;
> params[num_params++] = ctx->i32;
> params[num_params++] = ctx->i32;
> params[num_params++] = ctx->i32;
> params[num_params++] = ctx->i32;
> params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
> params[num_params++] = ctx->i32;
> params[num_params++] = ctx->i32;
> params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
> params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
> } else {
> params[num_params++] = ctx->i64;
> params[num_params++] = ctx->i64;
> params[num_params++] = ctx->i64;
> params[num_params++] = ctx->i64;
> - params[num_params++] = ctx->i64;
> params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
> params[num_params++] = ctx->i32;
> params[num_params++] = ctx->i32;
> params[num_params++] = ctx->i32;
> params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
> params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
> params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
> params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
> }
> last_sgpr = num_params - 1;
> @@ -7035,24 +7030,23 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx,
> {
> struct gallivm_state *gallivm = &ctx->gallivm;
> struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
> LLVMTypeRef params[16+8*4+3];
> LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
> int last_sgpr, num_params = 0, i;
> struct si_ps_exports exp = {};
>
> /* Declare input SGPRs. */
> params[ctx->param_rw_buffers = num_params++] = ctx->i64;
> - params[ctx->param_const_buffers = num_params++] = ctx->i64;
> + params[ctx->param_const_and_shader_buffers = num_params++] = ctx->i64;
> params[ctx->param_samplers = num_params++] = ctx->i64;
> params[ctx->param_images = num_params++] = ctx->i64;
> - params[ctx->param_shader_buffers = num_params++] = ctx->i64;
> assert(num_params == SI_PARAM_ALPHA_REF);
> params[SI_PARAM_ALPHA_REF] = ctx->f32;
> last_sgpr = SI_PARAM_ALPHA_REF;
>
> /* Declare input VGPRs. */
> num_params = (last_sgpr + 1) +
> util_bitcount(key->ps_epilog.colors_written) * 4 +
> key->ps_epilog.writes_z +
> key->ps_epilog.writes_stencil +
> key->ps_epilog.writes_samplemask;
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index 1627de3..08e809c 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -150,28 +150,26 @@ struct ac_shader_binary;
> */
> #define SI_MAX_IO_GENERIC 46
>
> /* SGPR user data indices */
> enum {
> /* GFX9 merged shaders have RW_BUFFERS among the first 8 system SGPRs,
> * and these two are used for other purposes.
> */
> SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */
> SI_SGPR_RW_BUFFERS_HI,
> - SI_SGPR_CONST_BUFFERS,
> - SI_SGPR_CONST_BUFFERS_HI,
> + SI_SGPR_CONST_AND_SHADER_BUFFERS,
> + SI_SGPR_CONST_AND_SHADER_BUFFERS_HI,
> SI_SGPR_SAMPLERS, /* images & sampler states interleaved */
> SI_SGPR_SAMPLERS_HI,
> SI_SGPR_IMAGES,
> SI_SGPR_IMAGES_HI,
> - SI_SGPR_SHADER_BUFFERS,
> - SI_SGPR_SHADER_BUFFERS_HI,
> SI_NUM_RESOURCE_SGPRS,
>
> /* all VS variants */
> SI_SGPR_VERTEX_BUFFERS = SI_NUM_RESOURCE_SGPRS,
> SI_SGPR_VERTEX_BUFFERS_HI,
> SI_SGPR_BASE_VERTEX,
> SI_SGPR_START_INSTANCE,
> SI_SGPR_DRAWID,
> SI_SGPR_VS_STATE_BITS,
> SI_VS_NUM_USER_SGPR,
> @@ -190,53 +188,49 @@ enum {
> GFX6_SGPR_TCS_FACTOR_ADDR_BASE64K,
> GFX6_TCS_NUM_USER_SGPR,
>
> /* GFX9: Merged LS-HS (VS-TCS) only. */
> GFX9_SGPR_TCS_OFFCHIP_LAYOUT = SI_VS_NUM_USER_SGPR,
> GFX9_SGPR_TCS_OUT_OFFSETS,
> GFX9_SGPR_TCS_OUT_LAYOUT,
> GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K,
> GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K,
> GFX9_SGPR_unused_to_align_the_next_pointer,
> - GFX9_SGPR_TCS_CONST_BUFFERS,
> - GFX9_SGPR_TCS_CONST_BUFFERS_HI,
> + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS,
> + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS_HI,
> GFX9_SGPR_TCS_SAMPLERS, /* images & sampler states interleaved */
> GFX9_SGPR_TCS_SAMPLERS_HI,
> GFX9_SGPR_TCS_IMAGES,
> GFX9_SGPR_TCS_IMAGES_HI,
> - GFX9_SGPR_TCS_SHADER_BUFFERS,
> - GFX9_SGPR_TCS_SHADER_BUFFERS_HI,
> GFX9_TCS_NUM_USER_SGPR,
>
> /* GFX9: Merged ES-GS (VS-GS or TES-GS). */
> - GFX9_SGPR_GS_CONST_BUFFERS = SI_VS_NUM_USER_SGPR,
> - GFX9_SGPR_GS_CONST_BUFFERS_HI,
> + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS = SI_VS_NUM_USER_SGPR,
> + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS_HI,
> GFX9_SGPR_GS_SAMPLERS,
> GFX9_SGPR_GS_SAMPLERS_HI,
> GFX9_SGPR_GS_IMAGES,
> GFX9_SGPR_GS_IMAGES_HI,
> - GFX9_SGPR_GS_SHADER_BUFFERS,
> - GFX9_SGPR_GS_SHADER_BUFFERS_HI,
> GFX9_GS_NUM_USER_SGPR,
>
> /* GS limits */
> GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
> SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_RW_BUFFERS_HI + 1,
>
> /* PS only */
> SI_SGPR_ALPHA_REF = SI_NUM_RESOURCE_SGPRS,
> SI_PS_NUM_USER_SGPR,
> };
>
> /* LLVM function parameter indices */
> enum {
> - SI_NUM_RESOURCE_PARAMS = 5,
> + SI_NUM_RESOURCE_PARAMS = 4,
>
> /* PS only parameters */
> SI_PARAM_ALPHA_REF = SI_NUM_RESOURCE_PARAMS,
> SI_PARAM_PRIM_MASK,
> SI_PARAM_PERSP_SAMPLE,
> SI_PARAM_PERSP_CENTER,
> SI_PARAM_PERSP_CENTROID,
> SI_PARAM_PERSP_PULL_MODEL,
> SI_PARAM_LINEAR_SAMPLE,
> SI_PARAM_LINEAR_CENTER,
> diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
> index 69e6dfc..9fd027d 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_internal.h
> +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
> @@ -102,24 +102,23 @@ struct si_shader_context {
> struct tgsi_array_info *temp_arrays;
> LLVMValueRef *temp_array_allocas;
>
> LLVMValueRef undef_alloca;
>
> LLVMValueRef main_fn;
> LLVMTypeRef return_type;
>
> /* Parameter indices for LLVMGetParam. */
> int param_rw_buffers;
> - int param_const_buffers;
> + int param_const_and_shader_buffers;
> int param_samplers;
> int param_images;
> - int param_shader_buffers;
> /* Common inputs for merged shaders. */
> int param_merged_wave_info;
> int param_merged_scratch_offset;
> /* API VS */
> int param_vertex_buffers;
> int param_base_vertex;
> int param_start_instance;
> int param_draw_id;
> int param_vertex_id;
> int param_rel_auto_id;
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> index 13b4694..9c44cff 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> @@ -77,28 +77,33 @@ static LLVMValueRef get_buffer_size(
>
> return size;
> }
>
> static LLVMValueRef
> shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
> const struct tgsi_full_src_register *reg)
> {
> LLVMValueRef index;
> LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
> - ctx->param_shader_buffers);
> + ctx->param_const_and_shader_buffers);
>
> - if (!reg->Register.Indirect)
> - index = LLVMConstInt(ctx->i32, reg->Register.Index, 0);
> - else
> + if (!reg->Register.Indirect) {
> + index = LLVMConstInt(ctx->i32,
> + si_get_shaderbuf_slot(reg->Register.Index), 0);
> + } else {
> index = si_get_bounded_indirect_index(ctx, ®->Indirect,
> reg->Register.Index,
> SI_NUM_SHADER_BUFFERS);
> + index = LLVMBuildSub(ctx->gallivm.builder,
> + LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS - 1, 0),
> + index, "");
> + }
>
> return ac_build_indexed_load_const(&ctx->ac, rsrc_ptr, index);
> }
>
> static bool tgsi_is_array_sampler(unsigned target)
> {
> return target == TGSI_TEXTURE_1D_ARRAY ||
> target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
> target == TGSI_TEXTURE_2D_ARRAY ||
> target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index 629d614..90d0972 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -187,25 +187,26 @@ enum {
> *
> * 0 - rw buffers
> * 1 - vertex const buffers
> * 2 - vertex shader buffers
> * ...
> * 5 - fragment const buffers
> * ...
> * 21 - compute const buffers
> * ...
> */
> -#define SI_SHADER_DESCS_CONST_BUFFERS 0
> -#define SI_SHADER_DESCS_SHADER_BUFFERS 1
> -#define SI_SHADER_DESCS_SAMPLERS 2
> -#define SI_SHADER_DESCS_IMAGES 3
> -#define SI_NUM_SHADER_DESCS 4
> +enum {
> + SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS,
> + SI_SHADER_DESCS_SAMPLERS,
> + SI_SHADER_DESCS_IMAGES,
> + SI_NUM_SHADER_DESCS,
> +};
>
> #define SI_DESCS_RW_BUFFERS 0
> #define SI_DESCS_FIRST_SHADER 1
> #define SI_DESCS_FIRST_COMPUTE (SI_DESCS_FIRST_SHADER + \
> PIPE_SHADER_COMPUTE * SI_NUM_SHADER_DESCS)
> #define SI_NUM_DESCS (SI_DESCS_FIRST_SHADER + \
> SI_NUM_SHADERS * SI_NUM_SHADER_DESCS)
>
> /* This represents descriptors in memory, such as buffer resources,
> * image resources, and sampler states.
> @@ -244,21 +245,23 @@ struct si_descriptors {
> struct si_sampler_views {
> struct pipe_sampler_view *views[SI_NUM_SAMPLERS];
> struct si_sampler_state *sampler_states[SI_NUM_SAMPLERS];
>
> /* The i-th bit is set if that element is enabled (non-NULL resource). */
> unsigned enabled_mask;
> };
>
> struct si_buffer_resources {
> enum radeon_bo_usage shader_usage; /* READ, WRITE, or READWRITE */
> + enum radeon_bo_usage shader_usage_constbuf;
> enum radeon_bo_priority priority;
> + enum radeon_bo_priority priority_constbuf;
> struct pipe_resource **buffers; /* this has num_buffers elements */
>
> /* The i-th bit is set if that element is enabled (non-NULL resource). */
> unsigned enabled_mask;
> };
>
> #define si_pm4_block_idx(member) \
> (offsetof(union si_state, named.member) / sizeof(struct si_pm4_state *))
>
> #define si_pm4_state_changed(sctx, member) \
> @@ -365,11 +368,23 @@ void si_trace_emit(struct si_context *sctx);
>
> static inline unsigned
> si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil)
> {
> if (stencil)
> return rtex->surface.u.legacy.stencil_tiling_index[level];
> else
> return rtex->surface.u.legacy.tiling_index[level];
> }
>
> +static inline unsigned si_get_constbuf_slot(unsigned slot)
> +{
> + /* Constant buffers are in slots [16..31], ascending */
> + return SI_NUM_SHADER_BUFFERS + slot;
> +}
> +
> +static inline unsigned si_get_shaderbuf_slot(unsigned slot)
> +{
> + /* shader buffers are in slots [15..0], descending */
> + return SI_NUM_SHADER_BUFFERS - 1 - slot;
> +}
> +
> #endif
>
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
More information about the mesa-dev
mailing list