[Mesa-dev] [PATCH 01/10] radeonsi: merge constant and shader buffers descriptor lists into one

Marek Olšák maraeo at gmail.com
Wed May 17 19:38:43 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

Constant buffers: slot[16], .. slot[31] (ascending)
Shader buffers: slot[15], .. slot[0] (descending)

The idea is that if we have 4 constant buffers and 2 shader buffers, we only
have to upload 6 slots. That optimization is left for a later commit.
---
 src/gallium/drivers/radeonsi/si_debug.c           |  44 ++++---
 src/gallium/drivers/radeonsi/si_descriptors.c     | 141 +++++++++++-----------
 src/gallium/drivers/radeonsi/si_pipe.h            |   3 +-
 src/gallium/drivers/radeonsi/si_shader.c          |  32 ++---
 src/gallium/drivers/radeonsi/si_shader.h          |  20 ++-
 src/gallium/drivers/radeonsi/si_shader_internal.h |   3 +-
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c |  13 +-
 src/gallium/drivers/radeonsi/si_state.h           |  25 +++-
 8 files changed, 150 insertions(+), 131 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index d1159ad..25c3882 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -373,37 +373,38 @@ static void si_dump_framebuffer(struct si_context *sctx, FILE *f)
 	}
 
 	if (state->zsbuf) {
 		rtex = (struct r600_texture*)state->zsbuf->texture;
 		fprintf(f, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
 		r600_print_texture_info(sctx->b.screen, rtex, f);
 		fprintf(f, "\n");
 	}
 }
 
+typedef unsigned (*slot_remap_func)(unsigned);
+
 static void si_dump_descriptor_list(struct si_descriptors *desc,
 				    const char *shader_name,
 				    const char *elem_name,
 				    unsigned num_elements,
+				    slot_remap_func slot_remap,
 				    FILE *f)
 {
 	unsigned i, j;
-	uint32_t *cpu_list = desc->list;
-	uint32_t *gpu_list = desc->gpu_list;
-	const char *list_note = "GPU list";
-
-	if (!gpu_list) {
-		gpu_list = cpu_list;
-		list_note = "CPU list";
-	}
 
 	for (i = 0; i < num_elements; i++) {
+		unsigned dw_offset = slot_remap(i) * desc->element_dw_size;
+		uint32_t *gpu_ptr = desc->gpu_list ? desc->gpu_list : desc->list;
+		const char *list_note = desc->gpu_list ? "GPU list" : "CPU list";
+		uint32_t *cpu_list = desc->list + dw_offset;
+		uint32_t *gpu_list = gpu_ptr + dw_offset;
+
 		fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
 			shader_name, elem_name, i, list_note);
 
 		switch (desc->element_dw_size) {
 		case 4:
 			for (j = 0; j < 4; j++)
 				ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
 					    gpu_list[j], 0xffffffff);
 			break;
 		case 8:
@@ -437,63 +438,75 @@ static void si_dump_descriptor_list(struct si_descriptors *desc,
 					    gpu_list[12+j], 0xffffffff);
 			break;
 		}
 
 		if (memcmp(gpu_list, cpu_list, desc->element_dw_size * 4) != 0) {
 			fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!"
 				COLOR_RESET "\n");
 		}
 
 		fprintf(f, "\n");
-		gpu_list += desc->element_dw_size;
-		cpu_list += desc->element_dw_size;
 	}
 }
 
+static unsigned si_identity(unsigned slot)
+{
+	return slot;
+}
+
 static void si_dump_descriptors(struct si_context *sctx,
 				enum pipe_shader_type processor,
 				const struct tgsi_shader_info *info, FILE *f)
 {
 	struct si_descriptors *descs =
 		&sctx->descriptors[SI_DESCS_FIRST_SHADER +
 				   processor * SI_NUM_SHADER_DESCS];
 	static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
 
 	static const char *elem_name[] = {
 		" - Constant buffer",
 		" - Shader buffer",
 		" - Sampler",
 		" - Image",
 	};
+	static const slot_remap_func remap_func[] = {
+		si_get_constbuf_slot,
+		si_get_shaderbuf_slot,
+		si_identity,
+		si_identity,
+	};
 	unsigned enabled_slots[] = {
-		sctx->const_buffers[processor].enabled_mask,
-		sctx->shader_buffers[processor].enabled_mask,
+		sctx->const_and_shader_buffers[processor].enabled_mask >> SI_NUM_SHADER_BUFFERS,
+		util_bitreverse(sctx->const_and_shader_buffers[processor].enabled_mask &
+				u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS)),
 		sctx->samplers[processor].views.enabled_mask,
 		sctx->images[processor].enabled_mask,
 	};
 	unsigned required_slots[] = {
 		info ? info->const_buffers_declared : 0,
 		info ? info->shader_buffers_declared : 0,
 		info ? info->samplers_declared : 0,
 		info ? info->images_declared : 0,
 	};
 
 	if (processor == PIPE_SHADER_VERTEX) {
 		assert(info); /* only CS may not have an info struct */
 
 		si_dump_descriptor_list(&sctx->vertex_buffers, shader_name[processor],
-					" - Vertex buffer", info->num_inputs, f);
+					" - Vertex buffer", info->num_inputs,
+					si_identity, f);
 	}
 
 	for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
 		si_dump_descriptor_list(descs, shader_name[processor], elem_name[i],
-					util_last_bit(enabled_slots[i] | required_slots[i]), f);
+					util_last_bit(enabled_slots[i] | required_slots[i]),
+					remap_func[i], f);
 }
 
 static void si_dump_gfx_descriptors(struct si_context *sctx,
 				    const struct si_shader_ctx_state *state,
 				    FILE *f)
 {
 	if (!state->cso || !state->current)
 		return;
 
 	si_dump_descriptors(sctx, state->cso->type, &state->cso->info, f);
@@ -798,21 +811,22 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
 		si_dump_gfx_shader(sctx->screen, &sctx->ps_shader, f);
 		si_dump_compute_shader(sctx->screen, &sctx->cs_shader_state, f);
 
 		if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
 			si_dump_annotated_shaders(sctx, f);
 			si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
 			si_dump_command("Wave information", "umr -O bits -wa", f);
 		}
 
 		si_dump_descriptor_list(&sctx->descriptors[SI_DESCS_RW_BUFFERS],
-					"", "RW buffers", SI_NUM_RW_BUFFERS, f);
+					"", "RW buffers", SI_NUM_RW_BUFFERS,
+					si_identity, f);
 		si_dump_gfx_descriptors(sctx, &sctx->vs_shader, f);
 		si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, f);
 		si_dump_gfx_descriptors(sctx, &sctx->tes_shader, f);
 		si_dump_gfx_descriptors(sctx, &sctx->gs_shader, f);
 		si_dump_gfx_descriptors(sctx, &sctx->ps_shader, f);
 		si_dump_compute_descriptors(sctx, f);
 	}
 
 	if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) {
 		si_dump_bo_list(sctx, &sctx->last_gfx, f);
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index c92a657..5dc7068 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -929,25 +929,29 @@ static void si_bind_sampler_states(struct pipe_context *ctx,
 	}
 }
 
 /* BUFFER RESOURCES */
 
 static void si_init_buffer_resources(struct si_buffer_resources *buffers,
 				     struct si_descriptors *descs,
 				     unsigned num_buffers,
 				     unsigned shader_userdata_index,
 				     enum radeon_bo_usage shader_usage,
+				     enum radeon_bo_usage shader_usage_constbuf,
 				     enum radeon_bo_priority priority,
+				     enum radeon_bo_priority priority_constbuf,
 				     unsigned *ce_offset)
 {
 	buffers->shader_usage = shader_usage;
+	buffers->shader_usage_constbuf = shader_usage_constbuf;
 	buffers->priority = priority;
+	buffers->priority_constbuf = priority_constbuf;
 	buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
 
 	si_init_descriptors(descs, shader_userdata_index, 4,
 			    num_buffers, NULL, ce_offset);
 }
 
 static void si_release_buffer_resources(struct si_buffer_resources *buffers,
 					struct si_descriptors *descs)
 {
 	int i;
@@ -962,22 +966,25 @@ static void si_release_buffer_resources(struct si_buffer_resources *buffers,
 static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
 					     struct si_buffer_resources *buffers)
 {
 	unsigned mask = buffers->enabled_mask;
 
 	/* Add buffers to the CS. */
 	while (mask) {
 		int i = u_bit_scan(&mask);
 
 		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
-				      (struct r600_resource*)buffers->buffers[i],
-				      buffers->shader_usage, buffers->priority);
+			r600_resource(buffers->buffers[i]),
+			i < SI_NUM_SHADER_BUFFERS ? buffers->shader_usage :
+						    buffers->shader_usage_constbuf,
+			i < SI_NUM_SHADER_BUFFERS ? buffers->priority :
+						    buffers->priority_constbuf);
 	}
 }
 
 static void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers,
 					   struct si_descriptors *descs,
 					   unsigned idx, struct pipe_resource **buf,
 					   unsigned *offset, unsigned *size)
 {
 	pipe_resource_reference(buf, buffers->buffers[idx]);
 	if (*buf) {
@@ -1112,30 +1119,30 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 		si_mark_atom_dirty(sctx, &sctx->prefetch_L2);
 	sctx->vertex_buffers_dirty = false;
 	sctx->vertex_buffer_pointer_dirty = true;
 	return true;
 }
 
 
 /* CONSTANT BUFFERS */
 
 static unsigned
-si_const_buffer_descriptors_idx(unsigned shader)
+si_const_and_shader_buffer_descriptors_idx(unsigned shader)
 {
 	return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
-	       SI_SHADER_DESCS_CONST_BUFFERS;
+	       SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS;
 }
 
 static struct si_descriptors *
-si_const_buffer_descriptors(struct si_context *sctx, unsigned shader)
+si_const_and_shader_buffer_descriptors(struct si_context *sctx, unsigned shader)
 {
-	return &sctx->descriptors[si_const_buffer_descriptors_idx(shader)];
+	return &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)];
 }
 
 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
 			    const uint8_t *ptr, unsigned size, uint32_t *const_offset)
 {
 	void *tmp;
 
 	u_upload_alloc(sctx->b.b.const_uploader, 0, size,
 		       si_optimal_tcc_alignment(sctx, size),
 		       const_offset,
@@ -1192,22 +1199,22 @@ static void si_set_constant_buffer(struct si_context *sctx,
 		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
 			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
 			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
 			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
 			  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
 			  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
 
 		buffers->buffers[slot] = buffer;
 		radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
 						    (struct r600_resource*)buffer,
-						    buffers->shader_usage,
-						    buffers->priority, true);
+						    buffers->shader_usage_constbuf,
+						    buffers->priority_constbuf, true);
 		buffers->enabled_mask |= 1u << slot;
 	} else {
 		/* Clear the descriptor. */
 		memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
 		buffers->enabled_mask &= ~(1u << slot);
 	}
 
 	descs->dirty_mask |= 1u << slot;
 	sctx->descriptors_dirty |= 1u << descriptors_idx;
 }
@@ -1221,77 +1228,64 @@ void si_set_rw_buffer(struct si_context *sctx,
 
 static void si_pipe_set_constant_buffer(struct pipe_context *ctx,
 					enum pipe_shader_type shader, uint slot,
 					const struct pipe_constant_buffer *input)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
 	if (shader >= SI_NUM_SHADERS)
 		return;
 
-	si_set_constant_buffer(sctx, &sctx->const_buffers[shader],
-			       si_const_buffer_descriptors_idx(shader),
+	slot = si_get_constbuf_slot(slot);
+	si_set_constant_buffer(sctx, &sctx->const_and_shader_buffers[shader],
+			       si_const_and_shader_buffer_descriptors_idx(shader),
 			       slot, input);
 }
 
 void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader,
 				 uint slot, struct pipe_constant_buffer *cbuf)
 {
 	cbuf->user_buffer = NULL;
 	si_get_buffer_from_descriptors(
-		&sctx->const_buffers[shader],
-		si_const_buffer_descriptors(sctx, shader),
+		&sctx->const_and_shader_buffers[shader],
+		si_const_and_shader_buffer_descriptors(sctx, shader),
 		slot, &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size);
 }
 
 /* SHADER BUFFERS */
 
-static unsigned
-si_shader_buffer_descriptors_idx(enum pipe_shader_type shader)
-{
-	return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
-	       SI_SHADER_DESCS_SHADER_BUFFERS;
-}
-
-static struct si_descriptors *
-si_shader_buffer_descriptors(struct si_context *sctx,
-				  enum pipe_shader_type shader)
-{
-	return &sctx->descriptors[si_shader_buffer_descriptors_idx(shader)];
-}
-
 static void si_set_shader_buffers(struct pipe_context *ctx,
 				  enum pipe_shader_type shader,
 				  unsigned start_slot, unsigned count,
 				  const struct pipe_shader_buffer *sbuffers)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
-	struct si_buffer_resources *buffers = &sctx->shader_buffers[shader];
-	struct si_descriptors *descs = si_shader_buffer_descriptors(sctx, shader);
+	struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
+	struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);
 	unsigned i;
 
 	assert(start_slot + count <= SI_NUM_SHADER_BUFFERS);
 
 	for (i = 0; i < count; ++i) {
 		const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL;
 		struct r600_resource *buf;
-		unsigned slot = start_slot + i;
+		unsigned slot = si_get_shaderbuf_slot(start_slot + i);
 		uint32_t *desc = descs->list + slot * 4;
 		uint64_t va;
 
 		if (!sbuffer || !sbuffer->buffer) {
 			pipe_resource_reference(&buffers->buffers[slot], NULL);
 			memset(desc, 0, sizeof(uint32_t) * 4);
 			buffers->enabled_mask &= ~(1u << slot);
 			descs->dirty_mask |= 1u << slot;
 			sctx->descriptors_dirty |=
-				1u << si_shader_buffer_descriptors_idx(shader);
+				1u << si_const_and_shader_buffer_descriptors_idx(shader);
 			continue;
 		}
 
 		buf = (struct r600_resource *)sbuffer->buffer;
 		va = buf->gpu_address + sbuffer->buffer_offset;
 
 		desc[0] = va;
 		desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
 			  S_008F04_STRIDE(0);
 		desc[2] = sbuffer->buffer_size;
@@ -1304,38 +1298,39 @@ static void si_set_shader_buffers(struct pipe_context *ctx,
 
 		pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
 		radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, buf,
 						    buffers->shader_usage,
 						    buffers->priority, true);
 		buf->bind_history |= PIPE_BIND_SHADER_BUFFER;
 
 		buffers->enabled_mask |= 1u << slot;
 		descs->dirty_mask |= 1u << slot;
 		sctx->descriptors_dirty |=
-			1u << si_shader_buffer_descriptors_idx(shader);
+			1u << si_const_and_shader_buffer_descriptors_idx(shader);
 
 		util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset,
 			       sbuffer->buffer_offset + sbuffer->buffer_size);
 	}
 }
 
 void si_get_shader_buffers(struct si_context *sctx,
 			   enum pipe_shader_type shader,
 			   uint start_slot, uint count,
 			   struct pipe_shader_buffer *sbuf)
 {
-	struct si_buffer_resources *buffers = &sctx->shader_buffers[shader];
-	struct si_descriptors *descs = si_shader_buffer_descriptors(sctx, shader);
+	struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
+	struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);
 
 	for (unsigned i = 0; i < count; ++i) {
 		si_get_buffer_from_descriptors(
-			buffers, descs, start_slot + i,
+			buffers, descs,
+			si_get_shaderbuf_slot(start_slot + i),
 			&sbuf[i].buffer, &sbuf[i].buffer_offset,
 			&sbuf[i].buffer_size);
 	}
 }
 
 /* RING BUFFERS */
 
 void si_set_ring_buffer(struct pipe_context *ctx, uint slot,
 			struct pipe_resource *buffer,
 			unsigned stride, unsigned num_records,
@@ -1596,39 +1591,41 @@ void si_update_compressed_colortex_masks(struct si_context *sctx)
 		si_update_compressed_tex_shader_mask(sctx, i);
 	}
 }
 
 /* BUFFER DISCARD/INVALIDATION */
 
 /** Reset descriptors of buffer resources after \p buf has been invalidated. */
 static void si_reset_buffer_resources(struct si_context *sctx,
 				      struct si_buffer_resources *buffers,
 				      unsigned descriptors_idx,
+				      unsigned slot_mask,
 				      struct pipe_resource *buf,
-				      uint64_t old_va)
+				      uint64_t old_va,
+				      enum radeon_bo_usage usage,
+				      enum radeon_bo_priority priority)
 {
 	struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
-	unsigned mask = buffers->enabled_mask;
+	unsigned mask = buffers->enabled_mask & slot_mask;
 
 	while (mask) {
 		unsigned i = u_bit_scan(&mask);
 		if (buffers->buffers[i] == buf) {
 			si_desc_reset_buffer_offset(&sctx->b.b,
 						    descs->list + i*4,
 						    old_va, buf);
 			descs->dirty_mask |= 1u << i;
 			sctx->descriptors_dirty |= 1u << descriptors_idx;
 
 			radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
 							    (struct r600_resource *)buf,
-							    buffers->shader_usage,
-							    buffers->priority, true);
+							    usage, priority, true);
 		}
 	}
 }
 
 static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf,
 			     uint64_t old_va)
 {
 	struct si_context *sctx = (struct si_context*)ctx;
 	struct r600_resource *rbuffer = r600_resource(buf);
 	unsigned i, shader;
@@ -1683,30 +1680,36 @@ static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf
 				r600_emit_streamout_end(&sctx->b);
 			sctx->b.streamout.append_bitmask =
 					sctx->b.streamout.enabled_mask;
 			r600_streamout_buffers_dirty(&sctx->b);
 		}
 	}
 
 	/* Constant and shader buffers. */
 	if (rbuffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
 		for (shader = 0; shader < SI_NUM_SHADERS; shader++)
-			si_reset_buffer_resources(sctx, &sctx->const_buffers[shader],
-						  si_const_buffer_descriptors_idx(shader),
-						  buf, old_va);
+			si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
+						  si_const_and_shader_buffer_descriptors_idx(shader),
+						  u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
+						  buf, old_va,
+						  sctx->const_and_shader_buffers[shader].shader_usage_constbuf,
+						  sctx->const_and_shader_buffers[shader].priority_constbuf);
 	}
 
 	if (rbuffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
 		for (shader = 0; shader < SI_NUM_SHADERS; shader++)
-			si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader],
-						  si_shader_buffer_descriptors_idx(shader),
-						  buf, old_va);
+			si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
+						  si_const_and_shader_buffer_descriptors_idx(shader),
+						  u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS),
+						  buf, old_va,
+						  sctx->const_and_shader_buffers[shader].shader_usage,
+						  sctx->const_and_shader_buffers[shader].priority);
 	}
 
 	if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
 		/* Texture buffers - update bindings. */
 		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
 			struct si_sampler_views *views = &sctx->samplers[shader].views;
 			struct si_descriptors *descs =
 				si_sampler_descriptors(sctx, shader);
 			unsigned mask = views->enabled_mask;
 
@@ -1993,54 +1996,50 @@ void si_emit_compute_shader_userdata(struct si_context *sctx)
 	sctx->shader_pointers_dirty &= ~compute_mask;
 }
 
 /* INIT/DEINIT/UPLOAD */
 
 void si_init_all_descriptors(struct si_context *sctx)
 {
 	int i;
 	unsigned ce_offset = 0;
 
-	STATIC_ASSERT(GFX9_SGPR_TCS_CONST_BUFFERS % 2 == 0);
-	STATIC_ASSERT(GFX9_SGPR_GS_CONST_BUFFERS % 2 == 0);
+	STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0);
+	STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0);
 
 	for (i = 0; i < SI_NUM_SHADERS; i++) {
 		bool gfx9_tcs = sctx->b.chip_class == GFX9 &&
 				i == PIPE_SHADER_TESS_CTRL;
 		bool gfx9_gs = sctx->b.chip_class == GFX9 &&
 			       i == PIPE_SHADER_GEOMETRY;
 		/* GFX9 has only 4KB of CE, while previous chips had 32KB.
 		 * Rarely used descriptors don't use CE RAM.
 		 */
 		bool big_ce = sctx->b.chip_class <= VI;
 		bool images_use_ce = big_ce;
-		bool shaderbufs_use_ce = big_ce ||
-					 i == PIPE_SHADER_COMPUTE;
+		bool const_and_shaderbufs_use_ce = big_ce ||
+						   i == PIPE_SHADER_VERTEX ||
+						   i == PIPE_SHADER_FRAGMENT;
 		bool samplers_use_ce = big_ce ||
 				       i == PIPE_SHADER_FRAGMENT;
 
-		si_init_buffer_resources(&sctx->const_buffers[i],
-					 si_const_buffer_descriptors(sctx, i),
-					 SI_NUM_CONST_BUFFERS,
-					 gfx9_tcs ? GFX9_SGPR_TCS_CONST_BUFFERS :
-					 gfx9_gs ? GFX9_SGPR_GS_CONST_BUFFERS :
-						   SI_SGPR_CONST_BUFFERS,
-					 RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER,
-					 &ce_offset);
-		si_init_buffer_resources(&sctx->shader_buffers[i],
-					 si_shader_buffer_descriptors(sctx, i),
-					 SI_NUM_SHADER_BUFFERS,
-					 gfx9_tcs ? GFX9_SGPR_TCS_SHADER_BUFFERS :
-					 gfx9_gs ? GFX9_SGPR_GS_SHADER_BUFFERS :
-						   SI_SGPR_SHADER_BUFFERS,
-					 RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER,
-					 shaderbufs_use_ce ? &ce_offset : NULL);
+		si_init_buffer_resources(&sctx->const_and_shader_buffers[i],
+					 si_const_and_shader_buffer_descriptors(sctx, i),
+					 SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS,
+					 gfx9_tcs ? GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS :
+					 gfx9_gs ? GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS :
+						   SI_SGPR_CONST_AND_SHADER_BUFFERS,
+					 RADEON_USAGE_READWRITE,
+					 RADEON_USAGE_READ,
+					 RADEON_PRIO_SHADER_RW_BUFFER,
+					 RADEON_PRIO_CONST_BUFFER,
+					 const_and_shaderbufs_use_ce ? &ce_offset : NULL);
 
 		si_init_descriptors(si_sampler_descriptors(sctx, i),
 				    gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS :
 				    gfx9_gs ? GFX9_SGPR_GS_SAMPLERS :
 					      SI_SGPR_SAMPLERS,
 				    16, SI_NUM_SAMPLERS,
 				    null_texture_descriptor,
 				    samplers_use_ce ? &ce_offset : NULL);
 
 		si_init_descriptors(si_image_descriptors(sctx, i),
@@ -2048,21 +2047,24 @@ void si_init_all_descriptors(struct si_context *sctx)
 				    gfx9_gs ? GFX9_SGPR_GS_IMAGES :
 					      SI_SGPR_IMAGES,
 				    8, SI_NUM_IMAGES,
 				    null_image_descriptor,
 				    images_use_ce ? &ce_offset : NULL);
 	}
 
 	si_init_buffer_resources(&sctx->rw_buffers,
 				 &sctx->descriptors[SI_DESCS_RW_BUFFERS],
 				 SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
-				 RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS,
+				 /* The second set of usage/priority is used by
+				  * const buffers in RW buffer slots. */
+				 RADEON_USAGE_READWRITE, RADEON_USAGE_READ,
+				 RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER,
 				 &ce_offset);
 	si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
 			    4, SI_NUM_VERTEX_BUFFERS, NULL, NULL);
 
 	sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
 
 	if (sctx->b.chip_class >= GFX9)
 		assert(ce_offset <= 4096);
 	else
 		assert(ce_offset <= 32768);
@@ -2141,42 +2143,39 @@ bool si_upload_compute_shader_descriptors(struct si_context *sctx)
 	sctx->descriptors_dirty &= ~mask;
 
 	return true;
 }
 
 void si_release_all_descriptors(struct si_context *sctx)
 {
 	int i;
 
 	for (i = 0; i < SI_NUM_SHADERS; i++) {
-		si_release_buffer_resources(&sctx->const_buffers[i],
-					    si_const_buffer_descriptors(sctx, i));
-		si_release_buffer_resources(&sctx->shader_buffers[i],
-					    si_shader_buffer_descriptors(sctx, i));
+		si_release_buffer_resources(&sctx->const_and_shader_buffers[i],
+					    si_const_and_shader_buffer_descriptors(sctx, i));
 		si_release_sampler_views(&sctx->samplers[i].views);
 		si_release_image_views(&sctx->images[i]);
 	}
 	si_release_buffer_resources(&sctx->rw_buffers,
 				    &sctx->descriptors[SI_DESCS_RW_BUFFERS]);
 
 	for (i = 0; i < SI_NUM_DESCS; ++i)
 		si_release_descriptors(&sctx->descriptors[i]);
 	si_release_descriptors(&sctx->vertex_buffers);
 }
 
 void si_all_descriptors_begin_new_cs(struct si_context *sctx)
 {
 	int i;
 
 	for (i = 0; i < SI_NUM_SHADERS; i++) {
-		si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
-		si_buffer_resources_begin_new_cs(sctx, &sctx->shader_buffers[i]);
+		si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[i]);
 		si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
 		si_image_views_begin_new_cs(sctx, &sctx->images[i]);
 	}
 	si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers);
 	si_vertex_buffers_begin_new_cs(sctx);
 
 	for (i = 0; i < SI_NUM_DESCS; ++i)
 		si_descriptors_begin_new_cs(sctx, &sctx->descriptors[i]);
 
 	si_shader_userdata_begin_new_cs(sctx);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 431d8a3..449a802 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -288,22 +288,21 @@ struct si_context {
 	bool				flatshade;
 	bool				do_update_shaders;
 
 	/* shader descriptors */
 	struct si_descriptors		vertex_buffers;
 	struct si_descriptors		descriptors[SI_NUM_DESCS];
 	unsigned			descriptors_dirty;
 	unsigned			shader_pointers_dirty;
 	unsigned			compressed_tex_shader_mask;
 	struct si_buffer_resources	rw_buffers;
-	struct si_buffer_resources	const_buffers[SI_NUM_SHADERS];
-	struct si_buffer_resources	shader_buffers[SI_NUM_SHADERS];
+	struct si_buffer_resources	const_and_shader_buffers[SI_NUM_SHADERS];
 	struct si_textures_info		samplers[SI_NUM_SHADERS];
 	struct si_images_info		images[SI_NUM_SHADERS];
 
 	/* other shader resources */
 	struct pipe_constant_buffer	null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
 	struct pipe_resource		*esgs_ring;
 	struct pipe_resource		*gsvs_ring;
 	struct pipe_resource		*tf_ring;
 	struct pipe_resource		*tess_offchip_ring;
 	union pipe_color_union		*border_color_table; /* in CPU memory, any endian */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index a49449b..8c5bcb9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1719,24 +1719,24 @@ static void declare_compute_memory(struct si_shader_context *ctx,
 	                                  "compute_lds",
 	                                  LOCAL_ADDR_SPACE);
 	LLVMSetAlignment(var, 4);
 
 	ctx->shared_memory = LLVMBuildBitCast(gallivm->builder, var, i8p, "");
 }
 
 static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
 {
 	LLVMValueRef list_ptr = LLVMGetParam(ctx->main_fn,
-					     ctx->param_const_buffers);
+					     ctx->param_const_and_shader_buffers);
 
 	return ac_build_indexed_load_const(&ctx->ac, list_ptr,
-					LLVMConstInt(ctx->i32, i, 0));
+			LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0));
 }
 
 static LLVMValueRef fetch_constant(
 	struct lp_build_tgsi_context *bld_base,
 	const struct tgsi_full_src_register *reg,
 	enum tgsi_opcode_type type,
 	unsigned swizzle)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct lp_build_context *base = &bld_base->base;
@@ -1752,25 +1752,27 @@ static LLVMValueRef fetch_constant(
 		for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
 			values[chan] = fetch_constant(bld_base, reg, type, chan);
 
 		return lp_build_gather_values(&ctx->gallivm, values, 4);
 	}
 
 	buf = reg->Register.Dimension ? reg->Dimension.Index : 0;
 	idx = reg->Register.Index * 4 + swizzle;
 
 	if (reg->Register.Dimension && reg->Dimension.Indirect) {
-		LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_buffers);
+		LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
 		LLVMValueRef index;
 		index = si_get_bounded_indirect_index(ctx, &reg->DimIndirect,
 						      reg->Dimension.Index,
 						      SI_NUM_CONST_BUFFERS);
+		index = LLVMBuildAdd(ctx->gallivm.builder, index,
+				     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
 		bufp = ac_build_indexed_load_const(&ctx->ac, ptr, index);
 	} else
 		bufp = load_const_buffer_desc(ctx, buf);
 
 	if (reg->Register.Indirect) {
 		addr = ctx->addrs[ireg->Index][ireg->Swizzle];
 		addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg");
 		addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16);
 		addr = lp_build_add(&bld_base->uint_bld, addr,
 				    LLVMConstInt(ctx->i32, idx * 4, 0));
@@ -2789,27 +2791,25 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
 				  8 + GFX9_SGPR_TCS_OUT_OFFSETS);
 	ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_layout,
 				  8 + GFX9_SGPR_TCS_OUT_LAYOUT);
 	ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_addr_base64k,
 				  8 + GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K);
 	ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_addr_base64k,
 				  8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K);
 
 	unsigned desc_param = ctx->param_tcs_factor_addr_base64k + 2;
 	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
-					   8 + GFX9_SGPR_TCS_CONST_BUFFERS);
+					   8 + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS);
 	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
 					   8 + GFX9_SGPR_TCS_SAMPLERS);
 	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2,
 					   8 + GFX9_SGPR_TCS_IMAGES);
-	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 3,
-					   8 + GFX9_SGPR_TCS_SHADER_BUFFERS);
 
 	unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
 	ret = si_insert_input_ret_float(ctx, ret,
 					ctx->param_tcs_patch_id, vgpr++);
 	ret = si_insert_input_ret_float(ctx, ret,
 					ctx->param_tcs_rel_ids, vgpr++);
 	ctx->return_value = ret;
 }
 
 /* Pass GS inputs from ES to GS on GFX9. */
@@ -2818,27 +2818,25 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
 	LLVMValueRef ret = ctx->return_value;
 
 	ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, 0);
 	ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2);
 	ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
 
 	ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
 
 	unsigned desc_param = ctx->param_vs_state_bits + 1;
 	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
-					   8 + GFX9_SGPR_GS_CONST_BUFFERS);
+					   8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS);
 	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
 					   8 + GFX9_SGPR_GS_SAMPLERS);
 	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2,
 					   8 + GFX9_SGPR_GS_IMAGES);
-	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 3,
-					   8 + GFX9_SGPR_GS_SHADER_BUFFERS);
 
 	unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR;
 	for (unsigned i = 0; i < 5; i++) {
 		unsigned param = ctx->param_gs_vtx01_offset + i;
 		ret = si_insert_input_ret_float(ctx, ret, param, vgpr++);
 	}
 	ctx->return_value = ret;
 }
 
 static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
@@ -4054,30 +4052,29 @@ static unsigned si_get_max_workgroup_size(const struct si_shader *shader)
 		max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
 	}
 	return max_work_group_size;
 }
 
 static void declare_per_stage_desc_pointers(struct si_shader_context *ctx,
 					    LLVMTypeRef *params,
 					    unsigned *num_params,
 					    bool assign_params)
 {
-	params[(*num_params)++] = si_const_array(ctx->v4i32, SI_NUM_CONST_BUFFERS);
+	params[(*num_params)++] = si_const_array(ctx->v4i32,
+						 SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS);
 	params[(*num_params)++] = si_const_array(ctx->v8i32, SI_NUM_SAMPLERS);
 	params[(*num_params)++] = si_const_array(ctx->v8i32, SI_NUM_IMAGES);
-	params[(*num_params)++] = si_const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
 
 	if (assign_params) {
-		ctx->param_const_buffers  = *num_params - 4;
-		ctx->param_samplers	  = *num_params - 3;
-		ctx->param_images	  = *num_params - 2;
-		ctx->param_shader_buffers = *num_params - 1;
+		ctx->param_const_and_shader_buffers = *num_params - 3;
+		ctx->param_samplers	  = *num_params - 2;
+		ctx->param_images	  = *num_params - 1;
 	}
 }
 
 static void declare_default_desc_pointers(struct si_shader_context *ctx,
 					  LLVMTypeRef *params,
 				          unsigned *num_params)
 {
 	params[ctx->param_rw_buffers = (*num_params)++] =
 		si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
 	declare_per_stage_desc_pointers(ctx, params, num_params, true);
@@ -6663,36 +6660,34 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
 		params[num_params++] = ctx->i32; /* wave info */
 		params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[num_params++] = ctx->i64;
 		params[num_params++] = ctx->i64;
 		params[num_params++] = ctx->i64;
 		params[num_params++] = ctx->i64;
 		params[num_params++] = ctx->i64;
-		params[num_params++] = ctx->i64;
 		params[num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
 		params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
 	} else {
 		params[num_params++] = ctx->i64;
 		params[num_params++] = ctx->i64;
 		params[num_params++] = ctx->i64;
 		params[num_params++] = ctx->i64;
-		params[num_params++] = ctx->i64;
 		params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[num_params++] = ctx->i32;
 		params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
 		params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
 		params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
 		params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
 	}
 	last_sgpr = num_params - 1;
@@ -7035,24 +7030,23 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx,
 {
 	struct gallivm_state *gallivm = &ctx->gallivm;
 	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
 	LLVMTypeRef params[16+8*4+3];
 	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
 	int last_sgpr, num_params = 0, i;
 	struct si_ps_exports exp = {};
 
 	/* Declare input SGPRs. */
 	params[ctx->param_rw_buffers = num_params++] = ctx->i64;
-	params[ctx->param_const_buffers = num_params++] = ctx->i64;
+	params[ctx->param_const_and_shader_buffers = num_params++] = ctx->i64;
 	params[ctx->param_samplers = num_params++] = ctx->i64;
 	params[ctx->param_images = num_params++] = ctx->i64;
-	params[ctx->param_shader_buffers = num_params++] = ctx->i64;
 	assert(num_params == SI_PARAM_ALPHA_REF);
 	params[SI_PARAM_ALPHA_REF] = ctx->f32;
 	last_sgpr = SI_PARAM_ALPHA_REF;
 
 	/* Declare input VGPRs. */
 	num_params = (last_sgpr + 1) +
 		     util_bitcount(key->ps_epilog.colors_written) * 4 +
 		     key->ps_epilog.writes_z +
 		     key->ps_epilog.writes_stencil +
 		     key->ps_epilog.writes_samplemask;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 1627de3..08e809c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -150,28 +150,26 @@ struct ac_shader_binary;
  */
 #define SI_MAX_IO_GENERIC       46
 
 /* SGPR user data indices */
 enum {
 	/* GFX9 merged shaders have RW_BUFFERS among the first 8 system SGPRs,
 	 * and these two are used for other purposes.
 	 */
 	SI_SGPR_RW_BUFFERS,  /* rings (& stream-out, VS only) */
 	SI_SGPR_RW_BUFFERS_HI,
-	SI_SGPR_CONST_BUFFERS,
-	SI_SGPR_CONST_BUFFERS_HI,
+	SI_SGPR_CONST_AND_SHADER_BUFFERS,
+	SI_SGPR_CONST_AND_SHADER_BUFFERS_HI,
 	SI_SGPR_SAMPLERS,  /* images & sampler states interleaved */
 	SI_SGPR_SAMPLERS_HI,
 	SI_SGPR_IMAGES,
 	SI_SGPR_IMAGES_HI,
-	SI_SGPR_SHADER_BUFFERS,
-	SI_SGPR_SHADER_BUFFERS_HI,
 	SI_NUM_RESOURCE_SGPRS,
 
 	/* all VS variants */
 	SI_SGPR_VERTEX_BUFFERS	= SI_NUM_RESOURCE_SGPRS,
 	SI_SGPR_VERTEX_BUFFERS_HI,
 	SI_SGPR_BASE_VERTEX,
 	SI_SGPR_START_INSTANCE,
 	SI_SGPR_DRAWID,
 	SI_SGPR_VS_STATE_BITS,
 	SI_VS_NUM_USER_SGPR,
@@ -190,53 +188,49 @@ enum {
 	GFX6_SGPR_TCS_FACTOR_ADDR_BASE64K,
 	GFX6_TCS_NUM_USER_SGPR,
 
 	/* GFX9: Merged LS-HS (VS-TCS) only. */
 	GFX9_SGPR_TCS_OFFCHIP_LAYOUT = SI_VS_NUM_USER_SGPR,
 	GFX9_SGPR_TCS_OUT_OFFSETS,
 	GFX9_SGPR_TCS_OUT_LAYOUT,
 	GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K,
 	GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K,
 	GFX9_SGPR_unused_to_align_the_next_pointer,
-	GFX9_SGPR_TCS_CONST_BUFFERS,
-	GFX9_SGPR_TCS_CONST_BUFFERS_HI,
+	GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS,
+	GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS_HI,
 	GFX9_SGPR_TCS_SAMPLERS,  /* images & sampler states interleaved */
 	GFX9_SGPR_TCS_SAMPLERS_HI,
 	GFX9_SGPR_TCS_IMAGES,
 	GFX9_SGPR_TCS_IMAGES_HI,
-	GFX9_SGPR_TCS_SHADER_BUFFERS,
-	GFX9_SGPR_TCS_SHADER_BUFFERS_HI,
 	GFX9_TCS_NUM_USER_SGPR,
 
 	/* GFX9: Merged ES-GS (VS-GS or TES-GS). */
-	GFX9_SGPR_GS_CONST_BUFFERS = SI_VS_NUM_USER_SGPR,
-	GFX9_SGPR_GS_CONST_BUFFERS_HI,
+	GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS = SI_VS_NUM_USER_SGPR,
+	GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS_HI,
 	GFX9_SGPR_GS_SAMPLERS,
 	GFX9_SGPR_GS_SAMPLERS_HI,
 	GFX9_SGPR_GS_IMAGES,
 	GFX9_SGPR_GS_IMAGES_HI,
-	GFX9_SGPR_GS_SHADER_BUFFERS,
-	GFX9_SGPR_GS_SHADER_BUFFERS_HI,
 	GFX9_GS_NUM_USER_SGPR,
 
 	/* GS limits */
 	GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
 	SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_RW_BUFFERS_HI + 1,
 
 	/* PS only */
 	SI_SGPR_ALPHA_REF	= SI_NUM_RESOURCE_SGPRS,
 	SI_PS_NUM_USER_SGPR,
 };
 
 /* LLVM function parameter indices */
 enum {
-	SI_NUM_RESOURCE_PARAMS = 5,
+	SI_NUM_RESOURCE_PARAMS = 4,
 
 	/* PS only parameters */
 	SI_PARAM_ALPHA_REF = SI_NUM_RESOURCE_PARAMS,
 	SI_PARAM_PRIM_MASK,
 	SI_PARAM_PERSP_SAMPLE,
 	SI_PARAM_PERSP_CENTER,
 	SI_PARAM_PERSP_CENTROID,
 	SI_PARAM_PERSP_PULL_MODEL,
 	SI_PARAM_LINEAR_SAMPLE,
 	SI_PARAM_LINEAR_CENTER,
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 69e6dfc..9fd027d 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -102,24 +102,23 @@ struct si_shader_context {
 	struct tgsi_array_info *temp_arrays;
 	LLVMValueRef *temp_array_allocas;
 
 	LLVMValueRef undef_alloca;
 
 	LLVMValueRef main_fn;
 	LLVMTypeRef return_type;
 
 	/* Parameter indices for LLVMGetParam. */
 	int param_rw_buffers;
-	int param_const_buffers;
+	int param_const_and_shader_buffers;
 	int param_samplers;
 	int param_images;
-	int param_shader_buffers;
 	/* Common inputs for merged shaders. */
 	int param_merged_wave_info;
 	int param_merged_scratch_offset;
 	/* API VS */
 	int param_vertex_buffers;
 	int param_base_vertex;
 	int param_start_instance;
 	int param_draw_id;
 	int param_vertex_id;
 	int param_rel_auto_id;
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 13b4694..9c44cff 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -77,28 +77,33 @@ static LLVMValueRef get_buffer_size(
 
 	return size;
 }
 
 static LLVMValueRef
 shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
 			 const struct tgsi_full_src_register *reg)
 {
 	LLVMValueRef index;
 	LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
-					     ctx->param_shader_buffers);
+					     ctx->param_const_and_shader_buffers);
 
-	if (!reg->Register.Indirect)
-		index = LLVMConstInt(ctx->i32, reg->Register.Index, 0);
-	else
+	if (!reg->Register.Indirect) {
+		index = LLVMConstInt(ctx->i32,
+				     si_get_shaderbuf_slot(reg->Register.Index), 0);
+	} else {
 		index = si_get_bounded_indirect_index(ctx, &reg->Indirect,
 						      reg->Register.Index,
 						      SI_NUM_SHADER_BUFFERS);
+		index = LLVMBuildSub(ctx->gallivm.builder,
+				     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS - 1, 0),
+				     index, "");
+	}
 
 	return ac_build_indexed_load_const(&ctx->ac, rsrc_ptr, index);
 }
 
 static bool tgsi_is_array_sampler(unsigned target)
 {
 	return target == TGSI_TEXTURE_1D_ARRAY ||
 	       target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
 	       target == TGSI_TEXTURE_2D_ARRAY ||
 	       target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 629d614..90d0972 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -187,25 +187,26 @@ enum {
  *
  *  0 - rw buffers
  *  1 - vertex const buffers
  *  2 - vertex shader buffers
  *   ...
  *  5 - fragment const buffers
  *   ...
  *  21 - compute const buffers
  *   ...
  */
-#define SI_SHADER_DESCS_CONST_BUFFERS  0
-#define SI_SHADER_DESCS_SHADER_BUFFERS 1
-#define SI_SHADER_DESCS_SAMPLERS       2
-#define SI_SHADER_DESCS_IMAGES         3
-#define SI_NUM_SHADER_DESCS            4
+enum {
+	SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS,
+	SI_SHADER_DESCS_SAMPLERS,
+	SI_SHADER_DESCS_IMAGES,
+	SI_NUM_SHADER_DESCS,
+};
 
 #define SI_DESCS_RW_BUFFERS            0
 #define SI_DESCS_FIRST_SHADER          1
 #define SI_DESCS_FIRST_COMPUTE         (SI_DESCS_FIRST_SHADER + \
                                         PIPE_SHADER_COMPUTE * SI_NUM_SHADER_DESCS)
 #define SI_NUM_DESCS                   (SI_DESCS_FIRST_SHADER + \
                                         SI_NUM_SHADERS * SI_NUM_SHADER_DESCS)
 
 /* This represents descriptors in memory, such as buffer resources,
  * image resources, and sampler states.
@@ -244,21 +245,23 @@ struct si_descriptors {
 struct si_sampler_views {
 	struct pipe_sampler_view	*views[SI_NUM_SAMPLERS];
 	struct si_sampler_state		*sampler_states[SI_NUM_SAMPLERS];
 
 	/* The i-th bit is set if that element is enabled (non-NULL resource). */
 	unsigned			enabled_mask;
 };
 
 struct si_buffer_resources {
 	enum radeon_bo_usage		shader_usage; /* READ, WRITE, or READWRITE */
+	enum radeon_bo_usage		shader_usage_constbuf;
 	enum radeon_bo_priority		priority;
+	enum radeon_bo_priority		priority_constbuf;
 	struct pipe_resource		**buffers; /* this has num_buffers elements */
 
 	/* The i-th bit is set if that element is enabled (non-NULL resource). */
 	unsigned			enabled_mask;
 };
 
 #define si_pm4_block_idx(member) \
 	(offsetof(union si_state, named.member) / sizeof(struct si_pm4_state *))
 
 #define si_pm4_state_changed(sctx, member) \
@@ -365,11 +368,23 @@ void si_trace_emit(struct si_context *sctx);
 
 static inline unsigned
 si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil)
 {
 	if (stencil)
 		return rtex->surface.u.legacy.stencil_tiling_index[level];
 	else
 		return rtex->surface.u.legacy.tiling_index[level];
 }
 
+static inline unsigned si_get_constbuf_slot(unsigned slot)
+{
+	/* Constant buffers are in slots [16..31], ascending */
+	return SI_NUM_SHADER_BUFFERS + slot;
+}
+
+static inline unsigned si_get_shaderbuf_slot(unsigned slot)
+{
+	/* shader buffers are in slots [15..0], descending */
+	return SI_NUM_SHADER_BUFFERS - 1 - slot;
+}
+
 #endif
-- 
2.7.4



More information about the mesa-dev mailing list