[Mesa-dev] [PATCH 01/10] radeonsi: merge constant and shader buffers descriptor lists into one

Nicolai Hähnle nhaehnle at gmail.com
Thu May 18 09:28:35 UTC 2017


On 17.05.2017 21:38, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> Constant buffers: slot[16], .. slot[31] (ascending)
> Shader buffers: slot[15], .. slot[0] (descending)
>
> The idea is that if we have 4 constant buffers and 2 shader buffers, we only
> have to upload 6 slots. That optimization is left for a later commit.
> ---
>  src/gallium/drivers/radeonsi/si_debug.c           |  44 ++++---
>  src/gallium/drivers/radeonsi/si_descriptors.c     | 141 +++++++++++-----------
>  src/gallium/drivers/radeonsi/si_pipe.h            |   3 +-
>  src/gallium/drivers/radeonsi/si_shader.c          |  32 ++---
>  src/gallium/drivers/radeonsi/si_shader.h          |  20 ++-
>  src/gallium/drivers/radeonsi/si_shader_internal.h |   3 +-
>  src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c |  13 +-
>  src/gallium/drivers/radeonsi/si_state.h           |  25 +++-
>  8 files changed, 150 insertions(+), 131 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
> index d1159ad..25c3882 100644
> --- a/src/gallium/drivers/radeonsi/si_debug.c
> +++ b/src/gallium/drivers/radeonsi/si_debug.c
> @@ -373,37 +373,38 @@ static void si_dump_framebuffer(struct si_context *sctx, FILE *f)
>  	}
>
>  	if (state->zsbuf) {
>  		rtex = (struct r600_texture*)state->zsbuf->texture;
>  		fprintf(f, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
>  		r600_print_texture_info(sctx->b.screen, rtex, f);
>  		fprintf(f, "\n");
>  	}
>  }
>
> +typedef unsigned (*slot_remap_func)(unsigned);
> +
>  static void si_dump_descriptor_list(struct si_descriptors *desc,
>  				    const char *shader_name,
>  				    const char *elem_name,
>  				    unsigned num_elements,
> +				    slot_remap_func slot_remap,
>  				    FILE *f)
>  {
>  	unsigned i, j;
> -	uint32_t *cpu_list = desc->list;
> -	uint32_t *gpu_list = desc->gpu_list;
> -	const char *list_note = "GPU list";
> -
> -	if (!gpu_list) {
> -		gpu_list = cpu_list;
> -		list_note = "CPU list";
> -	}
>
>  	for (i = 0; i < num_elements; i++) {
> +		unsigned dw_offset = slot_remap(i) * desc->element_dw_size;
> +		uint32_t *gpu_ptr = desc->gpu_list ? desc->gpu_list : desc->list;
> +		const char *list_note = desc->gpu_list ? "GPU list" : "CPU list";
> +		uint32_t *cpu_list = desc->list + dw_offset;
> +		uint32_t *gpu_list = gpu_ptr + dw_offset;
> +
>  		fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
>  			shader_name, elem_name, i, list_note);
>
>  		switch (desc->element_dw_size) {
>  		case 4:
>  			for (j = 0; j < 4; j++)
>  				ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
>  					    gpu_list[j], 0xffffffff);
>  			break;
>  		case 8:
> @@ -437,63 +438,75 @@ static void si_dump_descriptor_list(struct si_descriptors *desc,
>  					    gpu_list[12+j], 0xffffffff);
>  			break;
>  		}
>
>  		if (memcmp(gpu_list, cpu_list, desc->element_dw_size * 4) != 0) {
>  			fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!"
>  				COLOR_RESET "\n");
>  		}
>
>  		fprintf(f, "\n");
> -		gpu_list += desc->element_dw_size;
> -		cpu_list += desc->element_dw_size;
>  	}
>  }
>
> +static unsigned si_identity(unsigned slot)
> +{
> +	return slot;
> +}
> +
>  static void si_dump_descriptors(struct si_context *sctx,
>  				enum pipe_shader_type processor,
>  				const struct tgsi_shader_info *info, FILE *f)
>  {
>  	struct si_descriptors *descs =
>  		&sctx->descriptors[SI_DESCS_FIRST_SHADER +
>  				   processor * SI_NUM_SHADER_DESCS];
>  	static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
>
>  	static const char *elem_name[] = {
>  		" - Constant buffer",
>  		" - Shader buffer",
>  		" - Sampler",
>  		" - Image",
>  	};
> +	static const slot_remap_func remap_func[] = {
> +		si_get_constbuf_slot,
> +		si_get_shaderbuf_slot,
> +		si_identity,
> +		si_identity,
> +	};
>  	unsigned enabled_slots[] = {
> -		sctx->const_buffers[processor].enabled_mask,
> -		sctx->shader_buffers[processor].enabled_mask,
> +		sctx->const_and_shader_buffers[processor].enabled_mask >> SI_NUM_SHADER_BUFFERS,
> +		util_bitreverse(sctx->const_and_shader_buffers[processor].enabled_mask &
> +				u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS)),
>  		sctx->samplers[processor].views.enabled_mask,
>  		sctx->images[processor].enabled_mask,
>  	};
>  	unsigned required_slots[] = {
>  		info ? info->const_buffers_declared : 0,
>  		info ? info->shader_buffers_declared : 0,
>  		info ? info->samplers_declared : 0,
>  		info ? info->images_declared : 0,
>  	};
>
>  	if (processor == PIPE_SHADER_VERTEX) {
>  		assert(info); /* only CS may not have an info struct */
>
>  		si_dump_descriptor_list(&sctx->vertex_buffers, shader_name[processor],
> -					" - Vertex buffer", info->num_inputs, f);
> +					" - Vertex buffer", info->num_inputs,
> +					si_identity, f);
>  	}
>
>  	for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
>  		si_dump_descriptor_list(descs, shader_name[processor], elem_name[i],
> -					util_last_bit(enabled_slots[i] | required_slots[i]), f);
> +					util_last_bit(enabled_slots[i] | required_slots[i]),
> +					remap_func[i], f);
>  }
>
>  static void si_dump_gfx_descriptors(struct si_context *sctx,
>  				    const struct si_shader_ctx_state *state,
>  				    FILE *f)
>  {
>  	if (!state->cso || !state->current)
>  		return;
>
>  	si_dump_descriptors(sctx, state->cso->type, &state->cso->info, f);
> @@ -798,21 +811,22 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
>  		si_dump_gfx_shader(sctx->screen, &sctx->ps_shader, f);
>  		si_dump_compute_shader(sctx->screen, &sctx->cs_shader_state, f);
>
>  		if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
>  			si_dump_annotated_shaders(sctx, f);
>  			si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
>  			si_dump_command("Wave information", "umr -O bits -wa", f);
>  		}
>
>  		si_dump_descriptor_list(&sctx->descriptors[SI_DESCS_RW_BUFFERS],
> -					"", "RW buffers", SI_NUM_RW_BUFFERS, f);
> +					"", "RW buffers", SI_NUM_RW_BUFFERS,
> +					si_identity, f);
>  		si_dump_gfx_descriptors(sctx, &sctx->vs_shader, f);
>  		si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, f);
>  		si_dump_gfx_descriptors(sctx, &sctx->tes_shader, f);
>  		si_dump_gfx_descriptors(sctx, &sctx->gs_shader, f);
>  		si_dump_gfx_descriptors(sctx, &sctx->ps_shader, f);
>  		si_dump_compute_descriptors(sctx, f);
>  	}
>
>  	if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) {
>  		si_dump_bo_list(sctx, &sctx->last_gfx, f);
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> index c92a657..5dc7068 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -929,25 +929,29 @@ static void si_bind_sampler_states(struct pipe_context *ctx,
>  	}
>  }
>
>  /* BUFFER RESOURCES */
>
>  static void si_init_buffer_resources(struct si_buffer_resources *buffers,
>  				     struct si_descriptors *descs,
>  				     unsigned num_buffers,
>  				     unsigned shader_userdata_index,
>  				     enum radeon_bo_usage shader_usage,
> +				     enum radeon_bo_usage shader_usage_constbuf,
>  				     enum radeon_bo_priority priority,
> +				     enum radeon_bo_priority priority_constbuf,
>  				     unsigned *ce_offset)
>  {
>  	buffers->shader_usage = shader_usage;
> +	buffers->shader_usage_constbuf = shader_usage_constbuf;
>  	buffers->priority = priority;
> +	buffers->priority_constbuf = priority_constbuf;
>  	buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
>
>  	si_init_descriptors(descs, shader_userdata_index, 4,
>  			    num_buffers, NULL, ce_offset);
>  }
>
>  static void si_release_buffer_resources(struct si_buffer_resources *buffers,
>  					struct si_descriptors *descs)
>  {
>  	int i;
> @@ -962,22 +966,25 @@ static void si_release_buffer_resources(struct si_buffer_resources *buffers,
>  static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
>  					     struct si_buffer_resources *buffers)
>  {
>  	unsigned mask = buffers->enabled_mask;
>
>  	/* Add buffers to the CS. */
>  	while (mask) {
>  		int i = u_bit_scan(&mask);
>
>  		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
> -				      (struct r600_resource*)buffers->buffers[i],
> -				      buffers->shader_usage, buffers->priority);
> +			r600_resource(buffers->buffers[i]),
> +			i < SI_NUM_SHADER_BUFFERS ? buffers->shader_usage :
> +						    buffers->shader_usage_constbuf,
> +			i < SI_NUM_SHADER_BUFFERS ? buffers->priority :
> +						    buffers->priority_constbuf);
>  	}
>  }
>
>  static void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers,
>  					   struct si_descriptors *descs,
>  					   unsigned idx, struct pipe_resource **buf,
>  					   unsigned *offset, unsigned *size)
>  {
>  	pipe_resource_reference(buf, buffers->buffers[idx]);
>  	if (*buf) {
> @@ -1112,30 +1119,30 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
>  		si_mark_atom_dirty(sctx, &sctx->prefetch_L2);
>  	sctx->vertex_buffers_dirty = false;
>  	sctx->vertex_buffer_pointer_dirty = true;
>  	return true;
>  }
>
>
>  /* CONSTANT BUFFERS */
>
>  static unsigned
> -si_const_buffer_descriptors_idx(unsigned shader)
> +si_const_and_shader_buffer_descriptors_idx(unsigned shader)
>  {
>  	return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
> -	       SI_SHADER_DESCS_CONST_BUFFERS;
> +	       SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS;
>  }
>
>  static struct si_descriptors *
> -si_const_buffer_descriptors(struct si_context *sctx, unsigned shader)
> +si_const_and_shader_buffer_descriptors(struct si_context *sctx, unsigned shader)
>  {
> -	return &sctx->descriptors[si_const_buffer_descriptors_idx(shader)];
> +	return &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)];
>  }
>
>  void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
>  			    const uint8_t *ptr, unsigned size, uint32_t *const_offset)
>  {
>  	void *tmp;
>
>  	u_upload_alloc(sctx->b.b.const_uploader, 0, size,
>  		       si_optimal_tcc_alignment(sctx, size),
>  		       const_offset,
> @@ -1192,22 +1199,22 @@ static void si_set_constant_buffer(struct si_context *sctx,
>  		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
>  			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
>  			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
>  			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
>  			  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
>  			  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
>
>  		buffers->buffers[slot] = buffer;
>  		radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
>  						    (struct r600_resource*)buffer,
> -						    buffers->shader_usage,
> -						    buffers->priority, true);
> +						    buffers->shader_usage_constbuf,
> +						    buffers->priority_constbuf, true);
>  		buffers->enabled_mask |= 1u << slot;
>  	} else {
>  		/* Clear the descriptor. */
>  		memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
>  		buffers->enabled_mask &= ~(1u << slot);
>  	}
>
>  	descs->dirty_mask |= 1u << slot;
>  	sctx->descriptors_dirty |= 1u << descriptors_idx;
>  }
> @@ -1221,77 +1228,64 @@ void si_set_rw_buffer(struct si_context *sctx,
>
>  static void si_pipe_set_constant_buffer(struct pipe_context *ctx,
>  					enum pipe_shader_type shader, uint slot,
>  					const struct pipe_constant_buffer *input)
>  {
>  	struct si_context *sctx = (struct si_context *)ctx;
>
>  	if (shader >= SI_NUM_SHADERS)
>  		return;
>
> -	si_set_constant_buffer(sctx, &sctx->const_buffers[shader],
> -			       si_const_buffer_descriptors_idx(shader),
> +	slot = si_get_constbuf_slot(slot);
> +	si_set_constant_buffer(sctx, &sctx->const_and_shader_buffers[shader],
> +			       si_const_and_shader_buffer_descriptors_idx(shader),
>  			       slot, input);
>  }
>
>  void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader,
>  				 uint slot, struct pipe_constant_buffer *cbuf)
>  {
>  	cbuf->user_buffer = NULL;
>  	si_get_buffer_from_descriptors(
> -		&sctx->const_buffers[shader],
> -		si_const_buffer_descriptors(sctx, shader),
> +		&sctx->const_and_shader_buffers[shader],
> +		si_const_and_shader_buffer_descriptors(sctx, shader),
>  		slot, &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size);

slot needs to be remapped here as well, doesn't it?

With that fixed, the patch is:

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>


>  }
>
>  /* SHADER BUFFERS */
>
> -static unsigned
> -si_shader_buffer_descriptors_idx(enum pipe_shader_type shader)
> -{
> -	return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
> -	       SI_SHADER_DESCS_SHADER_BUFFERS;
> -}
> -
> -static struct si_descriptors *
> -si_shader_buffer_descriptors(struct si_context *sctx,
> -				  enum pipe_shader_type shader)
> -{
> -	return &sctx->descriptors[si_shader_buffer_descriptors_idx(shader)];
> -}
> -
>  static void si_set_shader_buffers(struct pipe_context *ctx,
>  				  enum pipe_shader_type shader,
>  				  unsigned start_slot, unsigned count,
>  				  const struct pipe_shader_buffer *sbuffers)
>  {
>  	struct si_context *sctx = (struct si_context *)ctx;
> -	struct si_buffer_resources *buffers = &sctx->shader_buffers[shader];
> -	struct si_descriptors *descs = si_shader_buffer_descriptors(sctx, shader);
> +	struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
> +	struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);
>  	unsigned i;
>
>  	assert(start_slot + count <= SI_NUM_SHADER_BUFFERS);
>
>  	for (i = 0; i < count; ++i) {
>  		const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL;
>  		struct r600_resource *buf;
> -		unsigned slot = start_slot + i;
> +		unsigned slot = si_get_shaderbuf_slot(start_slot + i);
>  		uint32_t *desc = descs->list + slot * 4;
>  		uint64_t va;
>
>  		if (!sbuffer || !sbuffer->buffer) {
>  			pipe_resource_reference(&buffers->buffers[slot], NULL);
>  			memset(desc, 0, sizeof(uint32_t) * 4);
>  			buffers->enabled_mask &= ~(1u << slot);
>  			descs->dirty_mask |= 1u << slot;
>  			sctx->descriptors_dirty |=
> -				1u << si_shader_buffer_descriptors_idx(shader);
> +				1u << si_const_and_shader_buffer_descriptors_idx(shader);
>  			continue;
>  		}
>
>  		buf = (struct r600_resource *)sbuffer->buffer;
>  		va = buf->gpu_address + sbuffer->buffer_offset;
>
>  		desc[0] = va;
>  		desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
>  			  S_008F04_STRIDE(0);
>  		desc[2] = sbuffer->buffer_size;
> @@ -1304,38 +1298,39 @@ static void si_set_shader_buffers(struct pipe_context *ctx,
>
>  		pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
>  		radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, buf,
>  						    buffers->shader_usage,
>  						    buffers->priority, true);
>  		buf->bind_history |= PIPE_BIND_SHADER_BUFFER;
>
>  		buffers->enabled_mask |= 1u << slot;
>  		descs->dirty_mask |= 1u << slot;
>  		sctx->descriptors_dirty |=
> -			1u << si_shader_buffer_descriptors_idx(shader);
> +			1u << si_const_and_shader_buffer_descriptors_idx(shader);
>
>  		util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset,
>  			       sbuffer->buffer_offset + sbuffer->buffer_size);
>  	}
>  }
>
>  void si_get_shader_buffers(struct si_context *sctx,
>  			   enum pipe_shader_type shader,
>  			   uint start_slot, uint count,
>  			   struct pipe_shader_buffer *sbuf)
>  {
> -	struct si_buffer_resources *buffers = &sctx->shader_buffers[shader];
> -	struct si_descriptors *descs = si_shader_buffer_descriptors(sctx, shader);
> +	struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
> +	struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);
>
>  	for (unsigned i = 0; i < count; ++i) {
>  		si_get_buffer_from_descriptors(
> -			buffers, descs, start_slot + i,
> +			buffers, descs,
> +			si_get_shaderbuf_slot(start_slot + i),
>  			&sbuf[i].buffer, &sbuf[i].buffer_offset,
>  			&sbuf[i].buffer_size);
>  	}
>  }
>
>  /* RING BUFFERS */
>
>  void si_set_ring_buffer(struct pipe_context *ctx, uint slot,
>  			struct pipe_resource *buffer,
>  			unsigned stride, unsigned num_records,
> @@ -1596,39 +1591,41 @@ void si_update_compressed_colortex_masks(struct si_context *sctx)
>  		si_update_compressed_tex_shader_mask(sctx, i);
>  	}
>  }
>
>  /* BUFFER DISCARD/INVALIDATION */
>
>  /** Reset descriptors of buffer resources after \p buf has been invalidated. */
>  static void si_reset_buffer_resources(struct si_context *sctx,
>  				      struct si_buffer_resources *buffers,
>  				      unsigned descriptors_idx,
> +				      unsigned slot_mask,
>  				      struct pipe_resource *buf,
> -				      uint64_t old_va)
> +				      uint64_t old_va,
> +				      enum radeon_bo_usage usage,
> +				      enum radeon_bo_priority priority)
>  {
>  	struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
> -	unsigned mask = buffers->enabled_mask;
> +	unsigned mask = buffers->enabled_mask & slot_mask;
>
>  	while (mask) {
>  		unsigned i = u_bit_scan(&mask);
>  		if (buffers->buffers[i] == buf) {
>  			si_desc_reset_buffer_offset(&sctx->b.b,
>  						    descs->list + i*4,
>  						    old_va, buf);
>  			descs->dirty_mask |= 1u << i;
>  			sctx->descriptors_dirty |= 1u << descriptors_idx;
>
>  			radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
>  							    (struct r600_resource *)buf,
> -							    buffers->shader_usage,
> -							    buffers->priority, true);
> +							    usage, priority, true);
>  		}
>  	}
>  }
>
>  static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf,
>  			     uint64_t old_va)
>  {
>  	struct si_context *sctx = (struct si_context*)ctx;
>  	struct r600_resource *rbuffer = r600_resource(buf);
>  	unsigned i, shader;
> @@ -1683,30 +1680,36 @@ static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf
>  				r600_emit_streamout_end(&sctx->b);
>  			sctx->b.streamout.append_bitmask =
>  					sctx->b.streamout.enabled_mask;
>  			r600_streamout_buffers_dirty(&sctx->b);
>  		}
>  	}
>
>  	/* Constant and shader buffers. */
>  	if (rbuffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
>  		for (shader = 0; shader < SI_NUM_SHADERS; shader++)
> -			si_reset_buffer_resources(sctx, &sctx->const_buffers[shader],
> -						  si_const_buffer_descriptors_idx(shader),
> -						  buf, old_va);
> +			si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
> +						  si_const_and_shader_buffer_descriptors_idx(shader),
> +						  u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
> +						  buf, old_va,
> +						  sctx->const_and_shader_buffers[shader].shader_usage_constbuf,
> +						  sctx->const_and_shader_buffers[shader].priority_constbuf);
>  	}
>
>  	if (rbuffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
>  		for (shader = 0; shader < SI_NUM_SHADERS; shader++)
> -			si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader],
> -						  si_shader_buffer_descriptors_idx(shader),
> -						  buf, old_va);
> +			si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
> +						  si_const_and_shader_buffer_descriptors_idx(shader),
> +						  u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS),
> +						  buf, old_va,
> +						  sctx->const_and_shader_buffers[shader].shader_usage,
> +						  sctx->const_and_shader_buffers[shader].priority);
>  	}
>
>  	if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
>  		/* Texture buffers - update bindings. */
>  		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
>  			struct si_sampler_views *views = &sctx->samplers[shader].views;
>  			struct si_descriptors *descs =
>  				si_sampler_descriptors(sctx, shader);
>  			unsigned mask = views->enabled_mask;
>
> @@ -1993,54 +1996,50 @@ void si_emit_compute_shader_userdata(struct si_context *sctx)
>  	sctx->shader_pointers_dirty &= ~compute_mask;
>  }
>
>  /* INIT/DEINIT/UPLOAD */
>
>  void si_init_all_descriptors(struct si_context *sctx)
>  {
>  	int i;
>  	unsigned ce_offset = 0;
>
> -	STATIC_ASSERT(GFX9_SGPR_TCS_CONST_BUFFERS % 2 == 0);
> -	STATIC_ASSERT(GFX9_SGPR_GS_CONST_BUFFERS % 2 == 0);
> +	STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0);
> +	STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0);
>
>  	for (i = 0; i < SI_NUM_SHADERS; i++) {
>  		bool gfx9_tcs = sctx->b.chip_class == GFX9 &&
>  				i == PIPE_SHADER_TESS_CTRL;
>  		bool gfx9_gs = sctx->b.chip_class == GFX9 &&
>  			       i == PIPE_SHADER_GEOMETRY;
>  		/* GFX9 has only 4KB of CE, while previous chips had 32KB.
>  		 * Rarely used descriptors don't use CE RAM.
>  		 */
>  		bool big_ce = sctx->b.chip_class <= VI;
>  		bool images_use_ce = big_ce;
> -		bool shaderbufs_use_ce = big_ce ||
> -					 i == PIPE_SHADER_COMPUTE;
> +		bool const_and_shaderbufs_use_ce = big_ce ||
> +						   i == PIPE_SHADER_VERTEX ||
> +						   i == PIPE_SHADER_FRAGMENT;
>  		bool samplers_use_ce = big_ce ||
>  				       i == PIPE_SHADER_FRAGMENT;
>
> -		si_init_buffer_resources(&sctx->const_buffers[i],
> -					 si_const_buffer_descriptors(sctx, i),
> -					 SI_NUM_CONST_BUFFERS,
> -					 gfx9_tcs ? GFX9_SGPR_TCS_CONST_BUFFERS :
> -					 gfx9_gs ? GFX9_SGPR_GS_CONST_BUFFERS :
> -						   SI_SGPR_CONST_BUFFERS,
> -					 RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER,
> -					 &ce_offset);
> -		si_init_buffer_resources(&sctx->shader_buffers[i],
> -					 si_shader_buffer_descriptors(sctx, i),
> -					 SI_NUM_SHADER_BUFFERS,
> -					 gfx9_tcs ? GFX9_SGPR_TCS_SHADER_BUFFERS :
> -					 gfx9_gs ? GFX9_SGPR_GS_SHADER_BUFFERS :
> -						   SI_SGPR_SHADER_BUFFERS,
> -					 RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER,
> -					 shaderbufs_use_ce ? &ce_offset : NULL);
> +		si_init_buffer_resources(&sctx->const_and_shader_buffers[i],
> +					 si_const_and_shader_buffer_descriptors(sctx, i),
> +					 SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS,
> +					 gfx9_tcs ? GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS :
> +					 gfx9_gs ? GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS :
> +						   SI_SGPR_CONST_AND_SHADER_BUFFERS,
> +					 RADEON_USAGE_READWRITE,
> +					 RADEON_USAGE_READ,
> +					 RADEON_PRIO_SHADER_RW_BUFFER,
> +					 RADEON_PRIO_CONST_BUFFER,
> +					 const_and_shaderbufs_use_ce ? &ce_offset : NULL);
>
>  		si_init_descriptors(si_sampler_descriptors(sctx, i),
>  				    gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS :
>  				    gfx9_gs ? GFX9_SGPR_GS_SAMPLERS :
>  					      SI_SGPR_SAMPLERS,
>  				    16, SI_NUM_SAMPLERS,
>  				    null_texture_descriptor,
>  				    samplers_use_ce ? &ce_offset : NULL);
>
>  		si_init_descriptors(si_image_descriptors(sctx, i),
> @@ -2048,21 +2047,24 @@ void si_init_all_descriptors(struct si_context *sctx)
>  				    gfx9_gs ? GFX9_SGPR_GS_IMAGES :
>  					      SI_SGPR_IMAGES,
>  				    8, SI_NUM_IMAGES,
>  				    null_image_descriptor,
>  				    images_use_ce ? &ce_offset : NULL);
>  	}
>
>  	si_init_buffer_resources(&sctx->rw_buffers,
>  				 &sctx->descriptors[SI_DESCS_RW_BUFFERS],
>  				 SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
> -				 RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS,
> +				 /* The second set of usage/priority is used by
> +				  * const buffers in RW buffer slots. */
> +				 RADEON_USAGE_READWRITE, RADEON_USAGE_READ,
> +				 RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER,
>  				 &ce_offset);
>  	si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
>  			    4, SI_NUM_VERTEX_BUFFERS, NULL, NULL);
>
>  	sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
>
>  	if (sctx->b.chip_class >= GFX9)
>  		assert(ce_offset <= 4096);
>  	else
>  		assert(ce_offset <= 32768);
> @@ -2141,42 +2143,39 @@ bool si_upload_compute_shader_descriptors(struct si_context *sctx)
>  	sctx->descriptors_dirty &= ~mask;
>
>  	return true;
>  }
>
>  void si_release_all_descriptors(struct si_context *sctx)
>  {
>  	int i;
>
>  	for (i = 0; i < SI_NUM_SHADERS; i++) {
> -		si_release_buffer_resources(&sctx->const_buffers[i],
> -					    si_const_buffer_descriptors(sctx, i));
> -		si_release_buffer_resources(&sctx->shader_buffers[i],
> -					    si_shader_buffer_descriptors(sctx, i));
> +		si_release_buffer_resources(&sctx->const_and_shader_buffers[i],
> +					    si_const_and_shader_buffer_descriptors(sctx, i));
>  		si_release_sampler_views(&sctx->samplers[i].views);
>  		si_release_image_views(&sctx->images[i]);
>  	}
>  	si_release_buffer_resources(&sctx->rw_buffers,
>  				    &sctx->descriptors[SI_DESCS_RW_BUFFERS]);
>
>  	for (i = 0; i < SI_NUM_DESCS; ++i)
>  		si_release_descriptors(&sctx->descriptors[i]);
>  	si_release_descriptors(&sctx->vertex_buffers);
>  }
>
>  void si_all_descriptors_begin_new_cs(struct si_context *sctx)
>  {
>  	int i;
>
>  	for (i = 0; i < SI_NUM_SHADERS; i++) {
> -		si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
> -		si_buffer_resources_begin_new_cs(sctx, &sctx->shader_buffers[i]);
> +		si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[i]);
>  		si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
>  		si_image_views_begin_new_cs(sctx, &sctx->images[i]);
>  	}
>  	si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers);
>  	si_vertex_buffers_begin_new_cs(sctx);
>
>  	for (i = 0; i < SI_NUM_DESCS; ++i)
>  		si_descriptors_begin_new_cs(sctx, &sctx->descriptors[i]);
>
>  	si_shader_userdata_begin_new_cs(sctx);
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index 431d8a3..449a802 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -288,22 +288,21 @@ struct si_context {
>  	bool				flatshade;
>  	bool				do_update_shaders;
>
>  	/* shader descriptors */
>  	struct si_descriptors		vertex_buffers;
>  	struct si_descriptors		descriptors[SI_NUM_DESCS];
>  	unsigned			descriptors_dirty;
>  	unsigned			shader_pointers_dirty;
>  	unsigned			compressed_tex_shader_mask;
>  	struct si_buffer_resources	rw_buffers;
> -	struct si_buffer_resources	const_buffers[SI_NUM_SHADERS];
> -	struct si_buffer_resources	shader_buffers[SI_NUM_SHADERS];
> +	struct si_buffer_resources	const_and_shader_buffers[SI_NUM_SHADERS];
>  	struct si_textures_info		samplers[SI_NUM_SHADERS];
>  	struct si_images_info		images[SI_NUM_SHADERS];
>
>  	/* other shader resources */
>  	struct pipe_constant_buffer	null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
>  	struct pipe_resource		*esgs_ring;
>  	struct pipe_resource		*gsvs_ring;
>  	struct pipe_resource		*tf_ring;
>  	struct pipe_resource		*tess_offchip_ring;
>  	union pipe_color_union		*border_color_table; /* in CPU memory, any endian */
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index a49449b..8c5bcb9 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -1719,24 +1719,24 @@ static void declare_compute_memory(struct si_shader_context *ctx,
>  	                                  "compute_lds",
>  	                                  LOCAL_ADDR_SPACE);
>  	LLVMSetAlignment(var, 4);
>
>  	ctx->shared_memory = LLVMBuildBitCast(gallivm->builder, var, i8p, "");
>  }
>
>  static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
>  {
>  	LLVMValueRef list_ptr = LLVMGetParam(ctx->main_fn,
> -					     ctx->param_const_buffers);
> +					     ctx->param_const_and_shader_buffers);
>
>  	return ac_build_indexed_load_const(&ctx->ac, list_ptr,
> -					LLVMConstInt(ctx->i32, i, 0));
> +			LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0));
>  }
>
>  static LLVMValueRef fetch_constant(
>  	struct lp_build_tgsi_context *bld_base,
>  	const struct tgsi_full_src_register *reg,
>  	enum tgsi_opcode_type type,
>  	unsigned swizzle)
>  {
>  	struct si_shader_context *ctx = si_shader_context(bld_base);
>  	struct lp_build_context *base = &bld_base->base;
> @@ -1752,25 +1752,27 @@ static LLVMValueRef fetch_constant(
>  		for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
>  			values[chan] = fetch_constant(bld_base, reg, type, chan);
>
>  		return lp_build_gather_values(&ctx->gallivm, values, 4);
>  	}
>
>  	buf = reg->Register.Dimension ? reg->Dimension.Index : 0;
>  	idx = reg->Register.Index * 4 + swizzle;
>
>  	if (reg->Register.Dimension && reg->Dimension.Indirect) {
> -		LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_buffers);
> +		LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
>  		LLVMValueRef index;
>  		index = si_get_bounded_indirect_index(ctx, &reg->DimIndirect,
>  						      reg->Dimension.Index,
>  						      SI_NUM_CONST_BUFFERS);
> +		index = LLVMBuildAdd(ctx->gallivm.builder, index,
> +				     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
>  		bufp = ac_build_indexed_load_const(&ctx->ac, ptr, index);
>  	} else
>  		bufp = load_const_buffer_desc(ctx, buf);
>
>  	if (reg->Register.Indirect) {
>  		addr = ctx->addrs[ireg->Index][ireg->Swizzle];
>  		addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg");
>  		addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16);
>  		addr = lp_build_add(&bld_base->uint_bld, addr,
>  				    LLVMConstInt(ctx->i32, idx * 4, 0));
> @@ -2789,27 +2791,25 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
>  				  8 + GFX9_SGPR_TCS_OUT_OFFSETS);
>  	ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_layout,
>  				  8 + GFX9_SGPR_TCS_OUT_LAYOUT);
>  	ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_addr_base64k,
>  				  8 + GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K);
>  	ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_addr_base64k,
>  				  8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K);
>
>  	unsigned desc_param = ctx->param_tcs_factor_addr_base64k + 2;
>  	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
> -					   8 + GFX9_SGPR_TCS_CONST_BUFFERS);
> +					   8 + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS);
>  	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
>  					   8 + GFX9_SGPR_TCS_SAMPLERS);
>  	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2,
>  					   8 + GFX9_SGPR_TCS_IMAGES);
> -	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 3,
> -					   8 + GFX9_SGPR_TCS_SHADER_BUFFERS);
>
>  	unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
>  	ret = si_insert_input_ret_float(ctx, ret,
>  					ctx->param_tcs_patch_id, vgpr++);
>  	ret = si_insert_input_ret_float(ctx, ret,
>  					ctx->param_tcs_rel_ids, vgpr++);
>  	ctx->return_value = ret;
>  }
>
>  /* Pass GS inputs from ES to GS on GFX9. */
> @@ -2818,27 +2818,25 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
>  	LLVMValueRef ret = ctx->return_value;
>
>  	ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, 0);
>  	ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2);
>  	ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
>
>  	ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
>
>  	unsigned desc_param = ctx->param_vs_state_bits + 1;
>  	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
> -					   8 + GFX9_SGPR_GS_CONST_BUFFERS);
> +					   8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS);
>  	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
>  					   8 + GFX9_SGPR_GS_SAMPLERS);
>  	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2,
>  					   8 + GFX9_SGPR_GS_IMAGES);
> -	ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 3,
> -					   8 + GFX9_SGPR_GS_SHADER_BUFFERS);
>
>  	unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR;
>  	for (unsigned i = 0; i < 5; i++) {
>  		unsigned param = ctx->param_gs_vtx01_offset + i;
>  		ret = si_insert_input_ret_float(ctx, ret, param, vgpr++);
>  	}
>  	ctx->return_value = ret;
>  }
>
>  static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
> @@ -4054,30 +4052,29 @@ static unsigned si_get_max_workgroup_size(const struct si_shader *shader)
>  		max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
>  	}
>  	return max_work_group_size;
>  }
>
>  static void declare_per_stage_desc_pointers(struct si_shader_context *ctx,
>  					    LLVMTypeRef *params,
>  					    unsigned *num_params,
>  					    bool assign_params)
>  {
> -	params[(*num_params)++] = si_const_array(ctx->v4i32, SI_NUM_CONST_BUFFERS);
> +	params[(*num_params)++] = si_const_array(ctx->v4i32,
> +						 SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS);
>  	params[(*num_params)++] = si_const_array(ctx->v8i32, SI_NUM_SAMPLERS);
>  	params[(*num_params)++] = si_const_array(ctx->v8i32, SI_NUM_IMAGES);
> -	params[(*num_params)++] = si_const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
>
>  	if (assign_params) {
> -		ctx->param_const_buffers  = *num_params - 4;
> -		ctx->param_samplers	  = *num_params - 3;
> -		ctx->param_images	  = *num_params - 2;
> -		ctx->param_shader_buffers = *num_params - 1;
> +		ctx->param_const_and_shader_buffers = *num_params - 3;
> +		ctx->param_samplers	  = *num_params - 2;
> +		ctx->param_images	  = *num_params - 1;
>  	}
>  }
>
>  static void declare_default_desc_pointers(struct si_shader_context *ctx,
>  					  LLVMTypeRef *params,
>  				          unsigned *num_params)
>  {
>  	params[ctx->param_rw_buffers = (*num_params)++] =
>  		si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
>  	declare_per_stage_desc_pointers(ctx, params, num_params, true);
> @@ -6663,36 +6660,34 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
>  		params[num_params++] = ctx->i32; /* wave info */
>  		params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i64;
>  		params[num_params++] = ctx->i64;
>  		params[num_params++] = ctx->i64;
>  		params[num_params++] = ctx->i64;
>  		params[num_params++] = ctx->i64;
> -		params[num_params++] = ctx->i64;
>  		params[num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32;
>  		params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32;
>  		params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
>  		params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
>  	} else {
>  		params[num_params++] = ctx->i64;
>  		params[num_params++] = ctx->i64;
>  		params[num_params++] = ctx->i64;
>  		params[num_params++] = ctx->i64;
> -		params[num_params++] = ctx->i64;
>  		params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32;
>  		params[num_params++] = ctx->i32;
>  		params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
>  		params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
>  		params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
>  		params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
>  	}
>  	last_sgpr = num_params - 1;
> @@ -7035,24 +7030,23 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx,
>  {
>  	struct gallivm_state *gallivm = &ctx->gallivm;
>  	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
>  	LLVMTypeRef params[16+8*4+3];
>  	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
>  	int last_sgpr, num_params = 0, i;
>  	struct si_ps_exports exp = {};
>
>  	/* Declare input SGPRs. */
>  	params[ctx->param_rw_buffers = num_params++] = ctx->i64;
> -	params[ctx->param_const_buffers = num_params++] = ctx->i64;
> +	params[ctx->param_const_and_shader_buffers = num_params++] = ctx->i64;
>  	params[ctx->param_samplers = num_params++] = ctx->i64;
>  	params[ctx->param_images = num_params++] = ctx->i64;
> -	params[ctx->param_shader_buffers = num_params++] = ctx->i64;
>  	assert(num_params == SI_PARAM_ALPHA_REF);
>  	params[SI_PARAM_ALPHA_REF] = ctx->f32;
>  	last_sgpr = SI_PARAM_ALPHA_REF;
>
>  	/* Declare input VGPRs. */
>  	num_params = (last_sgpr + 1) +
>  		     util_bitcount(key->ps_epilog.colors_written) * 4 +
>  		     key->ps_epilog.writes_z +
>  		     key->ps_epilog.writes_stencil +
>  		     key->ps_epilog.writes_samplemask;
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index 1627de3..08e809c 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -150,28 +150,26 @@ struct ac_shader_binary;
>   */
>  #define SI_MAX_IO_GENERIC       46
>
>  /* SGPR user data indices */
>  enum {
>  	/* GFX9 merged shaders have RW_BUFFERS among the first 8 system SGPRs,
>  	 * and these two are used for other purposes.
>  	 */
>  	SI_SGPR_RW_BUFFERS,  /* rings (& stream-out, VS only) */
>  	SI_SGPR_RW_BUFFERS_HI,
> -	SI_SGPR_CONST_BUFFERS,
> -	SI_SGPR_CONST_BUFFERS_HI,
> +	SI_SGPR_CONST_AND_SHADER_BUFFERS,
> +	SI_SGPR_CONST_AND_SHADER_BUFFERS_HI,
>  	SI_SGPR_SAMPLERS,  /* images & sampler states interleaved */
>  	SI_SGPR_SAMPLERS_HI,
>  	SI_SGPR_IMAGES,
>  	SI_SGPR_IMAGES_HI,
> -	SI_SGPR_SHADER_BUFFERS,
> -	SI_SGPR_SHADER_BUFFERS_HI,
>  	SI_NUM_RESOURCE_SGPRS,
>
>  	/* all VS variants */
>  	SI_SGPR_VERTEX_BUFFERS	= SI_NUM_RESOURCE_SGPRS,
>  	SI_SGPR_VERTEX_BUFFERS_HI,
>  	SI_SGPR_BASE_VERTEX,
>  	SI_SGPR_START_INSTANCE,
>  	SI_SGPR_DRAWID,
>  	SI_SGPR_VS_STATE_BITS,
>  	SI_VS_NUM_USER_SGPR,
> @@ -190,53 +188,49 @@ enum {
>  	GFX6_SGPR_TCS_FACTOR_ADDR_BASE64K,
>  	GFX6_TCS_NUM_USER_SGPR,
>
>  	/* GFX9: Merged LS-HS (VS-TCS) only. */
>  	GFX9_SGPR_TCS_OFFCHIP_LAYOUT = SI_VS_NUM_USER_SGPR,
>  	GFX9_SGPR_TCS_OUT_OFFSETS,
>  	GFX9_SGPR_TCS_OUT_LAYOUT,
>  	GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K,
>  	GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K,
>  	GFX9_SGPR_unused_to_align_the_next_pointer,
> -	GFX9_SGPR_TCS_CONST_BUFFERS,
> -	GFX9_SGPR_TCS_CONST_BUFFERS_HI,
> +	GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS,
> +	GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS_HI,
>  	GFX9_SGPR_TCS_SAMPLERS,  /* images & sampler states interleaved */
>  	GFX9_SGPR_TCS_SAMPLERS_HI,
>  	GFX9_SGPR_TCS_IMAGES,
>  	GFX9_SGPR_TCS_IMAGES_HI,
> -	GFX9_SGPR_TCS_SHADER_BUFFERS,
> -	GFX9_SGPR_TCS_SHADER_BUFFERS_HI,
>  	GFX9_TCS_NUM_USER_SGPR,
>
>  	/* GFX9: Merged ES-GS (VS-GS or TES-GS). */
> -	GFX9_SGPR_GS_CONST_BUFFERS = SI_VS_NUM_USER_SGPR,
> -	GFX9_SGPR_GS_CONST_BUFFERS_HI,
> +	GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS = SI_VS_NUM_USER_SGPR,
> +	GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS_HI,
>  	GFX9_SGPR_GS_SAMPLERS,
>  	GFX9_SGPR_GS_SAMPLERS_HI,
>  	GFX9_SGPR_GS_IMAGES,
>  	GFX9_SGPR_GS_IMAGES_HI,
> -	GFX9_SGPR_GS_SHADER_BUFFERS,
> -	GFX9_SGPR_GS_SHADER_BUFFERS_HI,
>  	GFX9_GS_NUM_USER_SGPR,
>
>  	/* GS limits */
>  	GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
>  	SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_RW_BUFFERS_HI + 1,
>
>  	/* PS only */
>  	SI_SGPR_ALPHA_REF	= SI_NUM_RESOURCE_SGPRS,
>  	SI_PS_NUM_USER_SGPR,
>  };
>
>  /* LLVM function parameter indices */
>  enum {
> -	SI_NUM_RESOURCE_PARAMS = 5,
> +	SI_NUM_RESOURCE_PARAMS = 4,
>
>  	/* PS only parameters */
>  	SI_PARAM_ALPHA_REF = SI_NUM_RESOURCE_PARAMS,
>  	SI_PARAM_PRIM_MASK,
>  	SI_PARAM_PERSP_SAMPLE,
>  	SI_PARAM_PERSP_CENTER,
>  	SI_PARAM_PERSP_CENTROID,
>  	SI_PARAM_PERSP_PULL_MODEL,
>  	SI_PARAM_LINEAR_SAMPLE,
>  	SI_PARAM_LINEAR_CENTER,
> diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
> index 69e6dfc..9fd027d 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_internal.h
> +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
> @@ -102,24 +102,23 @@ struct si_shader_context {
>  	struct tgsi_array_info *temp_arrays;
>  	LLVMValueRef *temp_array_allocas;
>
>  	LLVMValueRef undef_alloca;
>
>  	LLVMValueRef main_fn;
>  	LLVMTypeRef return_type;
>
>  	/* Parameter indices for LLVMGetParam. */
>  	int param_rw_buffers;
> -	int param_const_buffers;
> +	int param_const_and_shader_buffers;
>  	int param_samplers;
>  	int param_images;
> -	int param_shader_buffers;
>  	/* Common inputs for merged shaders. */
>  	int param_merged_wave_info;
>  	int param_merged_scratch_offset;
>  	/* API VS */
>  	int param_vertex_buffers;
>  	int param_base_vertex;
>  	int param_start_instance;
>  	int param_draw_id;
>  	int param_vertex_id;
>  	int param_rel_auto_id;
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> index 13b4694..9c44cff 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> @@ -77,28 +77,33 @@ static LLVMValueRef get_buffer_size(
>
>  	return size;
>  }
>
>  static LLVMValueRef
>  shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
>  			 const struct tgsi_full_src_register *reg)
>  {
>  	LLVMValueRef index;
>  	LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
> -					     ctx->param_shader_buffers);
> +					     ctx->param_const_and_shader_buffers);
>
> -	if (!reg->Register.Indirect)
> -		index = LLVMConstInt(ctx->i32, reg->Register.Index, 0);
> -	else
> +	if (!reg->Register.Indirect) {
> +		index = LLVMConstInt(ctx->i32,
> +				     si_get_shaderbuf_slot(reg->Register.Index), 0);
> +	} else {
>  		index = si_get_bounded_indirect_index(ctx, &reg->Indirect,
>  						      reg->Register.Index,
>  						      SI_NUM_SHADER_BUFFERS);
> +		index = LLVMBuildSub(ctx->gallivm.builder,
> +				     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS - 1, 0),
> +				     index, "");
> +	}
>
>  	return ac_build_indexed_load_const(&ctx->ac, rsrc_ptr, index);
>  }
>
>  static bool tgsi_is_array_sampler(unsigned target)
>  {
>  	return target == TGSI_TEXTURE_1D_ARRAY ||
>  	       target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
>  	       target == TGSI_TEXTURE_2D_ARRAY ||
>  	       target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index 629d614..90d0972 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -187,25 +187,26 @@ enum {
>   *
>   *  0 - rw buffers
>   *  1 - vertex const buffers
>   *  2 - vertex shader buffers
>   *   ...
>   *  5 - fragment const buffers
>   *   ...
>   *  21 - compute const buffers
>   *   ...
>   */
> -#define SI_SHADER_DESCS_CONST_BUFFERS  0
> -#define SI_SHADER_DESCS_SHADER_BUFFERS 1
> -#define SI_SHADER_DESCS_SAMPLERS       2
> -#define SI_SHADER_DESCS_IMAGES         3
> -#define SI_NUM_SHADER_DESCS            4
> +enum {
> +	SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS,
> +	SI_SHADER_DESCS_SAMPLERS,
> +	SI_SHADER_DESCS_IMAGES,
> +	SI_NUM_SHADER_DESCS,
> +};
>
>  #define SI_DESCS_RW_BUFFERS            0
>  #define SI_DESCS_FIRST_SHADER          1
>  #define SI_DESCS_FIRST_COMPUTE         (SI_DESCS_FIRST_SHADER + \
>                                          PIPE_SHADER_COMPUTE * SI_NUM_SHADER_DESCS)
>  #define SI_NUM_DESCS                   (SI_DESCS_FIRST_SHADER + \
>                                          SI_NUM_SHADERS * SI_NUM_SHADER_DESCS)
>
>  /* This represents descriptors in memory, such as buffer resources,
>   * image resources, and sampler states.
> @@ -244,21 +245,23 @@ struct si_descriptors {
>  struct si_sampler_views {
>  	struct pipe_sampler_view	*views[SI_NUM_SAMPLERS];
>  	struct si_sampler_state		*sampler_states[SI_NUM_SAMPLERS];
>
>  	/* The i-th bit is set if that element is enabled (non-NULL resource). */
>  	unsigned			enabled_mask;
>  };
>
>  struct si_buffer_resources {
>  	enum radeon_bo_usage		shader_usage; /* READ, WRITE, or READWRITE */
> +	enum radeon_bo_usage		shader_usage_constbuf;
>  	enum radeon_bo_priority		priority;
> +	enum radeon_bo_priority		priority_constbuf;
>  	struct pipe_resource		**buffers; /* this has num_buffers elements */
>
>  	/* The i-th bit is set if that element is enabled (non-NULL resource). */
>  	unsigned			enabled_mask;
>  };
>
>  #define si_pm4_block_idx(member) \
>  	(offsetof(union si_state, named.member) / sizeof(struct si_pm4_state *))
>
>  #define si_pm4_state_changed(sctx, member) \
> @@ -365,11 +368,23 @@ void si_trace_emit(struct si_context *sctx);
>
>  static inline unsigned
>  si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil)
>  {
>  	if (stencil)
>  		return rtex->surface.u.legacy.stencil_tiling_index[level];
>  	else
>  		return rtex->surface.u.legacy.tiling_index[level];
>  }
>
> +static inline unsigned si_get_constbuf_slot(unsigned slot)
> +{
> +	/* Constant buffers are in slots [16..31], ascending */
> +	return SI_NUM_SHADER_BUFFERS + slot;
> +}
> +
> +static inline unsigned si_get_shaderbuf_slot(unsigned slot)
> +{
> +	/* shader buffers are in slots [15..0], descending */
> +	return SI_NUM_SHADER_BUFFERS - 1 - slot;
> +}
> +
>  #endif
>


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.


More information about the mesa-dev mailing list