[Mesa-dev] [PATCH 5/6] radeonsi: convert constant buffers to si_descriptors

Marek Olšák maraeo at gmail.com
Wed Aug 28 10:17:28 PDT 2013


There is a new "class" si_buffer_resources, which should be good enough for
implementing any kind of buffer bindings (constant buffers, vertex buffers,
streamout buffers, shader storage buffers, etc.)

I don't even keep a copy of pipe_constant_buffer - we don't need it.

The main motivation behind this is to have a well-tested infrastructure
for setting up streamout buffers.
---
 src/gallium/drivers/radeonsi/radeonsi_pipe.h  |  10 +-
 src/gallium/drivers/radeonsi/si_descriptors.c | 143 +++++++++++++++++++++++++-
 src/gallium/drivers/radeonsi/si_state.c       |  42 --------
 src/gallium/drivers/radeonsi/si_state.h       |  15 ++-
 src/gallium/drivers/radeonsi/si_state_draw.c  |  80 ++------------
 5 files changed, 162 insertions(+), 128 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
index ef531fb..e6e99c7 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
@@ -115,13 +115,6 @@ struct r600_fence_block {
 	struct list_head		head;
 };
 
-struct r600_constbuf_state
-{
-	struct pipe_constant_buffer	cb[2];
-	uint32_t			enabled_mask;
-	uint32_t			dirty_mask;
-};
-
 #define SI_NUM_ATOMS(rctx) (sizeof((rctx)->atoms)/sizeof((rctx)->atoms.array[0]))
 #define SI_NUM_SHADERS (PIPE_SHADER_FRAGMENT+1)
 
@@ -138,6 +131,7 @@ struct r600_context {
 
 	union {
 		struct {
+			struct r600_atom *const_buffers[SI_NUM_SHADERS];
 			struct r600_atom *sampler_views[SI_NUM_SHADERS];
 		};
 		struct r600_atom *array[0];
@@ -164,7 +158,7 @@ struct r600_context {
 	/* shader information */
 	unsigned			sprite_coord_enable;
 	unsigned			export_16bpc;
-	struct r600_constbuf_state	constbuf_state[PIPE_SHADER_TYPES];
+	struct si_buffer_resources	const_buffers[SI_NUM_SHADERS];
 	struct r600_textures_info	samplers[SI_NUM_SHADERS];
 	struct r600_resource		*border_color_table;
 	unsigned			border_color_offset;
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index db0da75..2983d75 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -32,7 +32,7 @@
 
 #define SI_NUM_CONTEXTS 256
 
-static const uint32_t null_desc[8]; /* zeros */
+static uint32_t null_desc[8]; /* zeros */
 
 /* Set this if you want the 3D engine to wait until CP DMA is done.
  * It should be set on the last CP DMA packet. */
@@ -170,7 +170,7 @@ static void si_emit_shader_pointer(struct r600_context *rctx,
 
 static void si_emit_descriptors(struct r600_context *rctx,
 				struct si_descriptors *desc,
-				const uint32_t **descriptors)
+				uint32_t **descriptors)
 {
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
 	uint64_t va_base;
@@ -325,6 +325,135 @@ void si_set_sampler_view(struct r600_context *rctx, unsigned shader,
 	si_update_descriptors(&views->desc);
 }
 
+/* BUFFER RESOURCES */
+
+static void si_emit_buffer_resources(struct r600_context *rctx, struct r600_atom *atom)
+{
+	struct si_buffer_resources *buffers = (struct si_buffer_resources*)atom;
+
+	si_emit_descriptors(rctx, &buffers->desc, buffers->desc_data);
+}
+
+static void si_init_buffer_resources(struct r600_context *rctx,
+				     struct si_buffer_resources *buffers,
+				     unsigned num_buffers, unsigned shader,
+				     unsigned shader_userdata_index,
+				     enum radeon_bo_usage shader_usage)
+{
+	int i;
+
+	buffers->num_buffers = num_buffers;
+	buffers->shader_usage = shader_usage;
+	buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
+	buffers->desc_storage = CALLOC(num_buffers, sizeof(uint32_t) * 4);
+
+	/* si_emit_descriptors only accepts an array of arrays.
+	 * This adds such an array. */
+	buffers->desc_data = CALLOC(num_buffers, sizeof(uint32_t*));
+	for (i = 0; i < num_buffers; i++) {
+		buffers->desc_data[i] = &buffers->desc_storage[i*4];
+	}
+
+	si_init_descriptors(rctx, &buffers->desc,
+			    si_get_shader_user_data_base(shader) +
+			    shader_userdata_index*4, 4, num_buffers,
+			    si_emit_buffer_resources);
+}
+
+static void si_release_buffer_resources(struct si_buffer_resources *buffers)
+{
+	int i;
+	/* Unreference every slot; buffers->buffers is a pointer, so its length is num_buffers, not Elements(). */
+	for (i = 0; i < buffers->num_buffers; i++) {
+		pipe_resource_reference(&buffers->buffers[i], NULL);
+	}
+	/* Free the arrays allocated with CALLOC in si_init_buffer_resources, then the descriptors. */
+	FREE(buffers->buffers);
+	FREE(buffers->desc_storage);
+	FREE(buffers->desc_data);
+	si_release_descriptors(&buffers->desc);
+}
+
+static void si_buffer_resources_begin_new_cs(struct r600_context *rctx,
+					     struct si_buffer_resources *buffers)
+{
+	unsigned mask = buffers->desc.enabled_mask;
+
+	/* Add relocations to the CS. */
+	while (mask) {
+		int i = u_bit_scan(&mask);
+
+		r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+				      (struct r600_resource*)buffers->buffers[i],
+				      buffers->shader_usage);
+	}
+
+	r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+			      buffers->desc.buffer, RADEON_USAGE_READWRITE);
+
+	si_emit_shader_pointer(rctx, &buffers->desc);
+}
+
+/* CONSTANT BUFFERS */
+
+static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint slot,
+				   struct pipe_constant_buffer *input)
+{
+	struct r600_context *rctx = (struct r600_context *)ctx;
+	struct si_buffer_resources *buffers = &rctx->const_buffers[shader];
+
+	if (shader >= SI_NUM_SHADERS)
+		return;
+
+	rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE;
+
+	assert(slot < buffers->num_buffers);
+	pipe_resource_reference(&buffers->buffers[slot], NULL);
+
+	if (input && (input->buffer || input->user_buffer)) {
+		struct pipe_resource *buffer = NULL;
+		uint64_t va;
+
+		/* Upload the user buffer if needed. */
+		if (input->user_buffer) {
+			unsigned buffer_offset;
+
+			r600_upload_const_buffer(rctx,
+						 (struct r600_resource**)&buffer, input->user_buffer,
+						 input->buffer_size, &buffer_offset);
+			va = r600_resource_va(ctx->screen, buffer) + buffer_offset;
+		} else {
+			pipe_resource_reference(&buffer, input->buffer);
+			va = r600_resource_va(ctx->screen, buffer) + input->buffer_offset;
+		}
+
+		/* Set the descriptor. */
+		uint32_t *desc = buffers->desc_data[slot];
+		desc[0] = va;
+		desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
+			  S_008F04_STRIDE(0);
+		desc[2] = input->buffer_size;
+		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+			  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+			  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+
+		buffers->buffers[slot] = buffer;
+		r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+				      (struct r600_resource*)buffer, buffers->shader_usage);
+		buffers->desc.enabled_mask |= 1 << slot;
+	} else {
+		/* Clear the descriptor. */
+		memset(buffers->desc_data[slot], 0, sizeof(uint32_t) * 4);
+		buffers->desc.enabled_mask &= ~(1 << slot);
+	}
+
+	buffers->desc.dirty_mask |= 1 << slot;
+	si_update_descriptors(&buffers->desc);
+}
+
 /* INIT/DEINIT */
 
 void si_init_all_descriptors(struct r600_context *rctx)
@@ -332,10 +461,18 @@ void si_init_all_descriptors(struct r600_context *rctx)
 	int i;
 
 	for (i = 0; i < SI_NUM_SHADERS; i++) {
+		si_init_buffer_resources(rctx, &rctx->const_buffers[i],
+					 NUM_CONST_BUFFERS, i, SI_SGPR_CONST,
+					 RADEON_USAGE_READ);
+
 		si_init_sampler_views(rctx, &rctx->samplers[i].views, i);
 
+		rctx->atoms.const_buffers[i] = &rctx->const_buffers[i].desc.atom;
 		rctx->atoms.sampler_views[i] = &rctx->samplers[i].views.desc.atom;
 	}
+
+	/* Set pipe_context functions. */
+	rctx->b.b.set_constant_buffer = si_set_constant_buffer;
 }
 
 void si_release_all_descriptors(struct r600_context *rctx)
@@ -343,6 +480,7 @@ void si_release_all_descriptors(struct r600_context *rctx)
 	int i;
 
 	for (i = 0; i < SI_NUM_SHADERS; i++) {
+		si_release_buffer_resources(&rctx->const_buffers[i]);
 		si_release_sampler_views(&rctx->samplers[i].views);
 	}
 }
@@ -352,6 +490,7 @@ void si_all_descriptors_begin_new_cs(struct r600_context *rctx)
 	int i;
 
 	for (i = 0; i < SI_NUM_SHADERS; i++) {
+		si_buffer_resources_begin_new_cs(rctx, &rctx->const_buffers[i]);
 		si_sampler_views_begin_new_cs(rctx, &rctx->samplers[i].views);
 	}
 }
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index b4370da..5ac55f2 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3017,46 +3017,6 @@ static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
 }
 
 /*
- * Constants
- */
-static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
-				   struct pipe_constant_buffer *input)
-{
-	struct r600_context *rctx = (struct r600_context *)ctx;
-	struct r600_constbuf_state *state = &rctx->constbuf_state[shader];
-	struct pipe_constant_buffer *cb;
-	const uint8_t *ptr;
-
-	/* Note that the state tracker can unbind constant buffers by
-	 * passing NULL here.
-	 */
-	if (unlikely(!input || (!input->buffer && !input->user_buffer))) {
-		state->enabled_mask &= ~(1 << index);
-		state->dirty_mask &= ~(1 << index);
-		pipe_resource_reference(&state->cb[index].buffer, NULL);
-		return;
-	}
-
-	cb = &state->cb[index];
-	cb->buffer_size = input->buffer_size;
-
-	ptr = input->user_buffer;
-
-	if (ptr) {
-		r600_upload_const_buffer(rctx,
-				(struct r600_resource**)&cb->buffer, ptr,
-				cb->buffer_size, &cb->buffer_offset);
-	} else {
-		/* Setup the hw buffer. */
-		cb->buffer_offset = input->buffer_offset;
-		pipe_resource_reference(&cb->buffer, input->buffer);
-	}
-
-	state->enabled_mask |= 1 << index;
-	state->dirty_mask |= 1 << index;
-}
-
-/*
  * Vertex elements & buffers
  */
 
@@ -3241,8 +3201,6 @@ void si_init_state_functions(struct r600_context *rctx)
 
 	rctx->b.b.set_sample_mask = si_set_sample_mask;
 
-	rctx->b.b.set_constant_buffer = si_set_constant_buffer;
-
 	rctx->b.b.create_vertex_elements_state = si_create_vertex_elements;
 	rctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements;
 	rctx->b.b.delete_vertex_elements_state = si_delete_vertex_element;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 20ae433..82fac4a 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -92,11 +92,9 @@ union si_state {
 		struct si_pm4_state		*vs;
 		struct si_pm4_state		*vs_sampler_views;
 		struct si_pm4_state		*vs_sampler;
-		struct si_pm4_state		*vs_const;
 		struct si_pm4_state		*ps;
 		struct si_pm4_state		*ps_sampler_views;
 		struct si_pm4_state		*ps_sampler;
-		struct si_pm4_state		*ps_const;
 		struct si_pm4_state		*spi;
 		struct si_pm4_state		*vertex_buffers;
 		struct si_pm4_state		*texture_barrier;
@@ -114,6 +112,8 @@ union si_state {
 #define FMASK_TEX_OFFSET	NUM_TEX_UNITS
 #define NUM_SAMPLER_VIEWS	(FMASK_TEX_OFFSET+NUM_TEX_UNITS)
 
+#define NUM_CONST_BUFFERS 2
+
 /* This represents resource descriptors in memory, such as buffer resources,
  * image resources, and sampler states.
  */
@@ -149,7 +149,16 @@ struct si_descriptors {
 struct si_sampler_views {
 	struct si_descriptors		desc;
 	struct pipe_sampler_view	*views[NUM_SAMPLER_VIEWS];
-	const uint32_t			*desc_data[NUM_SAMPLER_VIEWS];
+	uint32_t			*desc_data[NUM_SAMPLER_VIEWS];
+};
+
+struct si_buffer_resources {
+	struct si_descriptors		desc;
+	unsigned			num_buffers;
+	enum radeon_bo_usage		shader_usage; /* READ, WRITE, or READWRITE */
+	struct pipe_resource		**buffers; /* this has num_buffers elements */
+	uint32_t			*desc_storage; /* this has num_buffers*4 elements */
+	uint32_t			**desc_data; /* an array of pointers pointing to desc_storage */
 };
 
 #define si_pm4_block_idx(member) \
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 1a6fff8..1e555de 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -453,78 +453,6 @@ static void si_update_derived_state(struct r600_context *rctx)
 	}
 }
 
-static void si_constant_buffer_update(struct r600_context *rctx)
-{
-	struct pipe_context *ctx = &rctx->b.b;
-	struct si_pm4_state *pm4;
-	unsigned shader, i;
-	uint64_t va;
-
-	if (!rctx->constbuf_state[PIPE_SHADER_VERTEX].dirty_mask &&
-	    !rctx->constbuf_state[PIPE_SHADER_FRAGMENT].dirty_mask)
-		return;
-
-	for (shader = PIPE_SHADER_VERTEX ; shader <= PIPE_SHADER_FRAGMENT; shader++) {
-		struct r600_constbuf_state *state = &rctx->constbuf_state[shader];
-
-		pm4 = CALLOC_STRUCT(si_pm4_state);
-		if (!pm4)
-			continue;
-
-		si_pm4_inval_shader_cache(pm4);
-		si_pm4_sh_data_begin(pm4);
-
-		for (i = 0; i < 2; i++) {
-			if (state->enabled_mask & (1 << i)) {
-				struct pipe_constant_buffer *cb = &state->cb[i];
-				struct r600_resource *rbuffer = r600_resource(cb->buffer);
-
-				va = r600_resource_va(ctx->screen, (void*)rbuffer);
-				va += cb->buffer_offset;
-
-				si_pm4_add_bo(pm4, rbuffer, RADEON_USAGE_READ);
-
-				/* Fill in a T# buffer resource description */
-				si_pm4_sh_data_add(pm4, va);
-				si_pm4_sh_data_add(pm4, (S_008F04_BASE_ADDRESS_HI(va >> 32) |
-							 S_008F04_STRIDE(0)));
-				si_pm4_sh_data_add(pm4, cb->buffer_size);
-				si_pm4_sh_data_add(pm4, S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
-						   S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
-						   S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
-						   S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-						   S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-						   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32));
-			} else {
-				/* Fill in an empty T# buffer resource description */
-				si_pm4_sh_data_add(pm4, 0);
-				si_pm4_sh_data_add(pm4, 0);
-				si_pm4_sh_data_add(pm4, 0);
-				si_pm4_sh_data_add(pm4, 0);
-			}
-		}
-
-		switch (shader) {
-		case PIPE_SHADER_VERTEX:
-			si_pm4_sh_data_end(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0, SI_SGPR_CONST);
-			si_pm4_set_state(rctx, vs_const, pm4);
-			break;
-
-		case PIPE_SHADER_FRAGMENT:
-			si_pm4_sh_data_end(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0, SI_SGPR_CONST);
-			si_pm4_set_state(rctx, ps_const, pm4);
-			break;
-
-		default:
-			R600_ERR("unsupported %d\n", shader);
-			FREE(pm4);
-			return;
-		}
-
-		state->dirty_mask = 0;
-	}
-}
-
 static void si_vertex_buffer_update(struct r600_context *rctx)
 {
 	struct pipe_context *ctx = &rctx->b.b;
@@ -653,7 +581,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		return;
 
 	si_update_derived_state(rctx);
-	si_constant_buffer_update(rctx);
 	si_vertex_buffer_update(rctx);
 
 	if (info->indexed) {
@@ -678,7 +605,14 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
 	si_state_draw(rctx, info, &ib);
 
+	/* Cache flushing via CP_COHER_CNTL. */
 	cp_coher_cntl = si_pm4_sync_flags(rctx);
+
+	if (rctx->b.flags & R600_CONTEXT_INV_CONST_CACHE) {
+		cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1) |
+				 S_0085F0_SH_KCACHE_ACTION_ENA(1);
+	}
+
 	if (cp_coher_cntl) {
 		struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
 
-- 
1.8.1.2



More information about the mesa-dev mailing list