[Mesa-dev] [PATCH 2/2] r600g: lower number of driver const buffers

Dave Airlie airlied at gmail.com
Thu Sep 10 21:43:49 PDT 2015


From: Dave Airlie <airlied at redhat.com>

I'm going to want a driver constant buffer for tess to coordinate
LDS storage, so before tackling that I decided to merge the
clip/samplepos and texture info buffers into one, so that I can
steal the spare one.

This creates a single constant buffer shared by the two, with
clip/samplepos taking up a reserved 128 bytes at the start and the
texture/buffer info constants following at that offset.

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/gallium/drivers/r600/r600_pipe.h         |  28 +++--
 src/gallium/drivers/r600/r600_shader.c       |  21 ++--
 src/gallium/drivers/r600/r600_state_common.c | 152 ++++++++++++++++++---------
 3 files changed, 131 insertions(+), 70 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 25df831..d0774de 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -63,13 +63,15 @@
 #define R600_TRACE_CS_DWORDS		7
 
 #define R600_MAX_USER_CONST_BUFFERS 13
-#define R600_MAX_DRIVER_CONST_BUFFERS 3
+#define R600_MAX_DRIVER_CONST_BUFFERS 2
 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
 
 /* start driver buffers after user buffers */
-#define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
-#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
-#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
+#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
+#define R600_UCP_SIZE (4*4*8)
+#define R600_BUFFER_INFO_OFFSET (R600_UCP_SIZE)
+
+#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
 /* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit
  * of 16 const buffers.
  * UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id.
@@ -77,8 +79,6 @@
  * In order to support d3d 11 mandated minimum of 15 user const buffers
  * we'd have to squash all use cases into one driver buffer.
  */
-#define R600_SAMPLE_POSITIONS_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
-
 #define R600_MAX_CONST_BUFFER_SIZE (4096 * sizeof(float[4]))
 
 #ifdef PIPE_ARCH_BIG_ENDIAN
@@ -356,11 +356,15 @@ struct r600_textures_info {
 	struct r600_samplerview_state	views;
 	struct r600_sampler_states	states;
 	bool				is_array_sampler[NUM_TEX_UNITS];
+};
 
-	/* cube array txq workaround */
-	uint32_t			*txq_constants;
-	/* buffer related workarounds */
-	uint32_t			*buffer_constants;
+struct r600_shader_driver_constants_info {
+	/* currently 128 bytes for UCP/samplepos + sampler buffer constants */
+	uint32_t			*constants;
+	uint32_t			alloc_size;
+	bool				vs_ucp_dirty;
+	bool				texture_const_dirty;
+	bool				ps_sample_pos_dirty;
 };
 
 struct r600_constbuf_state
@@ -472,6 +476,9 @@ struct r600_context {
 	struct r600_gs_rings_state	gs_rings;
 	struct r600_constbuf_state	constbuf_state[PIPE_SHADER_TYPES];
 	struct r600_textures_info	samplers[PIPE_SHADER_TYPES];
+
+	struct r600_shader_driver_constants_info driver_consts[PIPE_SHADER_TYPES];
+
 	/** Vertex buffers for fetch shaders */
 	struct r600_vertexbuf_state	vertex_buffer_state;
 	/** Vertex buffers for compute shaders */
@@ -498,6 +505,7 @@ struct r600_context {
 
 	void				*sb_context;
 	struct r600_isa		*isa;
+	float sample_positions[4 * 16];
 };
 
 static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index f2c9e16..93b1bf7 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -60,6 +60,7 @@ issued in the w slot as well.
 The compiler must issue the source argument to slots z, y, and x
 */
 
+#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
 static int r600_shader_from_tgsi(struct r600_context *rctx,
 				 struct r600_pipe_shader *pipeshader,
 				 union r600_shader_key key);
@@ -947,7 +948,7 @@ static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_
 
 	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
 	vtx.op = FETCH_OP_VFETCH;
-	vtx.buffer_id = R600_SAMPLE_POSITIONS_CONST_BUFFER;
+	vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
 	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
 	if (sample_id == NULL) {
 		vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w;
@@ -2307,7 +2308,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 				alu.src[0].chan = j;
 
 				alu.src[1].sel = 512 + i;
-				alu.src[1].kc_bank = R600_UCP_CONST_BUFFER;
+				alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
 				alu.src[1].chan = j;
 
 				alu.dst.sel = clipdist_temp[oreg];
@@ -5499,7 +5500,8 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l
 		alu.src[0].sel = vtx.dst_gpr;
 		alu.src[0].chan = i;
 
-		alu.src[1].sel = 512 + (id * 2);
+		alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL;
+		alu.src[1].sel += (id * 2);
 		alu.src[1].chan = i % 4;
 		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
 
@@ -5521,7 +5523,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l
 		alu.src[0].sel = vtx.dst_gpr;
 		alu.src[0].chan = 3;
 
-		alu.src[1].sel = 512 + (id * 2) + 1;
+		alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1;
 		alu.src[1].chan = 0;
 		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
 
@@ -5542,14 +5544,14 @@ static int r600_do_buffer_txq(struct r600_shader_ctx *ctx)
 
 	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.op = ALU_OP1_MOV;
-
+	alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
 	if (ctx->bc->chip_class >= EVERGREEN) {
 		/* channel 0 or 2 of each word */
-		alu.src[0].sel = 512 + (id / 2);
+		alu.src[0].sel += (id / 2);
 		alu.src[0].chan = (id % 2) * 2;
 	} else {
 		/* r600 we have them at channel 2 of the second dword */
-		alu.src[0].sel = 512 + (id * 2) + 1;
+		alu.src[0].sel += (id * 2) + 1;
 		alu.src[0].chan = 1;
 	}
 	alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
@@ -6207,13 +6209,14 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.op = ALU_OP1_MOV;
 
+		alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
 		if (ctx->bc->chip_class >= EVERGREEN) {
 			/* channel 1 or 3 of each word */
-			alu.src[0].sel = 512 + (id / 2);
+			alu.src[0].sel += (id / 2);
 			alu.src[0].chan = ((id % 2) * 2) + 1;
 		} else {
 			/* r600 we have them at channel 2 of the second dword */
-			alu.src[0].sel = 512 + (id * 2) + 1;
+			alu.src[0].sel += (id * 2) + 1;
 			alu.src[0].chan = 2;
 		}
 		alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index ae13411..21c89dc 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -240,17 +240,10 @@ static void r600_set_clip_state(struct pipe_context *ctx,
 				const struct pipe_clip_state *state)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
-	struct pipe_constant_buffer cb;
 
 	rctx->clip_state.state = *state;
 	r600_mark_atom_dirty(rctx, &rctx->clip_state.atom);
-
-	cb.buffer = NULL;
-	cb.user_buffer = state->ucp;
-	cb.buffer_offset = 0;
-	cb.buffer_size = 4*4*8;
-	ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, R600_UCP_CONST_BUFFER, &cb);
-	pipe_resource_reference(&cb.buffer, NULL);
+	rctx->driver_consts[PIPE_SHADER_VERTEX].vs_ucp_dirty = true;
 }
 
 static void r600_set_stencil_ref(struct pipe_context *ctx,
@@ -1053,6 +1046,74 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask
 	r600_mark_atom_dirty(rctx, &rctx->sample_mask.atom);
 }
 
+static void r600_update_driver_const_buffers(struct r600_context *rctx)
+{
+	int sh, size;;
+	void *ptr;
+	struct pipe_constant_buffer cb;
+	for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
+		struct r600_shader_driver_constants_info *info = &rctx->driver_consts[sh];
+		if (!info->vs_ucp_dirty &&
+		    !info->texture_const_dirty &&
+		    !info->ps_sample_pos_dirty)
+			continue;
+
+		ptr = info->constants;
+		size = info->alloc_size;
+		if (info->vs_ucp_dirty) {
+			assert(sh == PIPE_SHADER_VERTEX);
+			if (!size) {
+				ptr = rctx->clip_state.state.ucp;
+				size = R600_UCP_SIZE;
+			} else {
+				memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE);
+			}
+			info->vs_ucp_dirty = false;
+		}
+
+		if (info->ps_sample_pos_dirty) {
+			assert(sh == PIPE_SHADER_FRAGMENT);
+			if (!size) {
+				ptr = rctx->sample_positions;
+				size = R600_UCP_SIZE;
+			} else {
+				memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE);
+			}
+			info->ps_sample_pos_dirty = false;
+		}
+
+		if (info->texture_const_dirty) {
+			assert (ptr);
+			assert (size);
+			if (sh == PIPE_SHADER_VERTEX)
+				memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE);
+			if (sh == PIPE_SHADER_FRAGMENT)
+				memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE);
+		}
+		info->texture_const_dirty = false;
+
+		cb.buffer = NULL;
+		cb.user_buffer = ptr;
+		cb.buffer_offset = 0;
+		cb.buffer_size = size;
+		rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, &cb);
+		pipe_resource_reference(&cb.buffer, NULL);
+	}
+}
+
+static void *r600_alloc_buf_consts(struct r600_context *rctx, int shader_type,
+				   int array_size, uint32_t *base_offset)
+{
+	struct r600_shader_driver_constants_info *info = &rctx->driver_consts[shader_type];
+	if (array_size + R600_UCP_SIZE > info->alloc_size) {
+		info->constants = realloc(info->constants, array_size + R600_UCP_SIZE);
+		info->alloc_size = array_size + R600_UCP_SIZE;
+	}
+	memset(info->constants + (R600_UCP_SIZE / 4), 0, array_size);
+	info->texture_const_dirty = true;
+	*base_offset = R600_UCP_SIZE;
+	return info->constants;
+}
 /*
  * On r600/700 hw we don't have vertex fetch swizzle, though TBO
  * doesn't require full swizzles it does need masking and setting alpha
@@ -1067,9 +1128,9 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty
 	struct r600_textures_info *samplers = &rctx->samplers[shader_type];
 	int bits;
 	uint32_t array_size;
-	struct pipe_constant_buffer cb;
 	int i, j;
-
+	uint32_t *constants;
+	uint32_t base_offset;
 	if (!samplers->views.dirty_buffer_constants)
 		return;
 
@@ -1077,38 +1138,33 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty
 
 	bits = util_last_bit(samplers->views.enabled_mask);
 	array_size = bits * 8 * sizeof(uint32_t) * 4;
-	samplers->buffer_constants = realloc(samplers->buffer_constants, array_size);
-	memset(samplers->buffer_constants, 0, array_size);
+
+	constants = r600_alloc_buf_consts(rctx, shader_type, array_size, &base_offset);
+
 	for (i = 0; i < bits; i++) {
 		if (samplers->views.enabled_mask & (1 << i)) {
-			int offset = i * 8;
+			int offset = (base_offset / 4) + i * 8;
 			const struct util_format_description *desc;
 			desc = util_format_description(samplers->views.views[i]->base.format);
 
 			for (j = 0; j < 4; j++)
 				if (j < desc->nr_channels)
-					samplers->buffer_constants[offset+j] = 0xffffffff;
+					constants[offset+j] = 0xffffffff;
 				else
-					samplers->buffer_constants[offset+j] = 0x0;
+					constants[offset+j] = 0x0;
 			if (desc->nr_channels < 4) {
 				if (desc->channel[0].pure_integer)
-					samplers->buffer_constants[offset+4] = 1;
+					constants[offset+4] = 1;
 				else
-					samplers->buffer_constants[offset+4] = fui(1.0);
+					constants[offset+4] = fui(1.0);
 			} else
-				samplers->buffer_constants[offset + 4] = 0;
+				constants[offset + 4] = 0;
 
-			samplers->buffer_constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
-			samplers->buffer_constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6;
+			constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+			constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6;
 		}
 	}
 
-	cb.buffer = NULL;
-	cb.user_buffer = samplers->buffer_constants;
-	cb.buffer_offset = 0;
-	cb.buffer_size = array_size;
-	rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb);
-	pipe_resource_reference(&cb.buffer, NULL);
 }
 
 /* On evergreen we store two values
@@ -1120,9 +1176,9 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type
 	struct r600_textures_info *samplers = &rctx->samplers[shader_type];
 	int bits;
 	uint32_t array_size;
-	struct pipe_constant_buffer cb;
 	int i;
-
+	uint32_t *constants;
+	uint32_t base_offset;
 	if (!samplers->views.dirty_buffer_constants)
 		return;
 
@@ -1130,45 +1186,37 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type
 
 	bits = util_last_bit(samplers->views.enabled_mask);
 	array_size = bits * 2 * sizeof(uint32_t) * 4;
-	samplers->buffer_constants = realloc(samplers->buffer_constants, array_size);
-	memset(samplers->buffer_constants, 0, array_size);
+
+	constants = r600_alloc_buf_consts(rctx, shader_type, array_size,
+					  &base_offset);
+
 	for (i = 0; i < bits; i++) {
 		if (samplers->views.enabled_mask & (1 << i)) {
-			uint32_t offset = i * 2;
-			samplers->buffer_constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
-			samplers->buffer_constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6;
+			uint32_t offset = (base_offset / 4) + i * 2;
+			constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+			constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6;
 		}
 	}
-
-	cb.buffer = NULL;
-	cb.user_buffer = samplers->buffer_constants;
-	cb.buffer_offset = 0;
-	cb.buffer_size = array_size;
-	rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb);
-	pipe_resource_reference(&cb.buffer, NULL);
 }
 
 /* set sample xy locations as array of fragment shader constants */
 void r600_set_sample_locations_constant_buffer(struct r600_context *rctx)
 {
-	struct pipe_constant_buffer constbuf = {0};
-	float values[4*16] = {0.0f};
 	int i;
 	struct pipe_context *ctx = &rctx->b.b;
 
-	assert(rctx->framebuffer.nr_samples <= Elements(values)/4);
+	assert(rctx->framebuffer.nr_samples < R600_UCP_SIZE);
+	assert(rctx->framebuffer.nr_samples <= Elements(rctx->sample_positions)/4);
+
+	memset(rctx->sample_positions, 0, 4 * 4 * 16);
 	for (i = 0; i < rctx->framebuffer.nr_samples; i++) {
-		ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &values[4*i]);
+		ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &rctx->sample_positions[4*i]);
 		/* Also fill in center-zeroed positions used for interpolateAtSample */
-		values[4*i + 2] = values[4*i + 0] - 0.5f;
-		values[4*i + 3] = values[4*i + 1] - 0.5f;
+		rctx->sample_positions[4*i + 2] = rctx->sample_positions[4*i + 0] - 0.5f;
+		rctx->sample_positions[4*i + 3] = rctx->sample_positions[4*i + 1] - 0.5f;
 	}
 
-	constbuf.user_buffer = values;
-	constbuf.buffer_size = rctx->framebuffer.nr_samples * 4 * 4;
-	ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT,
-		R600_SAMPLE_POSITIONS_CONST_BUFFER, &constbuf);
-	pipe_resource_reference(&constbuf.buffer, NULL);
+	rctx->driver_consts[PIPE_SHADER_FRAGMENT].ps_sample_pos_dirty = true;
 }
 
 static void update_shader_atom(struct pipe_context *ctx,
@@ -1387,6 +1435,8 @@ static bool r600_update_derived_state(struct r600_context *rctx)
 		}
 	}
 
+	r600_update_driver_const_buffers(rctx);
+
 	if (rctx->b.chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) {
 		if (!r600_adjust_gprs(rctx)) {
 			/* discard rendering */
-- 
2.4.3



More information about the mesa-dev mailing list