[Mesa-dev] [PATCH] r600g: merge the TXQ and BUFFER constant buffers

Dave Airlie airlied at gmail.com
Sun Nov 23 19:36:06 PST 2014


From: Dave Airlie <airlied at redhat.com>

We are using 1 more buffer than we have, although in the future the
driver should just end up using one buffer in total probably, this
is a good first step, it merges the txq cube array and buffer info
constants on r600 and evergreen.

this also most likely breaks llvm backend, I've changed it,
but it definitely needs fixes for this.

this fixes a bunch of geom shader textureSize tests on rv635
from gpu reset to pass.

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/gallium/drivers/r600/r600_llvm.c         |  3 +-
 src/gallium/drivers/r600/r600_pipe.h         |  8 +--
 src/gallium/drivers/r600/r600_shader.c       | 18 ++++--
 src/gallium/drivers/r600/r600_state_common.c | 92 +++++++++++-----------------
 4 files changed, 54 insertions(+), 67 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index c19693a..470c65f 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -23,7 +23,6 @@
 
 #define CONSTANT_BUFFER_0_ADDR_SPACE 8
 #define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + R600_UCP_CONST_BUFFER)
-#define CONSTANT_TXQ_BUFFER (CONSTANT_BUFFER_0_ADDR_SPACE + R600_TXQ_CONST_BUFFER)
 #define LLVM_R600_BUFFER_INFO_CONST_BUFFER \
 	(CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER)
 
@@ -690,7 +689,7 @@ static void llvm_emit_tex(
 		if (emit_data->inst->Dst[0].Register.WriteMask & 4) {
 			LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, 0);
 			LLVMValueRef ZLayer = LLVMBuildExtractElement(gallivm->builder,
-				llvm_load_const_buffer(bld_base, offset, CONSTANT_TXQ_BUFFER),
+				llvm_load_const_buffer(bld_base, offset, LLVM_R600_BUFFER_INFO_CONST_BUFFER,
 				lp_build_const_int32(gallivm, 0), "");
 
 			emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], ZLayer, lp_build_const_int32(gallivm, 2), "");
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 40b0328..e27e877 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -44,14 +44,13 @@
 #define R600_TRACE_CS_DWORDS		7
 
 #define R600_MAX_USER_CONST_BUFFERS 13
-#define R600_MAX_DRIVER_CONST_BUFFERS 4
+#define R600_MAX_DRIVER_CONST_BUFFERS 3
 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
 
 /* start driver buffers after user buffers */
 #define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
-#define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
-#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
-#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 3)
+#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
+#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
 /* Currently R600_MAX_CONST_BUFFERS is too large, the hardware only has 16 buffers, but the driver is
  * trying to use 17. Avoid accidentally aliasing with user UBOs for SAMPLE_POSITIONS by using an id<16.
  * UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id.
@@ -316,7 +315,6 @@ struct r600_samplerview_state {
 	uint32_t			dirty_mask;
 	uint32_t			compressed_depthtex_mask; /* which textures are depth */
 	uint32_t			compressed_colortex_mask;
-	boolean                         dirty_txq_constants;
 	boolean				dirty_buffer_constants;
 };
 
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index a772dee..61ff162 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5035,8 +5035,9 @@ static int r600_do_buffer_txq(struct r600_shader_ctx *ctx)
 	alu.op = ALU_OP1_MOV;
 
 	if (ctx->bc->chip_class >= EVERGREEN) {
-		alu.src[0].sel = 512 + (id / 4);
-		alu.src[0].chan = id % 4;
+		/* channel 0 or 2 of each word */
+		alu.src[0].sel = 512 + (id / 2);
+		alu.src[0].chan = (id % 2) * 2;
 	} else {
 		/* r600 we have them at channel 2 of the second dword */
 		alu.src[0].sel = 512 + (id * 2) + 1;
@@ -5697,9 +5698,16 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.op = ALU_OP1_MOV;
 
-		alu.src[0].sel = 512 + (id / 4);
-		alu.src[0].kc_bank = R600_TXQ_CONST_BUFFER;
-		alu.src[0].chan = id % 4;
+		if (ctx->bc->chip_class >= EVERGREEN) {
+			/* channel 1 or 3 of each word */
+			alu.src[0].sel = 512 + (id / 2);
+			alu.src[0].chan = ((id % 2) * 2) + 1;
+		} else {
+			/* r600 we have them at channel 2 of the second dword */
+			alu.src[0].sel = 512 + (id * 2) + 1;
+			alu.src[0].chan = 2;
+		}
+		alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
 		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
 		alu.last = 1;
 		r = r600_bytecode_add_alu(ctx->bc, &alu);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index c3f21cb..ff94454 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -649,7 +649,6 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
 	dst->views.dirty_mask |= new_mask;
 	dst->views.compressed_depthtex_mask &= dst->views.enabled_mask;
 	dst->views.compressed_colortex_mask &= dst->views.enabled_mask;
-	dst->views.dirty_txq_constants = TRUE;
 	dst->views.dirty_buffer_constants = TRUE;
 	r600_sampler_views_dirty(rctx, &dst->views);
 
@@ -984,6 +983,7 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask
  * then in the shader, we AND the 4 components with 0xffffffff or 0,
  * then OR the alpha with the value given here.
  * We use a 6th constant to store the txq buffer size in
+ * we use 7th slot for cube map
  */
 static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_type)
 {
@@ -1022,6 +1022,7 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty
 				samplers->buffer_constants[offset + 4] = 0;
 
 			samplers->buffer_constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+			samplers->buffer_constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6;
 		}
 	}
 
@@ -1048,12 +1049,16 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type
 	samplers->views.dirty_buffer_constants = FALSE;
 
 	bits = util_last_bit(samplers->views.enabled_mask);
-	array_size = bits * sizeof(uint32_t) * 4;
+	array_size = bits * 2 * sizeof(uint32_t) * 4;
 	samplers->buffer_constants = realloc(samplers->buffer_constants, array_size);
 	memset(samplers->buffer_constants, 0, array_size);
-	for (i = 0; i < bits; i++)
-		if (samplers->views.enabled_mask & (1 << i))
-		   samplers->buffer_constants[i] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+	for (i = 0; i < bits; i++) {
+		if (samplers->views.enabled_mask & (1 << i)) {
+			uint32_t offset = i * 2;
+			samplers->buffer_constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+			samplers->buffer_constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6;
+		}
+	}
 
 	cb.buffer = NULL;
 	cb.user_buffer = samplers->buffer_constants;
@@ -1063,35 +1068,6 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type
 	pipe_resource_reference(&cb.buffer, NULL);
 }
 
-static void r600_setup_txq_cube_array_constants(struct r600_context *rctx, int shader_type)
-{
-	struct r600_textures_info *samplers = &rctx->samplers[shader_type];
-	int bits;
-	uint32_t array_size;
-	struct pipe_constant_buffer cb;
-	int i;
-
-	if (!samplers->views.dirty_txq_constants)
-		return;
-
-	samplers->views.dirty_txq_constants = FALSE;
-
-	bits = util_last_bit(samplers->views.enabled_mask);
-	array_size = bits * sizeof(uint32_t) * 4;
-	samplers->txq_constants = realloc(samplers->txq_constants, array_size);
-	memset(samplers->txq_constants, 0, array_size);
-	for (i = 0; i < bits; i++)
-		if (samplers->views.enabled_mask & (1 << i))
-			samplers->txq_constants[i] = samplers->views.views[i]->base.texture->array_size / 6;
-
-	cb.buffer = NULL;
-	cb.user_buffer = samplers->txq_constants;
-	cb.buffer_offset = 0;
-	cb.buffer_size = array_size;
-	rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type, R600_TXQ_CONST_BUFFER, &cb);
-	pipe_resource_reference(&cb.buffer, NULL);
-}
-
 /* set sample xy locations as array of fragment shader constants */
 void r600_set_sample_locations_constant_buffer(struct r600_context *rctx)
 {
@@ -1175,7 +1151,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
 	struct pipe_context * ctx = (struct pipe_context*)rctx;
 	bool ps_dirty = false, vs_dirty = false, gs_dirty = false;
 	bool blend_disable;
-
+	bool need_buf_const;
 	if (!rctx->blitter->running) {
 		unsigned i;
 
@@ -1296,29 +1272,35 @@ static bool r600_update_derived_state(struct r600_context *rctx)
 
 	/* on R600 we stuff masks + txq info into one constant buffer */
 	/* on evergreen we only need a txq info one */
-	if (rctx->b.chip_class < EVERGREEN) {
-		if (rctx->ps_shader && rctx->ps_shader->current->shader.uses_tex_buffers)
-			r600_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
-		if (rctx->vs_shader && rctx->vs_shader->current->shader.uses_tex_buffers)
-			r600_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
-		if (rctx->gs_shader && rctx->gs_shader->current->shader.uses_tex_buffers)
-			r600_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY);
-	} else {
-		if (rctx->ps_shader && rctx->ps_shader->current->shader.uses_tex_buffers)
-			eg_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
-		if (rctx->vs_shader && rctx->vs_shader->current->shader.uses_tex_buffers)
-			eg_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
-		if (rctx->gs_shader && rctx->gs_shader->current->shader.uses_tex_buffers)
-			eg_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY);
+	if (rctx->ps_shader) {
+		need_buf_const = rctx->ps_shader->current->shader.uses_tex_buffers || rctx->ps_shader->current->shader.has_txq_cube_array_z_comp;
+		if (need_buf_const) {
+			if (rctx->b.chip_class < EVERGREEN)
+				r600_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
+			else
+				eg_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
+		}
 	}
 
+	if (rctx->vs_shader) {
+		need_buf_const = rctx->vs_shader->current->shader.uses_tex_buffers || rctx->vs_shader->current->shader.has_txq_cube_array_z_comp;
+		if (need_buf_const) {
+			if (rctx->b.chip_class < EVERGREEN)
+				r600_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
+			else
+				eg_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
+		}
+	}
 
-	if (rctx->ps_shader && rctx->ps_shader->current->shader.has_txq_cube_array_z_comp)
-		r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_FRAGMENT);
-	if (rctx->vs_shader && rctx->vs_shader->current->shader.has_txq_cube_array_z_comp)
-		r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_VERTEX);
-	if (rctx->gs_shader && rctx->gs_shader->current->shader.has_txq_cube_array_z_comp)
-		r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_GEOMETRY);
+	if (rctx->gs_shader) {
+		need_buf_const = rctx->gs_shader->current->shader.uses_tex_buffers || rctx->gs_shader->current->shader.has_txq_cube_array_z_comp;
+		if (need_buf_const) {
+			if (rctx->b.chip_class < EVERGREEN)
+				r600_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY);
+			else
+				eg_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY);
+		}
+	}
 
 	if (rctx->b.chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) {
 		if (!r600_adjust_gprs(rctx)) {
-- 
2.1.0



More information about the mesa-dev mailing list