Mesa (master): r600g: Add support for relative addressing on constant buffers.

Henri Verbeet hverbeet at kemper.freedesktop.org
Mon Feb 7 14:24:47 UTC 2011


Module: Mesa
Branch: master
Commit: 077c448d184799e0d9ec962013ec784c6a5c1807
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=077c448d184799e0d9ec962013ec784c6a5c1807

Author: Henri Verbeet <hverbeet at gmail.com>
Date:   Mon Feb  7 15:22:08 2011 +0100

r600g: Add support for relative addressing on constant buffers.

Relative addressing of constant buffers can't work properly through the
kcache, since you can only address within the currently locked kcache window.
Instead, this patch binds the constant buffer as a shader resource, and then
explicitly fetches the constant using a vertex fetch with fetch type
VTX_FETCH_NO_INDEX_OFFSET from the shader. There's probably still some room
for improvement; for example, doing the fetch right before the instruction
that needs the value may not be quite optimal.

---

 src/gallium/drivers/r600/evergreen_state.c   |   12 ++-
 src/gallium/drivers/r600/r600_pipe.c         |    2 +-
 src/gallium/drivers/r600/r600_pipe.h         |    4 +
 src/gallium/drivers/r600/r600_shader.c       |  119 +++++++++++++++++++++++---
 src/gallium/drivers/r600/r600_state.c        |    9 ++-
 src/gallium/drivers/r600/r600_state_common.c |   28 ++++++-
 6 files changed, 152 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index bfa2199..83ab0df 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -434,7 +434,8 @@ static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned cou
 
 	for (int i = 0; i < count; i++) {
 		if (resource[i]) {
-			evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i);
+			evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state,
+								     i + R600_MAX_CONST_BUFFERS);
 		}
 	}
 }
@@ -449,9 +450,11 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou
 	for (i = 0; i < count; i++) {
 		if (&rctx->ps_samplers.views[i]->base != views[i]) {
 			if (resource[i])
-				evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i);
+				evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state,
+									     i + R600_MAX_CONST_BUFFERS);
 			else
-				evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+				evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+									     i + R600_MAX_CONST_BUFFERS);
 
 			pipe_sampler_view_reference(
 				(struct pipe_sampler_view **)&rctx->ps_samplers.views[i],
@@ -460,7 +463,8 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou
 	}
 	for (i = count; i < NUM_TEX_UNITS; i++) {
 		if (rctx->ps_samplers.views[i]) {
-			evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+			evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+								     i + R600_MAX_CONST_BUFFERS);
 			pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL);
 		}
 	}
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 48ff95b..0b20b20 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -370,7 +370,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
 	case PIPE_SHADER_CAP_MAX_CONSTS:
 		return 256; //max native parameters
 	case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
-		return 1;
+		return R600_MAX_CONST_BUFFERS;
 	case PIPE_SHADER_CAP_MAX_PREDS:
 		return 0; /* FIXME */
 	case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 5f04fbf..b7ea6de 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -36,6 +36,8 @@
 #include "r600_shader.h"
 #include "r600_resource.h"
 
+#define R600_MAX_CONST_BUFFERS 1
+
 enum r600_pipe_state_id {
 	R600_PIPE_STATE_BLEND = 0,
 	R600_PIPE_STATE_BLEND_COLOR,
@@ -140,7 +142,9 @@ struct r600_pipe_context {
 	struct r600_pipe_shader 	*ps_shader;
 	struct r600_pipe_shader 	*vs_shader;
 	struct r600_pipe_state		vs_const_buffer;
+	struct r600_pipe_state		vs_const_buffer_resource[R600_MAX_CONST_BUFFERS];
 	struct r600_pipe_state		ps_const_buffer;
+	struct r600_pipe_state		ps_const_buffer_resource[R600_MAX_CONST_BUFFERS];
 	struct r600_pipe_rasterizer	*rasterizer;
 	/* shader information */
 	unsigned			sprite_coord_enable;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 50f9ed6..acb3ef2 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -28,6 +28,7 @@
 #include "r600_pipe.h"
 #include "r600_asm.h"
 #include "r600_sq.h"
+#include "r600_formats.h"
 #include "r600_opcodes.h"
 #include "r600d.h"
 #include <stdio.h>
@@ -296,6 +297,7 @@ struct r600_shader_ctx {
 	unsigned				type;
 	unsigned				file_offset[TGSI_FILE_COUNT];
 	unsigned				temp_reg;
+	unsigned				ar_reg;
 	struct r600_shader_tgsi_instruction	*inst_info;
 	struct r600_bc				*bc;
 	struct r600_shader			*shader;
@@ -541,6 +543,55 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
 	}
 }
 
+static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
+{
+	struct r600_bc_vtx vtx;
+	unsigned int ar_reg;		/* GPR used as the fetch's src_gpr (index source) */
+	int r;
+	/* Emits a vertex fetch into dst_reg indexed by ctx->ar_reg (+ offset); returns 0, or the non-zero code from the failed emit. */
+	if (offset) {			/* non-zero offset: first emit dst_reg = ctx->ar_reg + offset */
+		struct r600_bc_alu alu;
+
+		memset(&alu, 0, sizeof(alu));
+
+		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
+		alu.src[0].sel = ctx->ar_reg;
+
+		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;	/* second operand is the literal offset */
+		alu.src[1].value = offset;
+
+		alu.dst.sel = dst_reg;	/* dst_reg doubles as scratch for the sum */
+		alu.dst.write = 1;
+		alu.last = 1;
+
+		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+			return r;
+
+		ar_reg = dst_reg;	/* the fetch below indexes with the sum just written */
+	} else {
+		ar_reg = ctx->ar_reg;	/* zero offset: index with ctx->ar_reg directly, no ADD emitted */
+	}
+
+	memset(&vtx, 0, sizeof(vtx));
+	vtx.fetch_type = 2;		/* VTX_FETCH_NO_INDEX_OFFSET */
+	vtx.src_gpr = ar_reg;
+	vtx.mega_fetch_count = 16;	/* NOTE(review): presumably bytes, i.e. one 4 x 32-bit vector - confirm */
+	vtx.dst_gpr = dst_reg;		/* result overwrites dst_reg (which may also have held the index) */
+	vtx.dst_sel_x = 0;		/* SEL_X */
+	vtx.dst_sel_y = 1;		/* SEL_Y */
+	vtx.dst_sel_z = 2;		/* SEL_Z */
+	vtx.dst_sel_w = 3;		/* SEL_W */
+	vtx.data_format = FMT_32_32_32_32_FLOAT;
+	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
+	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
+	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */
+
+	if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
+		return r;
+
+	return 0;
+}
+
 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -554,7 +605,19 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx)
 		tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
 	}
 	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
-		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
+		if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
+			continue;
+		}
+
+		if (ctx->src[i].rel) {
+			int treg = r600_get_temp(ctx);
+			if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
+				return r;
+
+			ctx->src[i].sel = treg;
+			ctx->src[i].rel = 0;
+			j--;
+		} else if (j > 0) {
 			int treg = r600_get_temp(ctx);
 			for (k = 0; k < 4; k++) {
 				memset(&alu, 0, sizeof(struct r600_bc_alu));
@@ -683,8 +746,9 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
 	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
 
 	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
-	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
+	ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
 			ctx.info.file_count[TGSI_FILE_TEMPORARY];
+	ctx.temp_reg = ctx.ar_reg + 1;
 
 	ctx.nliterals = 0;
 	ctx.literals = NULL;
@@ -1760,7 +1824,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 	memset(&tex, 0, sizeof(struct r600_bc_tex));
 	tex.inst = opcode;
 	tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
-	tex.resource_id = tex.sampler_id;
+	tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
 	tex.src_gpr = src_gpr;
 	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
 	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
@@ -2302,15 +2366,21 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
 
 	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
 	alu.last = 1;
-	alu.dst.chan = 0;
-	alu.dst.sel = ctx->temp_reg;
+	alu.dst.sel = ctx->ar_reg;
 	alu.dst.write = 1;
 	r = r600_bc_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
+
+	/* TODO: Note that the MOVA can be avoided if we never use AR for
+	 * indexing non-CB registers in the current ALU clause. Similarly, we
+	 * need to load AR from ar_reg again if we started a new clause
+	 * between ARL and AR usage. The easy way to do that is to remove
+	 * the MOVA here, and load it for the first AR access after ar_reg
+	 * has been modified in each clause. */
 	memset(&alu, 0, sizeof(struct r600_bc_alu));
 	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
-	alu.src[0].sel = ctx->temp_reg;
+	alu.src[0].sel = ctx->ar_reg;
 	alu.src[0].chan = 0;
 	alu.last = 1;
 	r = r600_bc_add_alu(ctx->bc, &alu);
@@ -2325,22 +2395,47 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
 	struct r600_bc_alu alu;
 	int r;
 
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
-
 	switch (inst->Instruction.Opcode) {
 	case TGSI_OPCODE_ARL:
-		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
+		memset(&alu, 0, sizeof(alu));
+		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
+		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+		alu.dst.sel = ctx->ar_reg;
+		alu.dst.write = 1;
+		alu.last = 1;
+
+		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+			return r;
+
+		memset(&alu, 0, sizeof(alu));
+		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
+		alu.src[0].sel = ctx->ar_reg;
+		alu.dst.sel = ctx->ar_reg;
+		alu.dst.write = 1;
+		alu.last = 1;
+
+		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+			return r;
 		break;
 	case TGSI_OPCODE_ARR:
-		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA;
+		memset(&alu, 0, sizeof(alu));
+		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
+		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+		alu.dst.sel = ctx->ar_reg;
+		alu.dst.write = 1;
+		alu.last = 1;
+
+		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+			return r;
 		break;
 	default:
 		assert(0);
 		return -1;
 	}
 
-	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
-
+	memset(&alu, 0, sizeof(alu));
+	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
+	alu.src[0].sel = ctx->ar_reg;
 	alu.last = 1;
 
 	r = r600_bc_add_alu(ctx->bc, &alu);
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index e4382ba..74dad45 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -495,9 +495,11 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
 	for (i = 0; i < count; i++) {
 		if (&rctx->ps_samplers.views[i]->base != views[i]) {
 			if (resource[i])
-				r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i);
+				r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state,
+									i + R600_MAX_CONST_BUFFERS);
 			else
-				r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+				r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+									i + R600_MAX_CONST_BUFFERS);
 
 			pipe_sampler_view_reference(
 				(struct pipe_sampler_view **)&rctx->ps_samplers.views[i],
@@ -507,7 +509,8 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
 	}
 	for (i = count; i < NUM_TEX_UNITS; i++) {
 		if (rctx->ps_samplers.views[i]) {
-			r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+			r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+								i + R600_MAX_CONST_BUFFERS);
 			pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL);
 		}
 	}
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index a2b2c17..bcaf2b9 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -317,6 +317,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 {
 	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 	struct r600_resource_buffer *rbuffer = r600_buffer(buffer);
+	struct r600_pipe_state *rstate;
 	uint32_t offset;
 
 	/* Note that the state tracker can unbind constant buffers by
@@ -327,6 +328,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 	}
 
 	r600_upload_const_buffer(rctx, &rbuffer, &offset);
+	offset += r600_bo_offset(rbuffer->r.bo);
 
 	switch (shader) {
 	case PIPE_SHADER_VERTEX:
@@ -337,8 +339,19 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 					0xFFFFFFFF, NULL);
 		r600_pipe_state_add_reg(&rctx->vs_const_buffer,
 					R_028980_ALU_CONST_CACHE_VS_0,
-					(r600_bo_offset(rbuffer->r.bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->r.bo);
+					offset >> 8, 0xFFFFFFFF, rbuffer->r.bo);
 		r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
+
+		rstate = &rctx->vs_const_buffer_resource[index];
+		rstate->id = R600_PIPE_STATE_RESOURCE;
+		rstate->nregs = 0;
+		if (rctx->family >= CHIP_CEDAR) {
+			evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+			evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
+		} else {
+			r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+			r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
+		}
 		break;
 	case PIPE_SHADER_FRAGMENT:
 		rctx->ps_const_buffer.nregs = 0;
@@ -348,8 +361,19 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 					0xFFFFFFFF, NULL);
 		r600_pipe_state_add_reg(&rctx->ps_const_buffer,
 					R_028940_ALU_CONST_CACHE_PS_0,
-					(r600_bo_offset(rbuffer->r.bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->r.bo);
+					offset >> 8, 0xFFFFFFFF, rbuffer->r.bo);
 		r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
+
+		rstate = &rctx->ps_const_buffer_resource[index];
+		rstate->id = R600_PIPE_STATE_RESOURCE;
+		rstate->nregs = 0;
+		if (rctx->family >= CHIP_CEDAR) {
+			evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+			evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
+		} else {
+			r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+			r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
+		}
 		break;
 	default:
 		R600_ERR("unsupported %d\n", shader);




More information about the mesa-commit mailing list