Mesa (master): r600g: fix abs() support on ALU 3 source operands instructions

Marek Olšák mareko at kemper.freedesktop.org
Fri Feb 6 19:05:40 UTC 2015


Module: Mesa
Branch: master
Commit: acef65503e79ce61a16bdba92462f0ed8a7b52c2
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=acef65503e79ce61a16bdba92462f0ed8a7b52c2

Author: Xavier Bouchoux <xavierb at gmail.com>
Date:   Wed Jan 28 02:20:51 2015 +0100

r600g: fix abs() support on ALU 3 source operands instructions

Since op3 (three-source) ALU instructions do not support the abs() modifier
on source operands, spill the operand to a temp register and apply the
modifiers there when needed.

Signed-off-by: Xavier Bouchoux <xavierb at gmail.com>
Reviewed-by: Glenn Kennard <glenn.kennard at gmail.com>

---

 src/gallium/drivers/r600/r600_asm.c    |    6 +++
 src/gallium/drivers/r600/r600_shader.c |   63 ++++++++++++++++++++++++++++----
 src/gallium/drivers/r600/r700_asm.c    |    1 +
 3 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index ce3c2d1..79e7f74 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1133,6 +1133,11 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
 		return -ENOMEM;
 	memcpy(nalu, alu, sizeof(struct r600_bytecode_alu));
 
+	if (alu->is_op3) {
+		/* will fail later since alu does not support it. */
+		assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
+	}
+
 	if (bc->cf_last != NULL && bc->cf_last->op != type) {
 		/* check if we could add it anyway */
 		if (bc->cf_last->op == CF_OP_ALU &&
@@ -1491,6 +1496,7 @@ static int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecod
 				S_SQ_ALU_WORD0_LAST(alu->last);
 
 	if (alu->is_op3) {
+		assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
 		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
 					S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
 					S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 16e820e..77c9909 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4862,6 +4862,39 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru
 	return 0;
 }
 
+static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx,
+                                  unsigned temp, int temp_chan,
+                                  struct r600_bytecode_alu_src *bc_src,
+                                  const struct r600_shader_src *shader_src,
+                                  unsigned chan)
+{
+	struct r600_bytecode_alu alu;
+	int r;
+	/* Build bc_src for one op3 operand; an abs() operand is first spilled to temp.temp_chan. Returns 0, or the r600_bytecode_add_alu() error. */
+	r600_bytecode_src(bc_src, shader_src, chan);
+
+	/* op3 operands don't support the abs modifier: apply it with a MOV into the temp and read the temp instead */
+	if (bc_src->abs) {
+		assert(temp!=0);      /* the spill needs a real extra register; callers pass 0 when they did not allocate one */
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP1_MOV;
+		alu.dst.sel = temp;
+		alu.dst.chan = temp_chan;
+		alu.dst.write = 1;
+		/* copy the whole operand, modifiers included, so the MOV is what applies them */
+		alu.src[0] = *bc_src;
+		alu.last = true; /* NOTE(review): author's open question -- is marking the MOV as last sufficient? */
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+		/* hand back a plain, modifier-free reference to the spilled value */
+		memset(bc_src, 0, sizeof(*bc_src));
+		bc_src->sel = temp;
+		bc_src->chan = temp_chan;
+	}
+	return 0;
+}
+
 static int tgsi_op3(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -4876,7 +4909,9 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.op = ctx->inst_info->op;
 		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
-			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
+			r = tgsi_make_src_for_op3(ctx, ctx->temp_reg, j, &alu.src[j], &ctx->src[j], i);
+			if (r)
+				return r;
 		}
 
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
@@ -5967,7 +6002,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	struct r600_bytecode_alu alu;
 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
-	unsigned i;
+	unsigned i, extra_temp;
 	int r;
 
 	/* optimize if it's just an equal balance */
@@ -6037,6 +6072,10 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 	}
 
 	/* src0 * src1 + (1 - src0) * src2 */
+	if (ctx->src[0].abs || ctx->src[1].abs) /* XXX avoid duplicating condition */
+		extra_temp = r600_get_temp(ctx);
+	else
+		extra_temp = 0;
 	for (i = 0; i < lasti + 1; i++) {
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 			continue;
@@ -6044,8 +6083,12 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.op = ALU_OP3_MULADD;
 		alu.is_op3 = 1;
-		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
-		r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
+		r = tgsi_make_src_for_op3(ctx, extra_temp, 0, &alu.src[0], &ctx->src[0], i);
+		if (r)
+			return r;
+		r = tgsi_make_src_for_op3(ctx, extra_temp, 1, &alu.src[1], &ctx->src[1], i);
+		if (r)
+			return r;
 		alu.src[2].sel = ctx->temp_reg;
 		alu.src[2].chan = i;
 
@@ -6074,9 +6117,15 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
 
 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.op = ALU_OP3_CNDGE;
-		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
-		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
-		r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
+		r = tgsi_make_src_for_op3(ctx, ctx->temp_reg, 0, &alu.src[0], &ctx->src[0], i);
+		if (r)
+			return r;
+		r = tgsi_make_src_for_op3(ctx, ctx->temp_reg, 1, &alu.src[1], &ctx->src[2], i);
+		if (r)
+			return r;
+		r = tgsi_make_src_for_op3(ctx, ctx->temp_reg, 2, &alu.src[2], &ctx->src[1], i);
+		if (r)
+			return r;
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
 		alu.dst.chan = i;
 		alu.dst.write = 1;
diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c
index 4a9fa36..04f8c62 100644
--- a/src/gallium/drivers/r600/r700_asm.c
+++ b/src/gallium/drivers/r600/r700_asm.c
@@ -48,6 +48,7 @@ int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *
 
 	/* don't replace gpr by pv or ps for destination register */
 	if (alu->is_op3) {
+		assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
 		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
 					S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
 			                S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |




More information about the mesa-commit mailing list