Mesa (master): r600g: optimise op2 and swapped op2 emission.

Fri Aug 27 05:34:36 UTC 2010

Module: Mesa
Branch: master
Commit: d8fb13bae30b6ff214c5d5158b9bcaf430f56b43
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d8fb13bae30b6ff214c5d5158b9bcaf430f56b43

Author: Dave Airlie <airlied at redhat.com>
Date:   Wed Aug 25 16:02:38 2010 +1000

r600g: optimise op2 and swapped op2 emission.

this makes op2 emission smaller, since it skips instructions
that don't write to the dst. not sure if this could have unwanted
side effects but try it and see.

---

 src/gallium/drivers/r600/r600_shader.c |   85 ++++++++++++++------------------
 1 files changed, 37 insertions(+), 48 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 5b43a13..1470bb5 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -642,30 +642,44 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s
 	return 0;
 }
 
-static int tgsi_op2(struct r600_shader_ctx *ctx)
+static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	struct r600_bc_alu_src r600_src[3];
 	struct r600_bc_alu alu;
 	int i, j, r;
+	int lasti = 0;
+
+	for (i = 0; i < 4; i++) {
+		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
+			lasti = i;
+		}
+	}
 
 	r = tgsi_split_constant(ctx, r600_src);
 	if (r)
 		return r;
-	for (i = 0; i < 4; i++) {
+	for (i = 0; i < lasti + 1; i++) {
+		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+			continue;
+
 		memset(&alu, 0, sizeof(struct r600_bc_alu));
-		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
-			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
-			alu.dst.chan = i;
-		} else {
-			alu.inst = ctx->inst_info->r600_opcode;
+		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+		if (r)
+			return r;
+		
+		alu.inst = ctx->inst_info->r600_opcode;
+		if (!swap) {
 			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
 				alu.src[j] = r600_src[j];
 				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
 			}
-			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
-			if (r)
-				return r;
+		} else {
+			alu.src[0] = r600_src[1];
+			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
+
+			alu.src[1] = r600_src[0];
+			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
 		}
 		/* handle some special cases */
 		switch (ctx->inst_info->tgsi_opcode) {
@@ -678,7 +692,7 @@ static int tgsi_op2(struct r600_shader_ctx *ctx)
 		default:
 			break;
 		}
-		if (i == 3) {
+		if (i == lasti) {
 			alu.last = 1;
 		}
 		r = r600_bc_add_alu(ctx->bc, &alu);
@@ -688,6 +702,16 @@ static int tgsi_op2(struct r600_shader_ctx *ctx)
 	return 0;
 }
 
+static int tgsi_op2(struct r600_shader_ctx *ctx)
+{
+	return tgsi_op2_s(ctx, 0);
+}
+
+static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
+{
+	return tgsi_op2_s(ctx, 1);
+}
+
 /* 
  * r600 - trunc to -PI..PI range
  * r700 - normalize by dividing by 2PI
@@ -833,41 +857,6 @@ static int tgsi_kill(struct r600_shader_ctx *ctx)
 	return 0;
 }
 
-static int tgsi_slt(struct r600_shader_ctx *ctx)
-{
-	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu_src r600_src[3];
-	struct r600_bc_alu alu;
-	int i, r;
-
-	r = tgsi_split_constant(ctx, r600_src);
-	if (r)
-		return r;
-	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
-		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
-			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
-			alu.dst.chan = i;
-		} else {
-			alu.inst = ctx->inst_info->r600_opcode;
-			alu.src[1] = r600_src[0];
-			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
-			alu.src[0] = r600_src[1];
-			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
-			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
-			if (r)
-				return r;
-		}
-		if (i == 3) {
-			alu.last = 1;
-		}
-		r = r600_bc_add_alu(ctx->bc, &alu);
-		if (r)
-			return r;
-	}
-	return 0;
-}
-
 static int tgsi_lit(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -1747,7 +1736,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
 	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
-	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt},
+	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
 	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
 	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
 	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
@@ -1785,7 +1774,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
 	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
-	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_slt},
+	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
 	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
 	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},