[Mesa-dev] [PATCH 1/2] r600: Correct IDIV if DST and SRC use the same temporary

Gert Wollny gw.fossdev at gmail.com
Sat May 26 16:48:31 UTC 2018


In cases like

  IDIV TEMP[0].xy TEMP[0].xx TEMP[1].yy

the result will be written to the same register that is also a source register.
Since the components are evaluated one by one, writing the result of one
component may overwrite a source value that is still needed when a later
component is evaluated. Work around this by adding another temporary to store
the result if the destination temporary index is equal to one of the source
temporary indices, and by copying the result from that temporary to the
destination once all components have been evaluated.

Fixes:
  dEQP-GLES2.functional.shaders.operator.binary_operator.div.*
Signed-off-by: Gert Wollny <gw.fossdev at gmail.com>
---
 src/gallium/drivers/r600/r600_shader.c | 51 ++++++++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index bd511c76ac..e29cca6b98 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5727,10 +5727,18 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
 	struct r600_bytecode_alu alu;
 	int i, r, j;
 	unsigned write_mask = inst->Dst[0].Register.WriteMask;
+	int lasti = tgsi_last_instruction(write_mask);
 	int tmp0 = ctx->temp_reg;
 	int tmp1 = r600_get_temp(ctx);
 	int tmp2 = r600_get_temp(ctx);
 	int tmp3 = r600_get_temp(ctx);
+	int tmp4 = 0;
+
+	/* Use additional temp if dst register and src register are the same */
+	if (inst->Src[0].Register.Index == inst->Dst[0].Register.Index ||
+		 inst->Src[1].Register.Index == inst->Dst[0].Register.Index) {
+		tmp4 = r600_get_temp(ctx);
+	}
 	/* Unsigned path:
 	 *
 	 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder
@@ -6345,7 +6353,13 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
 			alu.dst.chan = 2;
 			alu.dst.write = 1;
 		} else {
-			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+			if (tmp4 > 0) {
+				alu.dst.sel = tmp4;
+				alu.dst.chan = i;
+				alu.dst.write = 1;
+			} else {
+				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+			}
 		}
 
 		alu.src[0].sel = tmp1;
@@ -6387,7 +6401,13 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
 				alu.op = ALU_OP3_CNDGE_INT;
 				alu.is_op3 = 1;
 
-				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+				if (tmp4 > 0) {
+					alu.dst.sel = tmp4;
+					alu.dst.chan = i;
+					alu.dst.write = 1;
+				} else {
+					tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+				}
 
 				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
 				alu.src[1].sel = tmp0;
@@ -6423,7 +6443,13 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
 				alu.op = ALU_OP3_CNDGE_INT;
 				alu.is_op3 = 1;
 
-				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+				if (tmp4 > 0) {
+					alu.dst.sel = tmp4;
+					alu.dst.chan = i;
+					alu.dst.write = 1;
+				} else {
+					tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+				}
 
 				alu.src[0].sel = tmp2;
 				alu.src[0].chan = 2;
@@ -6438,6 +6464,25 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
 			}
 		}
 	}
+
+	if (tmp4 > 0) {
+		for (i = 0; i <= lasti; ++i) {
+			if (!(write_mask & (1<<i)))
+				continue;
+
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+			alu.op = ALU_OP1_MOV;
+			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+			alu.src[0].sel = tmp4;
+			alu.src[0].chan = i;
+
+			if (i == lasti)
+				alu.last = 1;
+			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
+				return r;
+		}
+	}
+
 	return 0;
 }
 
-- 
2.16.1



More information about the mesa-dev mailing list