[Mesa-dev] [PATCH v2 1/5] r600: double multiply can handle only one multiply at a time
Nicolai Hähnle
nhaehnle at gmail.com
Thu Jan 19 13:59:36 UTC 2017
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
It seems clear that trying to multiply two pairs of doubles would result
in the temporary register getting overwritten by the second pair. So
make the code more explicit.
---
src/gallium/drivers/r600/r600_shader.c | 36 ++++++++++++++++++----------------
1 file changed, 19 insertions(+), 17 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index ebe2744..7d1452a 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4319,39 +4319,41 @@ static int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
int i, j, k, r;
struct r600_bytecode_alu alu;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
int t1 = ctx->temp_reg;
- for (k = 0; k < 2; k++) {
- if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2))))
- continue;
+ /* t1 would get overwritten below if we actually tried to
+ * multiply two pairs of doubles at a time. */
+ assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
+ inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
- for (i = 0; i < 4; i++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ctx->inst_info->op;
- for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
- }
- alu.dst.sel = t1;
- alu.dst.chan = i;
- alu.dst.write = 1;
- if (i == 3)
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1;
+
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ctx->inst_info->op;
+ for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+ r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
}
+ alu.dst.sel = t1;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
}
for (i = 0; i <= lasti; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
alu.src[0].sel = t1;
alu.src[0].chan = i;
tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
--
2.7.4
More information about the mesa-dev
mailing list