[Mesa-dev] [PATCH 1/2] r600g: use ieee variants of multiplication instructions
Ilia Mirkin
imirkin at alum.mit.edu
Tue Jan 24 02:18:36 UTC 2017
This matches the behavior of most other drivers, including nouveau.
Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
Untested. This also leaves RCP and RSQ with the clamped variants. I suspect
that is wrong, but it seems unrelated to multiplication per se.
src/gallium/drivers/r600/r600_shader.c | 36 +++++++++++++++---------------
src/gallium/drivers/r600/sb/sb_ra_init.cpp | 1 +
2 files changed, 19 insertions(+), 18 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index eaabb04..0114f8f 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -9066,16 +9066,16 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]
[TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq},
[TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
[TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
- [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
+ [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2},
[TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
- [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
- [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
+ [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
[TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
[TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
- [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
+ [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
[TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
@@ -9093,7 +9093,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]
[32] = { ALU_OP0_NOP, tgsi_unsupported},
[33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
@@ -9129,7 +9129,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]
[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
[69] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
[TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
[TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},
@@ -9264,16 +9264,16 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
[TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq},
[TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
[TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
- [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
+ [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2},
[TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
- [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
- [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
+ [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
[TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
[TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
- [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
+ [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
[TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3},
[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
@@ -9291,7 +9291,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
[32] = { ALU_OP0_NOP, tgsi_unsupported},
[33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
@@ -9327,7 +9327,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
[69] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
[TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
[TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},
@@ -9487,16 +9487,16 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
[TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr},
[TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
[TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
- [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
+ [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2},
[TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
- [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
- [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
+ [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
[TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
[TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
- [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
+ [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
[TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3},
[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
@@ -9514,7 +9514,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
[32] = { ALU_OP0_NOP, tgsi_unsupported},
[33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig},
[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
@@ -9550,7 +9550,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
[69] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
[TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
[TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},
diff --git a/src/gallium/drivers/r600/sb/sb_ra_init.cpp b/src/gallium/drivers/r600/sb/sb_ra_init.cpp
index 95b9290..68ee982 100644
--- a/src/gallium/drivers/r600/sb/sb_ra_init.cpp
+++ b/src/gallium/drivers/r600/sb/sb_ra_init.cpp
@@ -689,6 +689,7 @@ void ra_split::split_packed_ins(alu_packed_node *n) {
void ra_split::split_alu_packed(alu_packed_node* n) {
switch (n->op()) {
case ALU_OP2_DOT4:
+ case ALU_OP2_DOT4_IEEE:
case ALU_OP2_CUBE:
split_packed_ins(n);
break;
--
2.10.2
More information about the mesa-dev
mailing list