Mesa (master): aco: optimize v_add_u32(v_mul_lo_u16) -> v_mad_u32_u16
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Thu Nov 12 12:42:17 UTC 2020
Module: Mesa
Branch: master
Commit: db9d13b4ffd35cbc8ecf90e1b930fe6b1392275b
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=db9d13b4ffd35cbc8ecf90e1b930fe6b1392275b
Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date: Mon Nov 2 15:34:25 2020 +0100
aco: optimize v_add_u32(v_mul_lo_u16) -> v_mad_u32_u16
fossils-db (Vega10):
Totals from 779 (0.56% of 139517) affected shaders:
CodeSize: 1187928 -> 1187508 (-0.04%); split: -0.04%, +0.00%
Instrs: 247353 -> 244608 (-1.11%); split: -1.11%, +0.00%
Cycles: 1127472 -> 1116420 (-0.98%); split: -0.98%, +0.00%
VMEM: 139720 -> 138297 (-1.02%); split: +0.00%, -1.02%
SMEM: 51069 -> 50735 (-0.65%); split: +0.04%, -0.69%
Copies: 11548 -> 11547 (-0.01%); split: -0.03%, +0.03%
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7425>
---
src/amd/compiler/aco_optimizer.cpp | 9 ++++++++-
src/amd/compiler/tests/test_optimizer.cpp | 24 ++++++++++++++++++++++++
2 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index f427b02c926..04fed5cf5ea 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -1310,6 +1310,12 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
}
break;
}
+ case aco_opcode::v_mul_lo_u16:
+ if (instr->definitions[0].isNUW()) {
+ /* Most 16-bit mul optimizations are only valid if there is no overflow. */
+ ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get());
+ }
+ break;
case aco_opcode::v_and_b32: { /* abs */
if (!instr->usesModifiers() && instr->operands[1].isTemp() &&
instr->operands[1].getTemp().type() == RegType::vgpr &&
@@ -2849,7 +2855,8 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
else if (combine_three_valu_op(ctx, instr, aco_opcode::s_add_u32, aco_opcode::v_add3_u32, "012", 1 | 2)) ;
else if (combine_three_valu_op(ctx, instr, aco_opcode::v_add_u32, aco_opcode::v_add3_u32, "012", 1 | 2)) ;
else if (combine_three_valu_op(ctx, instr, aco_opcode::s_lshl_b32, aco_opcode::v_lshl_add_u32, "120", 1 | 2)) ;
- else combine_three_valu_op(ctx, instr, aco_opcode::v_lshlrev_b32, aco_opcode::v_lshl_add_u32, "210", 1 | 2);
+ else if (combine_three_valu_op(ctx, instr, aco_opcode::v_lshlrev_b32, aco_opcode::v_lshl_add_u32, "210", 1 | 2)) ;
+ else combine_three_valu_op(ctx, instr, aco_opcode::v_mul_lo_u16, aco_opcode::v_mad_u32_u16, "120", 1 | 2) ;
}
} else if (instr->opcode == aco_opcode::v_add_co_u32 ||
instr->opcode == aco_opcode::v_add_co_u32_e64) {
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index 4ac8dc4dd11..3bf73316908 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -195,6 +195,30 @@ BEGIN_TEST(optimize.mad_u32_u16)
//! p_unit_test 5, %res5
writeout(5, create_mad_u32_u16(Operand(42u), Operand(inputs[0]), Operand(0u), false));
+ //~gfx9! v1: %mul6 = v_mul_lo_u16 %a, %b
+ //~gfx9! v1: %res6 = v_add_u32 %mul6, %b
+ //~gfx10! v1: %mul6 = v_mul_lo_u16_e64 %a, %b
+ //~gfx10! v1: %res6 = v_add_u32 %mul6, %b
+ //! p_unit_test 6, %res6
+ Temp mul;
+ if (i >= GFX10) {
+ mul = bld.vop3(aco_opcode::v_mul_lo_u16_e64, bld.def(v1), inputs[0], inputs[1]);
+ } else {
+ mul = bld.vop2(aco_opcode::v_mul_lo_u16, bld.def(v1), inputs[0], inputs[1]);
+ }
+ writeout(6, bld.vadd32(bld.def(v1), mul, inputs[1]));
+
+ //~gfx9! v1: %res7 = v_mad_u32_u16 %a, %b, %b
+ //~gfx10! v1: (nuw)%mul7 = v_mul_lo_u16_e64 %a, %b
+ //~gfx10! v1: %res7 = v_add_u32 %mul7, %b
+ //! p_unit_test 7, %res7
+ if (i >= GFX10) {
+ mul = bld.nuw().vop3(aco_opcode::v_mul_lo_u16_e64, bld.def(v1), inputs[0], inputs[1]);
+ } else {
+ mul = bld.nuw().vop2(aco_opcode::v_mul_lo_u16, bld.def(v1), inputs[0], inputs[1]);
+ }
+ writeout(7, bld.vadd32(bld.def(v1), mul, inputs[1]));
+
finish_opt_test();
}
END_TEST
More information about the mesa-commit mailing list