Mesa (staging/22.0): aco/optimizer: fix call to can_use_opsel() in apply_insert()
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Thu Apr 21 07:13:49 UTC 2022
Module: Mesa
Branch: staging/22.0
Commit: d979b084c81df977ccd4de981f8132ff27e29191
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d979b084c81df977ccd4de981f8132ff27e29191
Author: Daniel Schürmann <daniel at schuermann.dev>
Date: Wed Mar 23 15:12:09 2022 +0100
aco/optimizer: fix call to can_use_opsel() in apply_insert()
can_use_opsel() expects index -1 for the definition, but apply_insert() passed 3.
Fixes: 54292e99c7844500314bfd623469c65adef954c5 ('aco: optimize 32-bit extracts and inserts using SDWA')
Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15551>
(cherry picked from commit b98a9dcc36b4fb3c7184eaf4a00d9f35314dc5ef)
---
.pick_status.json | 2 +-
src/amd/compiler/aco_optimizer.cpp | 2 +-
src/amd/compiler/tests/test_sdwa.cpp | 24 +++++++++++++-----------
3 files changed, 15 insertions(+), 13 deletions(-)
diff --git a/.pick_status.json b/.pick_status.json
index 98a839f0eee..2e52f4c9bdc 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -7916,7 +7916,7 @@
"description": "aco/optimizer: fix call to can_use_opsel() in apply_insert()",
"nominated": true,
"nomination_type": 1,
- "resolution": 5,
+ "resolution": 1,
"because_sha": "54292e99c7844500314bfd623469c65adef954c5"
},
{
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 85701c0b70b..09e57f986e3 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -3060,7 +3060,7 @@ apply_insert(opt_ctx& ctx, aco_ptr<Instruction>& instr)
assert(sel);
if (instr->isVOP3() && sel.size() == 2 && !sel.sign_extend() &&
- can_use_opsel(ctx.program->chip_class, instr->opcode, 3, sel.offset())) {
+ can_use_opsel(ctx.program->chip_class, instr->opcode, -1, sel.offset())) {
if (instr->vop3().opsel & (1 << 3))
return false;
if (sel.offset())
diff --git a/src/amd/compiler/tests/test_sdwa.cpp b/src/amd/compiler/tests/test_sdwa.cpp
index 73a9a43b8bd..8a8d8b64fe7 100644
--- a/src/amd/compiler/tests/test_sdwa.cpp
+++ b/src/amd/compiler/tests/test_sdwa.cpp
@@ -255,13 +255,13 @@ BEGIN_TEST(optimize.sdwa.extract)
Operand::c32(is_signed));
writeout(12, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte3_b));
- //! v1: %res13 = v_add_i16 %a, %b
- //! p_unit_test 13, %res13
+ /* VOP3-only instructions can't use SDWA but they can use opsel on GFX9+ instead */
+ //~gfx(9|10).*! v1: %res13 = v_add_i16 %a, %b
+ //~gfx(9|10).*! p_unit_test 13, %res13
Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u),
Operand::c32(is_signed));
writeout(13, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word0_b));
- /* VOP3-only instructions can't use SDWA but they can use opsel instead */
//~gfx(9|10).*! v1: %res14 = v_add_i16 %a, hi(%b)
//~gfx(9|10).*! p_unit_test 14, %res14
Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
@@ -496,21 +496,23 @@ BEGIN_TEST(optimize.sdwa.insert)
bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u));
writeout(10, val);
- //! v1: %res11 = v_sub_i16 %a, %b
- //! p_unit_test 11, %res11
+ //~gfx8! v1: %tmp11 = v_sub_i16 %a, %b
+ //~gfx8! v1: %res11 = p_insert %tmp11, 0, 16
+ //~gfx(9|10)! v1: %res11 = v_sub_i16 %a, %b
+ //~gfx(8|9|10)! p_unit_test 11, %res11
val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]);
writeout(11, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(16u)));
- //~gfx[78]! v1: %tmp12 = v_sub_i16 %a, %b
- //~gfx[78]! v1: %res12 = p_insert %tmp11, 1, 16
+ //~gfx8! v1: %tmp12 = v_sub_i16 %a, %b
+ //~gfx8! v1: %res12 = p_insert %tmp12, 1, 16
//~gfx(9|10)! v1: %res12 = v_sub_i16 %a, %b opsel_hi
- //! p_unit_test 12, %res12
+ //~gfx(8|9|10)! p_unit_test 12, %res12
val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]);
writeout(12, bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u)));
- //! v1: %tmp13 = v_sub_i16 %a, %b
- //! v1: %res13 = p_insert %tmp13, 0, 8
- //! p_unit_test 13, %res13
+ //~gfx[^7]! v1: %tmp13 = v_sub_i16 %a, %b
+ //~gfx[^7]! v1: %res13 = p_insert %tmp13, 0, 8
+ //~gfx[^7]! p_unit_test 13, %res13
val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]);
writeout(13, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u)));
More information about the mesa-commit mailing list