Mesa (staging/22.0): aco/optimizer: fix call to can_use_opsel() in apply_insert()

Thu Apr 21 07:13:49 UTC 2022

Module: Mesa
Branch: staging/22.0
Commit: d979b084c81df977ccd4de981f8132ff27e29191
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d979b084c81df977ccd4de981f8132ff27e29191

Author: Daniel Schürmann <daniel at schuermann.dev>
Date:   Wed Mar 23 15:12:09 2022 +0100

aco/optimizer: fix call to can_use_opsel() in apply_insert()

can_use_opsel() identifies the definition by index -1, but apply_insert() was passing 3.

Fixes: 54292e99c7844500314bfd623469c65adef954c5 ('aco: optimize 32-bit extracts and inserts using SDWA')
Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15551>
(cherry picked from commit b98a9dcc36b4fb3c7184eaf4a00d9f35314dc5ef)

---

 .pick_status.json                    |  2 +-
 src/amd/compiler/aco_optimizer.cpp   |  2 +-
 src/amd/compiler/tests/test_sdwa.cpp | 24 +++++++++++++-----------
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 98a839f0eee..2e52f4c9bdc 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -7916,7 +7916,7 @@
         "description": "aco/optimizer: fix call to can_use_opsel() in apply_insert()",
         "nominated": true,
         "nomination_type": 1,
-        "resolution": 5,
+        "resolution": 1,
         "because_sha": "54292e99c7844500314bfd623469c65adef954c5"
     },
     {
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 85701c0b70b..09e57f986e3 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -3060,7 +3060,7 @@ apply_insert(opt_ctx& ctx, aco_ptr<Instruction>& instr)
    assert(sel);
 
    if (instr->isVOP3() && sel.size() == 2 && !sel.sign_extend() &&
-       can_use_opsel(ctx.program->chip_class, instr->opcode, 3, sel.offset())) {
+       can_use_opsel(ctx.program->chip_class, instr->opcode, -1, sel.offset())) {
       if (instr->vop3().opsel & (1 << 3))
          return false;
       if (sel.offset())
diff --git a/src/amd/compiler/tests/test_sdwa.cpp b/src/amd/compiler/tests/test_sdwa.cpp
index 73a9a43b8bd..8a8d8b64fe7 100644
--- a/src/amd/compiler/tests/test_sdwa.cpp
+++ b/src/amd/compiler/tests/test_sdwa.cpp
@@ -255,13 +255,13 @@ BEGIN_TEST(optimize.sdwa.extract)
                                     Operand::c32(is_signed));
       writeout(12, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte3_b));
 
-      //! v1: %res13 = v_add_i16 %a, %b
-      //! p_unit_test 13, %res13
+      /* VOP3-only instructions can't use SDWA but they can use opsel on GFX9+ instead */
+      //~gfx(9|10).*! v1: %res13 = v_add_i16 %a, %b
+      //~gfx(9|10).*! p_unit_test 13, %res13
       Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u),
                                     Operand::c32(is_signed));
       writeout(13, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word0_b));
 
-      /* VOP3-only instructions can't use SDWA but they can use opsel instead */
       //~gfx(9|10).*! v1: %res14 = v_add_i16 %a, hi(%b)
       //~gfx(9|10).*! p_unit_test 14, %res14
       Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
@@ -496,21 +496,23 @@ BEGIN_TEST(optimize.sdwa.insert)
       bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u));
       writeout(10, val);
 
-      //! v1: %res11 = v_sub_i16 %a, %b
-      //! p_unit_test 11, %res11
+      //~gfx8! v1: %tmp11 = v_sub_i16 %a, %b
+      //~gfx8! v1: %res11 = p_insert %tmp11, 0, 16
+      //~gfx(9|10)! v1: %res11 = v_sub_i16 %a, %b
+      //~gfx(8|9|10)! p_unit_test 11, %res11
       val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]);
       writeout(11, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(16u)));
 
-      //~gfx[78]! v1: %tmp12 = v_sub_i16 %a, %b
-      //~gfx[78]! v1: %res12 = p_insert %tmp11, 1, 16
+      //~gfx8! v1: %tmp12 = v_sub_i16 %a, %b
+      //~gfx8! v1: %res12 = p_insert %tmp12, 1, 16
       //~gfx(9|10)! v1: %res12 = v_sub_i16 %a, %b opsel_hi
-      //! p_unit_test 12, %res12
+      //~gfx(8|9|10)! p_unit_test 12, %res12
       val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]);
       writeout(12, bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u)));
 
-      //! v1: %tmp13 = v_sub_i16 %a, %b
-      //! v1: %res13 = p_insert %tmp13, 0, 8
-      //! p_unit_test 13, %res13
+      //~gfx[^7]! v1: %tmp13 = v_sub_i16 %a, %b
+      //~gfx[^7]! v1: %res13 = p_insert %tmp13, 0, 8
+      //~gfx[^7]! p_unit_test 13, %res13
       val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]);
       writeout(13, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u)));