Mesa (main): aco: remove SMEM constant/addition combining out of the loop

Fri Dec 17 22:46:51 UTC 2021

Module: Mesa
Branch: main
Commit: fa4e08112ecb54ec082c2b259842c0758ddc9f2a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=fa4e08112ecb54ec082c2b259842c0758ddc9f2a

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Tue Dec 14 19:51:50 2021 +0000

aco: remove SMEM constant/addition combining out of the loop

There's no reason for this optimization to be in this loop.

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13755>

---

 src/amd/compiler/aco_optimizer.cpp | 110 +++++++++++++++++++------------------
 1 file changed, 57 insertions(+), 53 deletions(-)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index d2efd5df968..e79c5706393 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -783,6 +783,61 @@ skip_smem_offset_align(opt_ctx& ctx, SMEM_instruction* smem)
       op.setTemp(bitwise_instr->operands[0].getTemp());
 }
 
+void
+smem_combine(opt_ctx& ctx, aco_ptr<Instruction>& instr)
+{
+   /* 1st pass, before folding additions: skip an "& -4", e.g. load((a + 16) & -4, 0) */
+   if (!instr->operands.empty())
+      skip_smem_offset_align(ctx, &instr->smem());
+
+   /* operand 1: use an inline constant if it fits, else split a (base + constant) addition */
+   if (!instr->operands.empty() && instr->operands[1].isTemp()) {
+      SMEM_instruction& smem = instr->smem();
+      ssa_info info = ctx.info[instr->operands[1].tempId()];
+
+      Temp base;
+      uint32_t offset;
+      bool prevent_overflow = smem.operands[0].size() > 2 || smem.prevent_overflow;
+      if (info.is_constant_or_literal(32) &&
+          ((ctx.program->chip_class == GFX6 && info.val <= 0x3FF) ||
+           (ctx.program->chip_class == GFX7 && info.val <= 0xFFFFFFFF) ||
+           (ctx.program->chip_class >= GFX8 && info.val <= 0xFFFFF))) {
+         instr->operands[1] = Operand::c32(info.val);
+      } else if (parse_base_offset(ctx, instr.get(), 1, &base, &offset, prevent_overflow) &&
+                 base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->chip_class >= GFX9 &&
+                 offset % 4u == 0) {
+         bool soe = smem.operands.size() >= (!smem.definitions.empty() ? 3 : 4);
+         if (soe) { /* extra trailing operand exists: only overwrite it if it is a known 0 */
+            if (ctx.info[smem.operands.back().tempId()].is_constant_or_literal(32) &&
+                ctx.info[smem.operands.back().tempId()].val == 0) {
+               smem.operands[1] = Operand::c32(offset);
+               smem.operands.back() = Operand(base);
+            }
+         } else { /* no trailing operand yet: rebuild with one more operand to hold the base */
+            SMEM_instruction* new_instr = create_instruction<SMEM_instruction>(
+               smem.opcode, Format::SMEM, smem.operands.size() + 1, smem.definitions.size());
+            new_instr->operands[0] = smem.operands[0];
+            new_instr->operands[1] = Operand::c32(offset);
+            if (smem.definitions.empty())
+               new_instr->operands[2] = smem.operands[2];
+            new_instr->operands.back() = Operand(base);
+            if (!smem.definitions.empty())
+               new_instr->definitions[0] = smem.definitions[0];
+            new_instr->sync = smem.sync;
+            new_instr->glc = smem.glc;
+            new_instr->dlc = smem.dlc;
+            new_instr->nv = smem.nv;
+            new_instr->disable_wqm = smem.disable_wqm;
+            instr.reset(new_instr); /* 'smem' referred to the old instruction */
+         }
+      }
+   }
+
+   /* 2nd pass, after folding additions: skip an "& -4", e.g. load(a & -4, 16) */
+   if (!instr->operands.empty())
+      skip_smem_offset_align(ctx, &instr->smem());
+}
+
 unsigned
 get_operand_size(aco_ptr<Instruction>& instr, unsigned index)
 {
@@ -1002,9 +1057,8 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
                instr.get());
    }
 
-   /* skip &-4 before offset additions: load((a + 16) & -4, 0) */
-   if (instr->isSMEM() && !instr->operands.empty())
-      skip_smem_offset_align(ctx, &instr->smem());
+   if (instr->isSMEM())
+      smem_combine(ctx, instr);
 
    for (unsigned i = 0; i < instr->operands.size(); i++) {
       if (!instr->operands[i].isTemp())
@@ -1204,52 +1258,6 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
          }
       }
 
-      /* SMEM: propagate constants and combine additions */
-      else if (instr->isSMEM()) {
-
-         SMEM_instruction& smem = instr->smem();
-         Temp base;
-         uint32_t offset;
-         bool prevent_overflow = smem.operands[0].size() > 2 || smem.prevent_overflow;
-         if (i == 1 && info.is_constant_or_literal(32) &&
-             ((ctx.program->chip_class == GFX6 && info.val <= 0x3FF) ||
-              (ctx.program->chip_class == GFX7 && info.val <= 0xFFFFFFFF) ||
-              (ctx.program->chip_class >= GFX8 && info.val <= 0xFFFFF))) {
-            instr->operands[i] = Operand::c32(info.val);
-            continue;
-         } else if (i == 1 &&
-                    parse_base_offset(ctx, instr.get(), i, &base, &offset, prevent_overflow) &&
-                    base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->chip_class >= GFX9 &&
-                    offset % 4u == 0) {
-            bool soe = smem.operands.size() >= (!smem.definitions.empty() ? 3 : 4);
-            if (soe && (!ctx.info[smem.operands.back().tempId()].is_constant_or_literal(32) ||
-                        ctx.info[smem.operands.back().tempId()].val != 0)) {
-               continue;
-            }
-            if (soe) {
-               smem.operands[1] = Operand::c32(offset);
-               smem.operands.back() = Operand(base);
-            } else {
-               SMEM_instruction* new_instr = create_instruction<SMEM_instruction>(
-                  smem.opcode, Format::SMEM, smem.operands.size() + 1, smem.definitions.size());
-               new_instr->operands[0] = smem.operands[0];
-               new_instr->operands[1] = Operand::c32(offset);
-               if (smem.definitions.empty())
-                  new_instr->operands[2] = smem.operands[2];
-               new_instr->operands.back() = Operand(base);
-               if (!smem.definitions.empty())
-                  new_instr->definitions[0] = smem.definitions[0];
-               new_instr->sync = smem.sync;
-               new_instr->glc = smem.glc;
-               new_instr->dlc = smem.dlc;
-               new_instr->nv = smem.nv;
-               new_instr->disable_wqm = smem.disable_wqm;
-               instr.reset(new_instr);
-            }
-            continue;
-         }
-      }
-
       else if (instr->isBranch()) {
          if (ctx.info[instr->operands[0].tempId()].is_scc_invert()) {
             /* Flip the branch instruction to get rid of the scc_invert instruction */
@@ -1260,10 +1268,6 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
       }
    }
 
-   /* skip &-4 after offset additions: load(a & -4, 16) */
-   if (instr->isSMEM() && !instr->operands.empty())
-      skip_smem_offset_align(ctx, &instr->smem());
-
    /* if this instruction doesn't define anything, return */
    if (instr->definitions.empty()) {
       check_sdwa_extract(ctx, instr);