Mesa (main): aco: combine DPP into VALU before RA

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Aug 19 18:36:04 UTC 2021


Module: Mesa
Branch: main
Commit: 2e6834d4f6c9985bcaedd5ebc35ac5afc93c8f6f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2e6834d4f6c9985bcaedd5ebc35ac5afc93c8f6f

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Tue Jun 30 15:33:18 2020 +0100

aco: combine DPP into VALU before RA

Mostly helps a bunch of Cyberpunk 2077 shaders. Catches some of the cases
that the post-RA optimizer can't handle because of register assignment.

fossil-db (Sienna Cichlid):
Totals from 25 (0.02% of 150170) affected shaders:
CodeSize: 78808 -> 75764 (-3.86%)
Instrs: 14311 -> 13547 (-5.34%)
Latency: 278697 -> 277885 (-0.29%)
InvThroughput: 63428 -> 62754 (-1.06%)
Copies: 1348 -> 1349 (+0.07%); split: -0.07%, +0.15%
PreVGPRs: 1035 -> 1011 (-2.32%)

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11924>

---

 src/amd/compiler/aco_optimizer.cpp | 51 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 5be7e3fd19c..7d5d8426321 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -22,6 +22,7 @@
  *
  */
 
+#include "aco_builder.h"
 #include "aco_ir.h"
 
 #include "util/half_float.h"
@@ -119,11 +120,12 @@ enum Label {
    label_canonicalized = 1ull << 32,
    label_extract = 1ull << 33,
    label_insert = 1ull << 34,
+   label_dpp = 1ull << 35,
 };
 
 static constexpr uint64_t instr_usedef_labels =
    label_vec | label_mul | label_mad | label_add_sub | label_vop3p | label_bitwise |
-   label_uniform_bitwise | label_minmax | label_vopc | label_usedef | label_extract;
+   label_uniform_bitwise | label_minmax | label_vopc | label_usedef | label_extract | label_dpp;
 static constexpr uint64_t instr_mod_labels =
    label_omod2 | label_omod4 | label_omod5 | label_clamp | label_insert;
 
@@ -452,6 +454,14 @@ struct ssa_info {
    }
 
    bool is_insert() { return label & label_insert; }
+
+   void set_dpp(Instruction* mov)
+   {
+      add_label(label_dpp);
+      instr = mov;
+   }
+
+   bool is_dpp() { return label & label_dpp; }
 };
 
 struct opt_ctx {
@@ -1046,6 +1056,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
                instr->vop3().abs[i] = true;
             continue;
          }
+
          unsigned bits = get_operand_size(instr, i);
          if (info.is_constant(bits) && alu_can_accept_constant(instr->opcode, i) &&
              (!instr->isSDWA() || ctx.program->chip_class >= GFX9)) {
@@ -1404,6 +1415,13 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
          assert(instr->operands[0].isFixed());
       }
       break;
+   case aco_opcode::v_mov_b32:
+      if (instr->isDPP()) {
+         /* anything else doesn't make sense in SSA */
+         assert(instr->dpp().row_mask == 0xf && instr->dpp().bank_mask == 0xf);
+         ctx.info[instr->definitions[0].tempId()].set_dpp(instr.get());
+      }
+      break;
    case aco_opcode::p_is_helper:
       if (!ctx.program->needs_wqm)
          ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->chip_class, 0u);
@@ -3705,6 +3723,37 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
    if (instr->opcode == aco_opcode::v_mad_u32_u16)
       select_mul_u32_u24(ctx, instr);
 
+   /* Combine DPP copies into VALU. This should be done after creating MAD/FMA. */
+   if (instr->isVALU()) {
+      for (unsigned i = 0; i < instr->operands.size(); i++) {
+         if (!instr->operands[i].isTemp())
+            continue;
+         ssa_info info = ctx.info[instr->operands[i].tempId()];
+
+         aco_opcode swapped_op;
+         if (info.is_dpp() && info.instr->pass_flags == instr->pass_flags &&
+             (i == 0 || can_swap_operands(instr, &swapped_op)) && can_use_DPP(instr, true) &&
+             !instr->isDPP()) {
+            convert_to_DPP(instr);
+            DPP_instruction* dpp = static_cast<DPP_instruction*>(instr.get());
+            if (i) {
+               instr->opcode = swapped_op;
+               std::swap(instr->operands[0], instr->operands[1]);
+               std::swap(dpp->neg[0], dpp->neg[1]);
+               std::swap(dpp->abs[0], dpp->abs[1]);
+            }
+            if (--ctx.uses[info.instr->definitions[0].tempId()])
+               ctx.uses[info.instr->operands[0].tempId()]++;
+            instr->operands[0].setTemp(info.instr->operands[0].getTemp());
+            dpp->dpp_ctrl = info.instr->dpp().dpp_ctrl;
+            dpp->bound_ctrl = info.instr->dpp().bound_ctrl;
+            dpp->neg[0] ^= info.instr->dpp().neg[0] && !dpp->abs[0];
+            dpp->abs[0] |= info.instr->dpp().abs[0];
+            break;
+         }
+      }
+   }
+
    if (instr->isSDWA() || instr->isDPP() || (instr->isVOP3() && ctx.program->chip_class < GFX10) ||
        (instr->isVOP3P() && ctx.program->chip_class < GFX10))
       return; /* some encodings can't ever take literals */



More information about the mesa-commit mailing list