Mesa (main): aco: combine DPP into VALU after RA

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Aug 19 18:36:04 UTC 2021


Module: Mesa
Branch: main
Commit: 4ac47ad1cd7976d7effbbfae37fa69e26a288ad2
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=4ac47ad1cd7976d7effbbfae37fa69e26a288ad2

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Tue Jun 30 15:33:18 2020 +0100

aco: combine DPP into VALU after RA

Mostly helps a bunch of Cyberpunk 2077 shaders.

fossil-db (Sienna Cichlid):
Totals from 26 (0.02% of 150170) affected shaders:
CodeSize: 83208 -> 81528 (-2.02%)
Instrs: 14728 -> 14308 (-2.85%)
Latency: 48041 -> 47793 (-0.52%)
InvThroughput: 10836 -> 10578 (-2.38%)

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11924>

---

 src/amd/compiler/aco_optimizer_postRA.cpp | 55 +++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp
index e612292e822..84ee6ef01c7 100644
--- a/src/amd/compiler/aco_optimizer_postRA.cpp
+++ b/src/amd/compiler/aco_optimizer_postRA.cpp
@@ -22,6 +22,7 @@
  *
  */
 
+#include "aco_builder.h"
 #include "aco_ir.h"
 
 #include <algorithm>
@@ -338,6 +339,58 @@ try_optimize_scc_nocompare(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
    }
 }
 
+void
+try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr) /* fold a DPP v_mov_b32 feeding instr into instr */
+{
+   if (!instr->isVALU() || instr->isDPP() || !can_use_DPP(instr, false))
+      return; /* only non-DPP VALU instructions accepted by can_use_DPP() are candidates */
+
+   for (unsigned i = 0; i < MIN2(2, instr->operands.size()); i++) { /* only operands 0 and 1 */
+      Idx op_instr_idx = last_writer_idx(ctx, instr->operands[i]);
+      if (!op_instr_idx.found())
+         continue; /* no writer found for this operand */
+
+      Instruction* mov = ctx.get(op_instr_idx);
+      if (mov->opcode != aco_opcode::v_mov_b32 || !mov->isDPP())
+         continue; /* the writer must be a v_mov_b32 that uses DPP */
+
+      /* If we aren't going to remove the v_mov_b32 (no temp id, or its result has other uses),
+       * we have to ensure that it doesn't overwrite its own operand before we use it.
+       */
+      if (mov->definitions[0].physReg() == mov->operands[0].physReg() &&
+          (!mov->definitions[0].tempId() || ctx.uses[mov->definitions[0].tempId()] > 1))
+         continue;
+
+      Idx mov_src_idx = last_writer_idx(ctx, mov->operands[0]);
+      if (is_instr_after(mov_src_idx, op_instr_idx))
+         continue; /* presumably: mov's source was written again after the mov -- confirm */
+
+      if (i && !can_swap_operands(instr, &instr->opcode))
+         continue; /* the mov's source is placed in operand 0 below, so operand 1 needs a swap */
+
+      /* anything else doesn't make sense in SSA (asserts that row_mask and bank_mask are 0xf) */
+      assert(mov->dpp().row_mask == 0xf && mov->dpp().bank_mask == 0xf);
+
+      if (--ctx.uses[mov->definitions[0].tempId()]) /* instr stops using the mov's result */
+         ctx.uses[mov->operands[0].tempId()]++; /* mov still used elsewhere: its source gains a use */
+
+      convert_to_DPP(instr);
+
+      DPP_instruction* dpp = &instr->dpp(); /* fetched only after the conversion above */
+      if (i) {
+         std::swap(dpp->operands[0], dpp->operands[1]); /* move the matched operand to slot 0 */
+         std::swap(dpp->neg[0], dpp->neg[1]); /* the per-operand modifiers follow their operands */
+         std::swap(dpp->abs[0], dpp->abs[1]);
+      }
+      dpp->operands[0] = mov->operands[0]; /* read the mov's source directly */
+      dpp->dpp_ctrl = mov->dpp().dpp_ctrl; /* take over the mov's DPP control value */
+      dpp->bound_ctrl = true;
+      dpp->neg[0] ^= mov->dpp().neg[0] && !dpp->abs[0]; /* mov's neg is ignored if instr applies abs */
+      dpp->abs[0] |= mov->dpp().abs[0];
+      return; /* at most one operand is combined per instruction */
+   }
+}
+
 void
 process_instruction(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
 {
@@ -345,6 +398,8 @@ process_instruction(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
 
    try_optimize_scc_nocompare(ctx, instr);
 
+   try_combine_dpp(ctx, instr);
+
    if (instr)
       save_reg_writes(ctx, instr);
 



More information about the mesa-commit mailing list