Mesa (main): aco: optimize discard_if when WQM is not needed afterwards

Tue Feb 8 16:39:36 UTC 2022

Module: Mesa
Branch: main
Commit: 5e9df85b1a4504c5b4162e77e139056dc80accc6
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5e9df85b1a4504c5b4162e77e139056dc80accc6

Author: Daniel Schürmann <daniel at schuermann.dev>
Date:   Fri Feb  4 17:13:19 2022 +0100

aco: optimize discard_if when WQM is not needed afterwards

Totals from 11560 (8.57% of 134913) affected shaders: (GFX10.3)
CodeSize: 12092560 -> 11997652 (-0.78%)
Instrs: 2205325 -> 2181598 (-1.08%)
Latency: 15376048 -> 15356958 (-0.12%); split: -0.12%, +0.00%
InvThroughput: 3526105 -> 3525120 (-0.03%); split: -0.03%, +0.00%
Copies: 98543 -> 87601 (-11.10%)
Branches: 16919 -> 16873 (-0.27%)
PreSGPRs: 291584 -> 291532 (-0.02%)

Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14805>

---

 src/amd/compiler/aco_insert_exec_mask.cpp | 38 +++++++++++++++++++------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index c9860398a58..1b0c0b61fd9 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -708,11 +708,29 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
 
       WQMState needs = ctx.handle_wqm ? ctx.info[block->index].instr_needs[idx] : Unspecified;
 
+      if (needs == WQM && state != WQM) {
+         transition_to_WQM(ctx, bld, block->index);
+         state = WQM;
+      } else if (needs == Exact && state != Exact) {
+         transition_to_Exact(ctx, bld, block->index);
+         state = Exact;
+      }
+
       if (instr->opcode == aco_opcode::p_discard_if) {
-         if (ctx.info[block->index].block_needs & Preserve_WQM) {
-            assert(block->kind & block_kind_top_level);
-            transition_to_WQM(ctx, bld, block->index);
-            ctx.info[block->index].exec.back().second &= ~mask_type_global;
+         Operand current_exec = Operand(exec, bld.lm);
+
+         if (block->kind & block_kind_top_level) {
+            if (needs == Preserve_WQM) {
+               /* Preserve the WQM mask */
+               transition_to_WQM(ctx, bld, block->index);
+               ctx.info[block->index].exec.back().second &= ~mask_type_global;
+            } else if (ctx.info[block->index].exec.size() == 2) {
+               assert(state == WQM);
+               /* Transition to Exact without extra instruction */
+               ctx.info[block->index].exec.pop_back();
+               current_exec = get_exec_op(ctx.info[block->index].exec.back().first);
+               ctx.info[block->index].exec[0].first = Operand(bld.lm);
+            }
          }
 
          Temp cond, exit_cond;
@@ -727,7 +745,7 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
             cond = instr->operands[0].getTemp();
             /* discard from current exec */
             exit_cond = bld.sop2(Builder::s_andn2, Definition(exec, bld.lm), bld.def(s1, scc),
-                                 Operand(exec, bld.lm), cond)
+                                 current_exec, cond)
                            .def(1)
                            .getTemp();
          }
@@ -745,15 +763,7 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
          instr->operands[0] = bld.scc(exit_cond);
          assert(!ctx.handle_wqm || (ctx.info[block->index].exec[0].second & mask_type_wqm) == 0);
 
-      } else if (needs == WQM && state != WQM) {
-         transition_to_WQM(ctx, bld, block->index);
-         state = WQM;
-      } else if (needs == Exact && state != Exact) {
-         transition_to_Exact(ctx, bld, block->index);
-         state = Exact;
-      }
-
-      if (instr->opcode == aco_opcode::p_is_helper) {
+      } else if (instr->opcode == aco_opcode::p_is_helper) {
          Definition dst = instr->definitions[0];
          assert(dst.size() == bld.lm.size());
          if (state == Exact) {