Mesa (main): aco/insert_exec_mask: remove some unnecessary WQM loop handling code

Fri Feb 11 19:26:17 UTC 2022

Module: Mesa
Branch: main
Commit: cbb1b095ca919bb1db7c70a23dff3745942868e1
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=cbb1b095ca919bb1db7c70a23dff3745942868e1

Author: Daniel Schürmann <daniel at schuermann.dev>
Date:   Wed Jan 26 11:08:42 2022 +0100

aco/insert_exec_mask: remove some unnecessary WQM loop handling code

These workarounds were necessary to prevent infinite loops
with helper lane registers containing wrong data.

Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14951>

---

 src/amd/compiler/aco_insert_exec_mask.cpp | 106 ++----------------------------
 1 file changed, 4 insertions(+), 102 deletions(-)

diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index a501b664ed5..7bf852ec186 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -83,7 +83,6 @@ struct block_info {
    std::vector<WQMState> instr_needs;
    uint8_t block_needs;
    uint8_t ever_again_needs;
-   /* more... */
 };
 
 struct exec_ctx {
@@ -110,16 +109,6 @@ needs_exact(aco_ptr<Instruction>& instr)
    }
 }
 
-void
-set_needs_wqm(wqm_ctx& ctx, Temp tmp)
-{
-   if (!ctx.needs_wqm[tmp.id()]) {
-      ctx.needs_wqm[tmp.id()] = true;
-      if (ctx.defined_in[tmp.id()] != 0xFFFF)
-         ctx.worklist.insert(ctx.defined_in[tmp.id()]);
-   }
-}
-
 void
 mark_block_wqm(wqm_ctx& ctx, unsigned block_idx)
 {
@@ -146,7 +135,8 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
 
       WQMState needs = needs_exact(instr) ? Exact : Unspecified;
       bool propagate_wqm = instr->opcode == aco_opcode::p_wqm;
-      bool pred_by_exec = needs_exec_mask(instr.get());
+      bool pred_by_exec = needs_exec_mask(instr.get()) ||
+                          instr->opcode == aco_opcode::p_logical_end;
       for (const Definition& definition : instr->definitions) {
          if (!definition.isTemp())
             continue;
@@ -161,13 +151,13 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
       if (instr->isBranch() && ctx.branch_wqm[block->index]) {
          assert(!(info.block_needs & Exact_Branch));
          needs = WQM;
-         propagate_wqm = true;
       }
 
       if (propagate_wqm) {
+         needs = pred_by_exec ? WQM : Unspecified;
          for (const Operand& op : instr->operands) {
             if (op.isTemp()) {
-               set_needs_wqm(ctx, op.getTemp());
+               ctx.needs_wqm[op.tempId()] = true;
             }
          }
       }
@@ -175,12 +165,6 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
          needs = pred_by_exec ? WQM : Unspecified;
       }
 
-      if ((instr->opcode == aco_opcode::p_logical_end && ctx.branch_wqm[block->index]) ||
-          instr->opcode == aco_opcode::p_wqm) {
-         assert(needs != Exact);
-         needs = WQM;
-      }
-
       instr_needs[i] = needs;
       info.block_needs |= needs;
    }
@@ -194,67 +178,6 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
    }
 }
 
-/* If an outer loop needs WQM but a nested loop does not, we have to ensure that
- * the nested loop is done in WQM so that the exec is not empty upon entering
- * the nested loop.
- *
- * TODO: This could be fixed with slightly better code (for loops with divergent
- * breaks, which might benefit from being in exact) by adding Exact_Branch to a
- * divergent branch surrounding the nested loop, if such a branch exists.
- */
-void
-handle_wqm_loops(wqm_ctx& ctx, exec_ctx& exec_ctx, unsigned preheader)
-{
-   for (unsigned idx = preheader + 1; idx < exec_ctx.program->blocks.size(); idx++) {
-      Block& block = exec_ctx.program->blocks[idx];
-      if (block.kind & block_kind_break)
-         mark_block_wqm(ctx, idx);
-
-      if ((block.kind & block_kind_loop_exit) && block.loop_nest_depth == 0)
-         break;
-   }
-}
-
-/* If an outer loop and it's nested loops does not need WQM,
- * add_branch_code() will ensure that it enters in Exact. We have to
- * ensure that the exact exec mask is not empty by adding Exact_Branch to
- * the outer divergent branch.
- */
-void
-handle_exact_loops(wqm_ctx& ctx, exec_ctx& exec_ctx, unsigned preheader)
-{
-   assert(exec_ctx.program->blocks[preheader + 1].kind & block_kind_loop_header);
-
-   int parent_branch = preheader;
-   unsigned rel_branch_depth = 0;
-   for (; parent_branch >= 0; parent_branch--) {
-      Block& branch = exec_ctx.program->blocks[parent_branch];
-      if (branch.kind & block_kind_branch) {
-         if (rel_branch_depth == 0)
-            break;
-         rel_branch_depth--;
-      }
-
-      /* top-level blocks should never have empty exact exec masks */
-      if (branch.kind & block_kind_top_level)
-         return;
-
-      if (branch.kind & block_kind_merge)
-         rel_branch_depth++;
-   }
-   assert(parent_branch >= 0);
-
-   ASSERTED Block& branch = exec_ctx.program->blocks[parent_branch];
-   assert(branch.kind & block_kind_branch);
-   if (ctx.branch_wqm[parent_branch]) {
-      /* The branch can't be done in Exact because some other blocks in it
-       * are in WQM. So instead, ensure that the loop is done in WQM. */
-      handle_wqm_loops(ctx, exec_ctx, preheader);
-   } else {
-      exec_ctx.info[parent_branch].block_needs |= Exact_Branch;
-   }
-}
-
 void
 calculate_wqm_needs(exec_ctx& exec_ctx)
 {
@@ -266,27 +189,6 @@ calculate_wqm_needs(exec_ctx& exec_ctx)
 
       Block& block = exec_ctx.program->blocks[block_index];
       get_block_needs(ctx, exec_ctx, &block);
-
-      /* handle_exact_loops() needs information on outer branches, so don't
-       * handle loops until a top-level block.
-       */
-      if (block.kind & block_kind_top_level && block.index != exec_ctx.program->blocks.size() - 1) {
-         unsigned preheader = block.index;
-         do {
-            Block& preheader_block = exec_ctx.program->blocks[preheader];
-            if ((preheader_block.kind & block_kind_loop_preheader) &&
-                preheader_block.loop_nest_depth == 0) {
-               /* If the loop or a nested loop needs WQM, branch_wqm will be true for the
-                * preheader.
-                */
-               if (ctx.branch_wqm[preheader])
-                  handle_wqm_loops(ctx, exec_ctx, preheader);
-               else
-                  handle_exact_loops(ctx, exec_ctx, preheader);
-            }
-            preheader++;
-         } while (!(exec_ctx.program->blocks[preheader].kind & block_kind_top_level));
-      }
    }
 
    uint8_t ever_again_needs = 0;