Mesa (main): aco/lower_phis: don't allocate unused temporary ids

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Jun 14 17:29:12 UTC 2021


Module: Mesa
Branch: main
Commit: bc1c527834904825f310d33de4682ce1e36b97a7
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=bc1c527834904825f310d33de4682ce1e36b97a7

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Wed Jun  9 20:10:51 2021 +0100

aco/lower_phis: don't allocate unused temporary ids

The excessive number of temporary IDs caused #4872's live-out sets to be
extremely large and expensive to iterate.

With this change, #4872's shader is much faster to compile and uses much
less memory.

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/4872
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11300>

---

 src/amd/compiler/aco_lower_phis.cpp | 48 ++++++++++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/src/amd/compiler/aco_lower_phis.cpp b/src/amd/compiler/aco_lower_phis.cpp
index 313770fd29c..0eae8eec3fe 100644
--- a/src/amd/compiler/aco_lower_phis.cpp
+++ b/src/amd/compiler/aco_lower_phis.cpp
@@ -44,6 +44,8 @@ struct ssa_state {
    unsigned phi_block_idx;
    unsigned loop_nest_depth;
    std::map<unsigned, unsigned> writes;
+   /* Whether there's a write in any of a block's predecessors. Indexed by the block index. */
+   std::vector<bool> any_pred_defined;
    std::vector<Operand> latest;
    std::vector<bool> visited;
 };
@@ -62,7 +64,8 @@ Operand get_ssa(Program *program, unsigned block_idx, ssa_state *state, bool bef
 
    Block& block = program->blocks[block_idx];
    size_t pred = block.linear_preds.size();
-   if (pred == 0 || block.loop_nest_depth < state->loop_nest_depth) {
+   if (pred == 0 || block.loop_nest_depth < state->loop_nest_depth ||
+       !state->any_pred_defined[block_idx]) {
       return Operand(program->lane_mask);
    } else if (block.loop_nest_depth > state->loop_nest_depth) {
       Operand op = get_ssa(program, block_idx - 1, state, false);
@@ -79,21 +82,9 @@ Operand get_ssa(Program *program, unsigned block_idx, ssa_state *state, bool bef
       Temp res = Temp(program->allocateTmp(program->lane_mask));
       state->latest[block_idx] = Operand(res);
 
-      Operand *const ops = (Operand *)alloca(pred * sizeof(Operand));
-      for (unsigned i = 0; i < pred; i++)
-         ops[i] = get_ssa(program, block.linear_preds[i], state, false);
-
-      bool all_undef = true;
-      for (unsigned i = 0; i < pred; i++)
-         all_undef = all_undef && ops[i].isUndefined();
-      if (all_undef) {
-         state->latest[block_idx] = ops[0];
-         return ops[0];
-      }
-
       aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, pred, 1)};
       for (unsigned i = 0; i < pred; i++)
-         phi->operands[i] = ops[i];
+         phi->operands[i] = get_ssa(program, block.linear_preds[i], state, false);
       phi->definitions[0] = Definition(res);
       block.instructions.emplace(block.instructions.begin(), std::move(phi));
 
@@ -163,6 +154,33 @@ void build_merge_code(Program *program, Block *block, Definition dst, Operand pr
    }
 }
 
+void init_any_pred_defined(Program *program, ssa_state *state, Block *block, aco_ptr<Instruction>& phi)
+{ /* Rebuilds state->any_pred_defined for `phi` in `block`: clear, seed from the phi's defined operands, then propagate over linear successor edges. */
+   std::fill(state->any_pred_defined.begin(), state->any_pred_defined.end(), false); /* start with no block marked */
+   for (unsigned i = 0; i < block->logical_preds.size(); i++) { /* operand i is paired with logical_preds[i] */
+      if (phi->operands[i].isUndefined())
+         continue; /* an undefined operand seeds nothing */
+      for (unsigned succ : program->blocks[block->logical_preds[i]].linear_succs)
+         state->any_pred_defined[succ] = true; /* seed: linear successors of a predecessor with a defined operand */
+   }
+
+   unsigned start = block->logical_preds[0]; /* lowest block index visited by the sweep below; NOTE(review): assumes logical_preds is non-empty - confirm */
+
+   /* for loop exit phis, start at the loop header: step back while the preceding block's loop_nest_depth is still >= state->loop_nest_depth. NOTE(review): assumes start never reaches 0 here, otherwise start - 1 wraps - confirm */
+   const bool loop_exit = block->kind & block_kind_loop_exit;
+   while (loop_exit && program->blocks[start - 1].loop_nest_depth >= state->loop_nest_depth)
+      start--;
+
+   for (unsigned i = 0; i < 1u + loop_exit; i++) { /* one sweep, or two for loop exit phis; presumably the second sweep forwards marks set on lower-indexed successors during the first - confirm */
+      for (unsigned j = start; j < block->index; j++) { /* ascending index order over [start, block->index) */
+         if (!state->any_pred_defined[j])
+            continue; /* unmarked blocks forward nothing */
+         for (unsigned succ : program->blocks[j].linear_succs)
+            state->any_pred_defined[succ] = true; /* forward the mark to j's linear successors */
+      }
+   }
+}
+
 void lower_divergent_bool_phi(Program *program, ssa_state *state, Block *block, aco_ptr<Instruction>& phi)
 {
    Builder bld(program);
@@ -182,6 +200,7 @@ void lower_divergent_bool_phi(Program *program, ssa_state *state, Block *block,
 
    state->latest.resize(program->blocks.size());
    state->visited.resize(program->blocks.size());
+   state->any_pred_defined.resize(program->blocks.size());
 
    uint64_t undef_operands = 0;
    for (unsigned i = 0; i < phi->operands.size(); i++)
@@ -198,6 +217,7 @@ void lower_divergent_bool_phi(Program *program, ssa_state *state, Block *block,
          state->loop_nest_depth += 1;
       }
       state->writes.clear();
+      init_any_pred_defined(program, state, block, phi);
       state->needs_init = false;
    }
    std::fill(state->latest.begin(), state->latest.end(), Operand(program->lane_mask));



More information about the mesa-commit mailing list