Mesa (main): aco/insert_exec_mask: remove some unnecessary WQM loop handling code
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Feb 11 19:26:17 UTC 2022
Module: Mesa
Branch: main
Commit: cbb1b095ca919bb1db7c70a23dff3745942868e1
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cbb1b095ca919bb1db7c70a23dff3745942868e1
Author: Daniel Schürmann <daniel at schuermann.dev>
Date: Wed Jan 26 11:08:42 2022 +0100
aco/insert_exec_mask: remove some unnecessary WQM loop handling code
These workarounds were necessary to prevent infinite loops
with helper lane registers containing wrong data.
Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14951>
---
src/amd/compiler/aco_insert_exec_mask.cpp | 106 ++----------------------------
1 file changed, 4 insertions(+), 102 deletions(-)
diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index a501b664ed5..7bf852ec186 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -83,7 +83,6 @@ struct block_info {
std::vector<WQMState> instr_needs;
uint8_t block_needs;
uint8_t ever_again_needs;
- /* more... */
};
struct exec_ctx {
@@ -110,16 +109,6 @@ needs_exact(aco_ptr<Instruction>& instr)
}
}
-void
-set_needs_wqm(wqm_ctx& ctx, Temp tmp)
-{
- if (!ctx.needs_wqm[tmp.id()]) {
- ctx.needs_wqm[tmp.id()] = true;
- if (ctx.defined_in[tmp.id()] != 0xFFFF)
- ctx.worklist.insert(ctx.defined_in[tmp.id()]);
- }
-}
-
void
mark_block_wqm(wqm_ctx& ctx, unsigned block_idx)
{
@@ -146,7 +135,8 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
WQMState needs = needs_exact(instr) ? Exact : Unspecified;
bool propagate_wqm = instr->opcode == aco_opcode::p_wqm;
- bool pred_by_exec = needs_exec_mask(instr.get());
+ bool pred_by_exec = needs_exec_mask(instr.get()) ||
+ instr->opcode == aco_opcode::p_logical_end;
for (const Definition& definition : instr->definitions) {
if (!definition.isTemp())
continue;
@@ -161,13 +151,13 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
if (instr->isBranch() && ctx.branch_wqm[block->index]) {
assert(!(info.block_needs & Exact_Branch));
needs = WQM;
- propagate_wqm = true;
}
if (propagate_wqm) {
+ needs = pred_by_exec ? WQM : Unspecified;
for (const Operand& op : instr->operands) {
if (op.isTemp()) {
- set_needs_wqm(ctx, op.getTemp());
+ ctx.needs_wqm[op.tempId()] = true;
}
}
}
@@ -175,12 +165,6 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
needs = pred_by_exec ? WQM : Unspecified;
}
- if ((instr->opcode == aco_opcode::p_logical_end && ctx.branch_wqm[block->index]) ||
- instr->opcode == aco_opcode::p_wqm) {
- assert(needs != Exact);
- needs = WQM;
- }
-
instr_needs[i] = needs;
info.block_needs |= needs;
}
@@ -194,67 +178,6 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
}
}
-/* If an outer loop needs WQM but a nested loop does not, we have to ensure that
- * the nested loop is done in WQM so that the exec is not empty upon entering
- * the nested loop.
- *
- * TODO: This could be fixed with slightly better code (for loops with divergent
- * breaks, which might benefit from being in exact) by adding Exact_Branch to a
- * divergent branch surrounding the nested loop, if such a branch exists.
- */
-void
-handle_wqm_loops(wqm_ctx& ctx, exec_ctx& exec_ctx, unsigned preheader)
-{
- for (unsigned idx = preheader + 1; idx < exec_ctx.program->blocks.size(); idx++) {
- Block& block = exec_ctx.program->blocks[idx];
- if (block.kind & block_kind_break)
- mark_block_wqm(ctx, idx);
-
- if ((block.kind & block_kind_loop_exit) && block.loop_nest_depth == 0)
- break;
- }
-}
-
-/* If an outer loop and it's nested loops does not need WQM,
- * add_branch_code() will ensure that it enters in Exact. We have to
- * ensure that the exact exec mask is not empty by adding Exact_Branch to
- * the outer divergent branch.
- */
-void
-handle_exact_loops(wqm_ctx& ctx, exec_ctx& exec_ctx, unsigned preheader)
-{
- assert(exec_ctx.program->blocks[preheader + 1].kind & block_kind_loop_header);
-
- int parent_branch = preheader;
- unsigned rel_branch_depth = 0;
- for (; parent_branch >= 0; parent_branch--) {
- Block& branch = exec_ctx.program->blocks[parent_branch];
- if (branch.kind & block_kind_branch) {
- if (rel_branch_depth == 0)
- break;
- rel_branch_depth--;
- }
-
- /* top-level blocks should never have empty exact exec masks */
- if (branch.kind & block_kind_top_level)
- return;
-
- if (branch.kind & block_kind_merge)
- rel_branch_depth++;
- }
- assert(parent_branch >= 0);
-
- ASSERTED Block& branch = exec_ctx.program->blocks[parent_branch];
- assert(branch.kind & block_kind_branch);
- if (ctx.branch_wqm[parent_branch]) {
- /* The branch can't be done in Exact because some other blocks in it
- * are in WQM. So instead, ensure that the loop is done in WQM. */
- handle_wqm_loops(ctx, exec_ctx, preheader);
- } else {
- exec_ctx.info[parent_branch].block_needs |= Exact_Branch;
- }
-}
-
void
calculate_wqm_needs(exec_ctx& exec_ctx)
{
@@ -266,27 +189,6 @@ calculate_wqm_needs(exec_ctx& exec_ctx)
Block& block = exec_ctx.program->blocks[block_index];
get_block_needs(ctx, exec_ctx, &block);
-
- /* handle_exact_loops() needs information on outer branches, so don't
- * handle loops until a top-level block.
- */
- if (block.kind & block_kind_top_level && block.index != exec_ctx.program->blocks.size() - 1) {
- unsigned preheader = block.index;
- do {
- Block& preheader_block = exec_ctx.program->blocks[preheader];
- if ((preheader_block.kind & block_kind_loop_preheader) &&
- preheader_block.loop_nest_depth == 0) {
- /* If the loop or a nested loop needs WQM, branch_wqm will be true for the
- * preheader.
- */
- if (ctx.branch_wqm[preheader])
- handle_wqm_loops(ctx, exec_ctx, preheader);
- else
- handle_exact_loops(ctx, exec_ctx, preheader);
- }
- preheader++;
- } while (!(exec_ctx.program->blocks[preheader].kind & block_kind_top_level));
- }
}
uint8_t ever_again_needs = 0;
More information about the mesa-commit
mailing list