Mesa (main): aco: remove block_kind_discard
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue Feb 8 16:39:36 UTC 2022
Module: Mesa
Branch: main
Commit: 08b8500dfbd733bd8e47a3d659b22cc4480a6e6e
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=08b8500dfbd733bd8e47a3d659b22cc4480a6e6e
Author: Daniel Schürmann <daniel at schuermann.dev>
Date: Mon Jan 31 15:11:22 2022 +0100
aco: remove block_kind_discard
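The block_kind_discard special case (a uniform discard that is the last
instruction of a block inside a loop, which was handled like a break)
doesn't seem to happen in practice, so there is no need to micro-optimize it.
This patch merges instruction selection for discard/discard_if.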
Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14805>
---
src/amd/compiler/aco_insert_exec_mask.cpp | 43 +-------------
src/amd/compiler/aco_instruction_selection.cpp | 77 +++++++-------------------
src/amd/compiler/aco_ir.h | 7 +--
src/amd/compiler/aco_lower_to_hw_instr.cpp | 2 -
src/amd/compiler/aco_opt_value_numbering.cpp | 3 +-
src/amd/compiler/aco_print_ir.cpp | 2 -
6 files changed, 28 insertions(+), 106 deletions(-)
diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index 55f9accb0bb..cdff7cbb9ff 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -317,13 +317,11 @@ calculate_wqm_needs(exec_ctx& exec_ctx)
exec_ctx.info[i].block_needs |= Exact;
/* if discard is used somewhere in nested CF, we need to preserve the WQM mask */
- if ((block.kind & block_kind_discard || block.kind & block_kind_uses_discard_if) &&
- ever_again_needs & WQM)
+ if (block.kind & block_kind_uses_discard_if && ever_again_needs & WQM)
exec_ctx.info[i].block_needs |= Preserve_WQM;
ever_again_needs |= exec_ctx.info[i].block_needs & ~Exact_Branch;
- if (block.kind & block_kind_discard || block.kind & block_kind_uses_discard_if ||
- block.kind & block_kind_uses_demote)
+ if (block.kind & block_kind_uses_discard_if || block.kind & block_kind_uses_demote)
ever_again_needs |= Exact;
/* don't propagate WQM preservation further than the next top_level block */
@@ -890,8 +888,7 @@ add_branch_code(exec_ctx& ctx, Block* block)
Block& loop_block = ctx.program->blocks[i];
needs |= ctx.info[i].block_needs;
- if (loop_block.kind & block_kind_uses_discard_if || loop_block.kind & block_kind_discard ||
- loop_block.kind & block_kind_uses_demote)
+ if (loop_block.kind & block_kind_uses_discard_if || loop_block.kind & block_kind_uses_demote)
has_discard = true;
if (loop_block.loop_nest_depth != loop_nest_depth)
continue;
@@ -931,40 +928,6 @@ add_branch_code(exec_ctx& ctx, Block* block)
*/
Operand break_cond = Operand(exec, bld.lm);
- if (block->kind & block_kind_discard) {
-
- assert(block->instructions.back()->isBranch());
- aco_ptr<Instruction> branch = std::move(block->instructions.back());
- block->instructions.pop_back();
-
- /* create a discard_if() instruction with the exec mask as condition */
- unsigned num = 0;
- if (ctx.loop.size()) {
- /* if we're in a loop, only discard from the outer exec masks */
- num = ctx.loop.back().num_exec_masks;
- } else {
- num = ctx.info[idx].exec.size() - 1;
- }
-
- Temp cond = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.def(s1, scc),
- Definition(exec, bld.lm), Operand::zero(), Operand(exec, bld.lm));
-
- for (int i = num - 1; i >= 0; i--) {
- Instruction* andn2 = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc),
- get_exec_op(ctx.info[block->index].exec[i].first), cond);
- if (i == (int)ctx.info[idx].exec.size() - 1)
- andn2->definitions[0] = Definition(exec, bld.lm);
- if (i == 0)
- bld.pseudo(aco_opcode::p_exit_early_if, bld.scc(andn2->definitions[1].getTemp()));
- ctx.info[block->index].exec[i].first = Operand(andn2->definitions[0].getTemp());
- }
- assert(!ctx.handle_wqm || (ctx.info[block->index].exec[0].second & mask_type_wqm) == 0);
-
- break_cond = Operand(cond);
- bld.insert(std::move(branch));
- /* no return here as it can be followed by a divergent break */
- }
-
if (block->kind & block_kind_continue_or_break) {
assert(ctx.program->blocks[ctx.program->blocks[block->linear_succs[1]].linear_succs[0]].kind &
block_kind_loop_header);
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index bf202bebc1e..215c35f9ea5 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5708,58 +5708,6 @@ visit_load_constant(isel_context* ctx, nir_intrinsic_instr* instr)
load_buffer(ctx, instr->num_components, size, dst, rsrc, offset, size, 0);
}
-void
-visit_discard_if(isel_context* ctx, nir_intrinsic_instr* instr)
-{
- if (ctx->block->loop_nest_depth || ctx->cf_info.parent_if.is_divergent)
- ctx->cf_info.exec_potentially_empty_discard = true;
-
- ctx->program->needs_exact = true;
-
- // TODO: optimize uniform conditions
- Builder bld(ctx->program, ctx->block);
- Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
- assert(src.regClass() == bld.lm);
- src = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
- bld.pseudo(aco_opcode::p_discard_if, src);
- ctx->block->kind |= block_kind_uses_discard_if;
- return;
-}
-
-void
-visit_discard(isel_context* ctx, nir_intrinsic_instr* instr)
-{
- Builder bld(ctx->program, ctx->block);
-
- if (ctx->block->loop_nest_depth || ctx->cf_info.parent_if.is_divergent)
- ctx->cf_info.exec_potentially_empty_discard = true;
-
- bool divergent =
- ctx->cf_info.parent_if.is_divergent || ctx->cf_info.parent_loop.has_divergent_continue;
-
- if (ctx->block->loop_nest_depth && (nir_instr_is_last(&instr->instr) && !divergent)) {
- /* we handle discards the same way as jump instructions */
- append_logical_end(ctx->block);
-
- /* in loops, discard behaves like break */
- Block* linear_target = ctx->cf_info.parent_loop.exit;
- ctx->block->kind |= block_kind_discard;
-
- /* uniform discard - loop ends here */
- assert(nir_instr_is_last(&instr->instr));
- ctx->block->kind |= block_kind_uniform;
- ctx->cf_info.has_branch = true;
- bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
- add_linear_edge(ctx->block->index, linear_target);
- return;
- }
-
- ctx->program->needs_exact = true;
- bld.pseudo(aco_opcode::p_discard_if, Operand::c32(-1u));
- ctx->block->kind |= block_kind_uses_discard_if;
- return;
-}
-
enum aco_descriptor_type {
ACO_DESC_IMAGE,
ACO_DESC_FMASK,
@@ -8129,10 +8077,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
case nir_intrinsic_load_push_constant: visit_load_push_constant(ctx, instr); break;
case nir_intrinsic_load_constant: visit_load_constant(ctx, instr); break;
case nir_intrinsic_vulkan_resource_index: visit_load_resource(ctx, instr); break;
- case nir_intrinsic_terminate:
- case nir_intrinsic_discard: visit_discard(ctx, instr); break;
- case nir_intrinsic_terminate_if:
- case nir_intrinsic_discard_if: visit_discard_if(ctx, instr); break;
case nir_intrinsic_load_shared: visit_load_shared(ctx, instr); break;
case nir_intrinsic_store_shared: visit_store_shared(ctx, instr); break;
case nir_intrinsic_shared_atomic_add:
@@ -8742,6 +8686,27 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
ctx->program->needs_exact = true;
break;
}
+ case nir_intrinsic_terminate:
+ case nir_intrinsic_terminate_if:
+ case nir_intrinsic_discard:
+ case nir_intrinsic_discard_if: {
+ Operand cond = Operand::c32(-1u);
+ if (instr->intrinsic == nir_intrinsic_discard_if ||
+ instr->intrinsic == nir_intrinsic_terminate_if) {
+ Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
+ assert(src.regClass() == bld.lm);
+ cond =
+ bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
+ }
+
+ bld.pseudo(aco_opcode::p_discard_if, cond);
+
+ if (ctx->block->loop_nest_depth || ctx->cf_info.parent_if.is_divergent)
+ ctx->cf_info.exec_potentially_empty_discard = true;
+ ctx->block->kind |= block_kind_uses_discard_if;
+ ctx->program->needs_exact = true;
+ break;
+ }
case nir_intrinsic_first_invocation: {
emit_wqm(bld, bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm)),
get_ssa_temp(ctx, &instr->dest.ssa));
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 4a44448d014..c979e60a091 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1811,10 +1811,9 @@ enum block_kind {
block_kind_continue = 1 << 5,
block_kind_break = 1 << 6,
block_kind_continue_or_break = 1 << 7,
- block_kind_discard = 1 << 8,
- block_kind_branch = 1 << 9,
- block_kind_merge = 1 << 10,
- block_kind_invert = 1 << 11,
+ block_kind_branch = 1 << 8,
+ block_kind_merge = 1 << 9,
+ block_kind_invert = 1 << 10,
block_kind_uses_discard_if = 1 << 12,
block_kind_needs_lowering = 1 << 13,
block_kind_uses_demote = 1 << 14,
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index df94f21db85..6fb6accb9a9 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -2036,8 +2036,6 @@ lower_to_hw_instr(Program* program)
bld.reset(&ctx.instructions);
}
- // TODO: exec can be zero here with block_kind_discard
-
assert(instr->operands[0].physReg() == scc);
bld.sopp(aco_opcode::s_cbranch_scc0, Definition(exec, s2), instr->operands[0],
discard_block->index);
diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp
index dba86b87538..98aab2cb5af 100644
--- a/src/amd/compiler/aco_opt_value_numbering.cpp
+++ b/src/amd/compiler/aco_opt_value_numbering.cpp
@@ -476,8 +476,7 @@ value_numbering(Program* program)
/* increment exec_id when entering nested control flow */
if (block.kind & block_kind_branch || block.kind & block_kind_loop_preheader ||
- block.kind & block_kind_break || block.kind & block_kind_continue ||
- block.kind & block_kind_discard)
+ block.kind & block_kind_break || block.kind & block_kind_continue)
ctx.exec_id++;
else if (block.kind & block_kind_continue_or_break)
ctx.exec_id += 2;
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp
index 2498a5b1ce8..085697b68c8 100644
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -744,8 +744,6 @@ print_block_kind(uint16_t kind, FILE* output)
fprintf(output, "break, ");
if (kind & block_kind_continue_or_break)
fprintf(output, "continue_or_break, ");
- if (kind & block_kind_discard)
- fprintf(output, "discard, ");
if (kind & block_kind_branch)
fprintf(output, "branch, ");
if (kind & block_kind_merge)
More information about the mesa-commit mailing list