Mesa (main): aco: remove block_kind_discard

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Feb 8 16:39:36 UTC 2022


Module: Mesa
Branch: main
Commit: 08b8500dfbd733bd8e47a3d659b22cc4480a6e6e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=08b8500dfbd733bd8e47a3d659b22cc4480a6e6e

Author: Daniel Schürmann <daniel at schuermann.dev>
Date:   Mon Jan 31 15:11:22 2022 +0100

aco: remove block_kind_discard

The case handled by block_kind_discard (a uniform discard as the last
instruction of a block inside a loop) doesn't seem to happen in practice.
No need to micro-optimize it.

This patch also merges instruction selection for discard/discard_if
(and terminate/terminate_if) into a single case that always emits p_discard_if.

Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14805>

---

 src/amd/compiler/aco_insert_exec_mask.cpp      | 43 +-------------
 src/amd/compiler/aco_instruction_selection.cpp | 77 +++++++-------------------
 src/amd/compiler/aco_ir.h                      |  7 +--
 src/amd/compiler/aco_lower_to_hw_instr.cpp     |  2 -
 src/amd/compiler/aco_opt_value_numbering.cpp   |  3 +-
 src/amd/compiler/aco_print_ir.cpp              |  2 -
 6 files changed, 28 insertions(+), 106 deletions(-)

diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index 55f9accb0bb..cdff7cbb9ff 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -317,13 +317,11 @@ calculate_wqm_needs(exec_ctx& exec_ctx)
          exec_ctx.info[i].block_needs |= Exact;
 
       /* if discard is used somewhere in nested CF, we need to preserve the WQM mask */
-      if ((block.kind & block_kind_discard || block.kind & block_kind_uses_discard_if) &&
-          ever_again_needs & WQM)
+      if (block.kind & block_kind_uses_discard_if && ever_again_needs & WQM)
          exec_ctx.info[i].block_needs |= Preserve_WQM;
 
       ever_again_needs |= exec_ctx.info[i].block_needs & ~Exact_Branch;
-      if (block.kind & block_kind_discard || block.kind & block_kind_uses_discard_if ||
-          block.kind & block_kind_uses_demote)
+      if (block.kind & block_kind_uses_discard_if || block.kind & block_kind_uses_demote)
          ever_again_needs |= Exact;
 
       /* don't propagate WQM preservation further than the next top_level block */
@@ -890,8 +888,7 @@ add_branch_code(exec_ctx& ctx, Block* block)
          Block& loop_block = ctx.program->blocks[i];
          needs |= ctx.info[i].block_needs;
 
-         if (loop_block.kind & block_kind_uses_discard_if || loop_block.kind & block_kind_discard ||
-             loop_block.kind & block_kind_uses_demote)
+         if (loop_block.kind & block_kind_uses_discard_if || loop_block.kind & block_kind_uses_demote)
             has_discard = true;
          if (loop_block.loop_nest_depth != loop_nest_depth)
             continue;
@@ -931,40 +928,6 @@ add_branch_code(exec_ctx& ctx, Block* block)
     */
    Operand break_cond = Operand(exec, bld.lm);
 
-   if (block->kind & block_kind_discard) {
-
-      assert(block->instructions.back()->isBranch());
-      aco_ptr<Instruction> branch = std::move(block->instructions.back());
-      block->instructions.pop_back();
-
-      /* create a discard_if() instruction with the exec mask as condition */
-      unsigned num = 0;
-      if (ctx.loop.size()) {
-         /* if we're in a loop, only discard from the outer exec masks */
-         num = ctx.loop.back().num_exec_masks;
-      } else {
-         num = ctx.info[idx].exec.size() - 1;
-      }
-
-      Temp cond = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.def(s1, scc),
-                           Definition(exec, bld.lm), Operand::zero(), Operand(exec, bld.lm));
-
-      for (int i = num - 1; i >= 0; i--) {
-         Instruction* andn2 = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc),
-                                       get_exec_op(ctx.info[block->index].exec[i].first), cond);
-         if (i == (int)ctx.info[idx].exec.size() - 1)
-            andn2->definitions[0] = Definition(exec, bld.lm);
-         if (i == 0)
-            bld.pseudo(aco_opcode::p_exit_early_if, bld.scc(andn2->definitions[1].getTemp()));
-         ctx.info[block->index].exec[i].first = Operand(andn2->definitions[0].getTemp());
-      }
-      assert(!ctx.handle_wqm || (ctx.info[block->index].exec[0].second & mask_type_wqm) == 0);
-
-      break_cond = Operand(cond);
-      bld.insert(std::move(branch));
-      /* no return here as it can be followed by a divergent break */
-   }
-
    if (block->kind & block_kind_continue_or_break) {
       assert(ctx.program->blocks[ctx.program->blocks[block->linear_succs[1]].linear_succs[0]].kind &
              block_kind_loop_header);
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index bf202bebc1e..215c35f9ea5 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5708,58 +5708,6 @@ visit_load_constant(isel_context* ctx, nir_intrinsic_instr* instr)
    load_buffer(ctx, instr->num_components, size, dst, rsrc, offset, size, 0);
 }
 
-void
-visit_discard_if(isel_context* ctx, nir_intrinsic_instr* instr)
-{
-   if (ctx->block->loop_nest_depth || ctx->cf_info.parent_if.is_divergent)
-      ctx->cf_info.exec_potentially_empty_discard = true;
-
-   ctx->program->needs_exact = true;
-
-   // TODO: optimize uniform conditions
-   Builder bld(ctx->program, ctx->block);
-   Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
-   assert(src.regClass() == bld.lm);
-   src = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
-   bld.pseudo(aco_opcode::p_discard_if, src);
-   ctx->block->kind |= block_kind_uses_discard_if;
-   return;
-}
-
-void
-visit_discard(isel_context* ctx, nir_intrinsic_instr* instr)
-{
-   Builder bld(ctx->program, ctx->block);
-
-   if (ctx->block->loop_nest_depth || ctx->cf_info.parent_if.is_divergent)
-      ctx->cf_info.exec_potentially_empty_discard = true;
-
-   bool divergent =
-      ctx->cf_info.parent_if.is_divergent || ctx->cf_info.parent_loop.has_divergent_continue;
-
-   if (ctx->block->loop_nest_depth && (nir_instr_is_last(&instr->instr) && !divergent)) {
-      /* we handle discards the same way as jump instructions */
-      append_logical_end(ctx->block);
-
-      /* in loops, discard behaves like break */
-      Block* linear_target = ctx->cf_info.parent_loop.exit;
-      ctx->block->kind |= block_kind_discard;
-
-      /* uniform discard - loop ends here */
-      assert(nir_instr_is_last(&instr->instr));
-      ctx->block->kind |= block_kind_uniform;
-      ctx->cf_info.has_branch = true;
-      bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
-      add_linear_edge(ctx->block->index, linear_target);
-      return;
-   }
-
-   ctx->program->needs_exact = true;
-   bld.pseudo(aco_opcode::p_discard_if, Operand::c32(-1u));
-   ctx->block->kind |= block_kind_uses_discard_if;
-   return;
-}
-
 enum aco_descriptor_type {
    ACO_DESC_IMAGE,
    ACO_DESC_FMASK,
@@ -8129,10 +8077,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
    case nir_intrinsic_load_push_constant: visit_load_push_constant(ctx, instr); break;
    case nir_intrinsic_load_constant: visit_load_constant(ctx, instr); break;
    case nir_intrinsic_vulkan_resource_index: visit_load_resource(ctx, instr); break;
-   case nir_intrinsic_terminate:
-   case nir_intrinsic_discard: visit_discard(ctx, instr); break;
-   case nir_intrinsic_terminate_if:
-   case nir_intrinsic_discard_if: visit_discard_if(ctx, instr); break;
    case nir_intrinsic_load_shared: visit_load_shared(ctx, instr); break;
    case nir_intrinsic_store_shared: visit_store_shared(ctx, instr); break;
    case nir_intrinsic_shared_atomic_add:
@@ -8742,6 +8686,27 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
       ctx->program->needs_exact = true;
       break;
    }
+   case nir_intrinsic_terminate:
+   case nir_intrinsic_terminate_if:
+   case nir_intrinsic_discard:
+   case nir_intrinsic_discard_if: {
+      Operand cond = Operand::c32(-1u);
+      if (instr->intrinsic == nir_intrinsic_discard_if ||
+          instr->intrinsic == nir_intrinsic_terminate_if) {
+         Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
+         assert(src.regClass() == bld.lm);
+         cond =
+            bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
+      }
+
+      bld.pseudo(aco_opcode::p_discard_if, cond);
+
+      if (ctx->block->loop_nest_depth || ctx->cf_info.parent_if.is_divergent)
+         ctx->cf_info.exec_potentially_empty_discard = true;
+      ctx->block->kind |= block_kind_uses_discard_if;
+      ctx->program->needs_exact = true;
+      break;
+   }
    case nir_intrinsic_first_invocation: {
       emit_wqm(bld, bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm)),
                get_ssa_temp(ctx, &instr->dest.ssa));
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 4a44448d014..c979e60a091 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1811,10 +1811,9 @@ enum block_kind {
    block_kind_continue = 1 << 5,
    block_kind_break = 1 << 6,
    block_kind_continue_or_break = 1 << 7,
-   block_kind_discard = 1 << 8,
-   block_kind_branch = 1 << 9,
-   block_kind_merge = 1 << 10,
-   block_kind_invert = 1 << 11,
+   block_kind_branch = 1 << 8,
+   block_kind_merge = 1 << 9,
+   block_kind_invert = 1 << 10,
    block_kind_uses_discard_if = 1 << 12,
    block_kind_needs_lowering = 1 << 13,
    block_kind_uses_demote = 1 << 14,
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index df94f21db85..6fb6accb9a9 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -2036,8 +2036,6 @@ lower_to_hw_instr(Program* program)
                   bld.reset(&ctx.instructions);
                }
 
-               // TODO: exec can be zero here with block_kind_discard
-
                assert(instr->operands[0].physReg() == scc);
                bld.sopp(aco_opcode::s_cbranch_scc0, Definition(exec, s2), instr->operands[0],
                         discard_block->index);
diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp
index dba86b87538..98aab2cb5af 100644
--- a/src/amd/compiler/aco_opt_value_numbering.cpp
+++ b/src/amd/compiler/aco_opt_value_numbering.cpp
@@ -476,8 +476,7 @@ value_numbering(Program* program)
 
       /* increment exec_id when entering nested control flow */
       if (block.kind & block_kind_branch || block.kind & block_kind_loop_preheader ||
-          block.kind & block_kind_break || block.kind & block_kind_continue ||
-          block.kind & block_kind_discard)
+          block.kind & block_kind_break || block.kind & block_kind_continue)
          ctx.exec_id++;
       else if (block.kind & block_kind_continue_or_break)
          ctx.exec_id += 2;
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp
index 2498a5b1ce8..085697b68c8 100644
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -744,8 +744,6 @@ print_block_kind(uint16_t kind, FILE* output)
       fprintf(output, "break, ");
    if (kind & block_kind_continue_or_break)
       fprintf(output, "continue_or_break, ");
-   if (kind & block_kind_discard)
-      fprintf(output, "discard, ");
    if (kind & block_kind_branch)
       fprintf(output, "branch, ");
    if (kind & block_kind_merge)



More information about the mesa-commit mailing list