Mesa (main): pan/bi: Use flow control lowering on Valhall

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Jun 1 16:29:30 UTC 2022


Module: Mesa
Branch: main
Commit: 5067a26f4432ae5e9690e70ef2498ea24798593e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5067a26f4432ae5e9690e70ef2498ea24798593e

Author: Alyssa Rosenzweig <alyssa at collabora.com>
Date:   Thu May 19 16:10:44 2022 -0400

pan/bi: Use flow control lowering on Valhall

Flow control lowering sits logically at the same part of the compile pipeline
as clause scheduling on Bifrost, and the two have a lot of similarities. Now
that we generate flow control only as a late pass, various hacks in the
compiler are no longer necessary and are dropped.

Signed-off-by: Alyssa Rosenzweig <alyssa at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16804>

---

 src/panfrost/bifrost/bifrost_compile.c | 36 +++----------
 src/panfrost/bifrost/bir.c             |  6 ---
 src/panfrost/bifrost/valhall/va_pack.c | 92 ++--------------------------------
 3 files changed, 9 insertions(+), 125 deletions(-)

diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index f5c0865393d..e3178d3124a 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -770,11 +770,6 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T,
         uint64_t blend_desc = inputs->blend.bifrost_blend_desc;
         enum bi_register_format regfmt = bi_reg_fmt_for_nir(T);
 
-        if (b->shader->arch >= 9 && !inputs->is_blend) {
-                bi_instr *I = bi_nop(b);
-                I->flow = 0x9; /* .wait */
-        }
-
         if (inputs->is_blend && inputs->blend.nr_samples > 1) {
                 /* Conversion descriptor comes from the compile inputs, pixel
                  * indices derived at run time based on sample ID */
@@ -827,11 +822,6 @@ bi_skip_atest(bi_context *ctx, bool emit_zs)
 static void
 bi_emit_atest(bi_builder *b, bi_index alpha)
 {
-        if (b->shader->arch >= 9) {
-                bi_instr *I = bi_nop(b);
-                I->flow = 0x8; /* .wait0126 */
-        }
-
         bi_instr *atest = bi_atest_to(b, bi_temp(b->shader), bi_coverage(b), alpha);
         b->shader->emitted_atest = true;
         b->shader->coverage = atest->dest[0];
@@ -1549,11 +1539,6 @@ bi_emit_ld_tile(bi_builder *b, nir_intrinsic_instr *instr)
                 bi_imm_u32(b->shader->inputs->bifrost.rt_conv[rt]) :
                 bi_load_sysval(b, PAN_SYSVAL(RT_CONVERSION, rt | (size << 4)), 1, 0);
 
-        if (!b->shader->inputs->is_blend && b->shader->arch >= 9) {
-                bi_instr *I = bi_nop(b);
-                I->flow = 0x9; /* .wait */
-        }
-
         bi_ld_tile_to(b, dest, bi_pixel_indices(b, rt), bi_coverage(b), desc,
                       regfmt, nr - 1);
         bi_emit_cached_split(b, dest, size * nr);
@@ -4663,19 +4648,6 @@ bifrost_nir_lower_store_component(struct nir_builder *b,
  * That trick doesn't work on Valhall, which needs a NOP inserted in the
  * terminal block instead.
  */
-
-static void
-bi_lower_terminal_block(bi_context *ctx, bi_block *block)
-{
-        bi_builder b = bi_init_builder(ctx, bi_after_block(block));
-
-        /* Ensure the instruction is not dead code eliminated. XXX: This is a
-         * bit of a hack.
-         */
-        bi_instr *I = bi_nop(&b);
-        I->flow = 0xF;
-}
-
 static void
 bi_lower_branch(bi_context *ctx, bi_block *block)
 {
@@ -4701,7 +4673,7 @@ bi_lower_branch(bi_context *ctx, bi_block *block)
                 if (cull_terminal)
                         ins->branch_target = NULL;
                 else if (ins->branch_target)
-                        bi_lower_terminal_block(ctx, ins->branch_target);
+                        ins->branch_target->needs_nop = true;
         }
 }
 
@@ -5075,7 +5047,11 @@ bi_compile_variant_nir(nir_shader *nir,
         if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal)
                 bi_print_shader(ctx, stdout);
 
-        if (ctx->arch <= 8) {
+        if (ctx->arch >= 9) {
+                va_assign_slots(ctx);
+                va_insert_flow_control_nops(ctx);
+                va_merge_flow(ctx);
+        } else {
                 bi_schedule(ctx);
                 bi_assign_scoreboard(ctx);
 
diff --git a/src/panfrost/bifrost/bir.c b/src/panfrost/bifrost/bir.c
index ea2e0faf1f4..f8323f93cfb 100644
--- a/src/panfrost/bifrost/bir.c
+++ b/src/panfrost/bifrost/bir.c
@@ -185,12 +185,6 @@ bi_side_effects(const bi_instr *I)
         if (bi_opcode_props[I->op].last)
                 return true;
 
-        /* On Valhall, nontrivial flow control acts as a side effect and should
-         * not be dead code eliminated away.
-         */
-        if (I->flow)
-                return true;
-
         switch (I->op) {
         case BI_OPCODE_DISCARD_F32:
         case BI_OPCODE_DISCARD_B32:
diff --git a/src/panfrost/bifrost/valhall/va_pack.c b/src/panfrost/bifrost/valhall/va_pack.c
index e934fc81941..dce06ac80d1 100644
--- a/src/panfrost/bifrost/valhall/va_pack.c
+++ b/src/panfrost/bifrost/valhall/va_pack.c
@@ -793,30 +793,6 @@ va_pack_instr(const bi_instr *I)
    return hex;
 }
 
-static bool
-va_last_in_block(bi_block *block, bi_instr *I)
-{
-   return (I->link.next == &block->instructions);
-}
-
-static bool
-va_should_return(bi_block *block, bi_instr *I)
-{
-   /* Don't return within a block */
-   if (!va_last_in_block(block, I))
-      return false;
-
-   /* Don't return if we're succeeded by instructions */
-   for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) {
-      bi_block *succ = block->successors[i];
-
-      if (succ && !bi_is_terminal_block(succ))
-         return false;
-   }
-
-   return true;
-}
-
 static unsigned
 va_instructions_in_block(bi_block *block)
 {
@@ -903,15 +879,13 @@ va_lower_branch_target(bi_context *ctx, bi_block *start, bi_instr *I)
 static void
 va_lower_blend(bi_context *ctx)
 {
-   bool last_blend = true;
-
    /* Link register (ABI between fragment and blend shaders) */
    bi_index lr = bi_register(48);
 
    /* Program counter for *next* instruction */
    bi_index pc = bi_fau(BIR_FAU_PROGRAM_COUNTER, false);
 
-   bi_foreach_instr_global_rev(ctx, I) {
+   bi_foreach_instr_global(ctx, I) {
       if (I->op != BI_OPCODE_BLEND)
          continue;
 
@@ -919,7 +893,7 @@ va_lower_blend(bi_context *ctx)
 
       unsigned prolog_length = 2 * 8;
 
-      if (last_blend)
+      if (I->flow == VA_FLOW_END)
          bi_iadd_imm_i32_to(&b, lr, va_zero_lut(), 0);
       else
          bi_iadd_imm_i32_to(&b, lr, pc, prolog_length - 8);
@@ -927,66 +901,8 @@ va_lower_blend(bi_context *ctx)
       bi_branchzi(&b, va_zero_lut(), I->src[3], BI_CMPF_EQ);
 
       /* For fixed function: skip the prologue, or return */
-      if (last_blend)
-         I->flow = 0x7 | 0x8; /* .return */
-      else
+      if (I->flow != VA_FLOW_END)
          I->branch_offset = prolog_length;
-
-      /* Iterate backwards makes the last BLEND easy to identify */
-      last_blend = false;
-   }
-}
-
-/*
- * Add a flow control modifier to an instruction. There may be an existing flow
- * control modifier; if so, we need to add a NOP with the extra flow control
- * _after_ this instruction
- */
-static void
-va_add_flow(bi_context *ctx, bi_instr *I, enum va_flow flow)
-{
-   if (I->flow != VA_FLOW_NONE) {
-      bi_builder b = bi_init_builder(ctx, bi_after_instr(I));
-      I = bi_nop(&b);
-   }
-
-   I->flow = flow;
-}
-
-/*
- * Add flow control modifiers to the program. This is a stop gap until we have a
- * proper scheduler. For now, this should be conformant while doing little
- * optimization of message waits.
- */
-static void
-va_lower_flow_control(bi_context *ctx)
-{
-   bi_foreach_block(ctx, block) {
-      bool block_reconverges = bi_reconverge_branches(block);
-
-      bi_foreach_instr_in_block_safe(block, I) {
-         /* If this instruction returns, there is nothing left to do. */
-         if (va_should_return(block, I)) {
-            I->flow = VA_FLOW_END;
-            continue;
-         }
-
-         /* We may need to wait */
-         if (I->op == BI_OPCODE_BARRIER)
-            va_add_flow(ctx, I, VA_FLOW_WAIT);
-         else if (bi_opcode_props[I->op].message)
-            va_add_flow(ctx, I, VA_FLOW_WAIT0);
-
-         /* Lastly, we may need to reconverge. If we need reconvergence, it
-          * has to be on the last instruction of the block. If we have to
-          * generate a NOP for that reconverge, we need that to be last. So
-          * this ordering is careful.
-          */
-         if (va_last_in_block(block, I) && block_reconverges)
-            va_add_flow(ctx, I, VA_FLOW_RECONVERGE);
-
-
-      }
    }
 }
 
@@ -1001,8 +917,6 @@ bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission)
    if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->inputs->is_blend)
       va_lower_blend(ctx);
 
-   va_lower_flow_control(ctx);
-
    bi_foreach_block(ctx, block) {
       bi_foreach_instr_in_block(block, I) {
          if (I->op == BI_OPCODE_BRANCHZ_I16)



More information about the mesa-commit mailing list