Mesa (staging/20.3): nir/opt_if: split ALU from Phi more aggressively

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Dec 18 17:58:26 UTC 2020


Module: Mesa
Branch: staging/20.3
Commit: 8232610da2dc2289950143718560afb28980c56e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=8232610da2dc2289950143718560afb28980c56e

Author: Daniel Schürmann <daniel at schuermann.dev>
Date:   Wed Dec 16 10:33:59 2020 +0100

nir/opt_if: split ALU from Phi more aggressively

If the only user is a trivial bcsel which in a second step
can be turned into a phi, this conversion is also worth it
even if the previous result is not undefined or constant.
Allows for some more loop unrolling or saves a few instructions.

Totals from 62 (0.04% of 139391) affected shaders (NAVI10):
SGPRs: 4976 -> 4992 (+0.32%)
VGPRs: 4408 -> 4472 (+1.45%); split: -0.45%, +1.91%
CodeSize: 453632 -> 464000 (+2.29%); split: -0.32%, +2.60%
MaxWaves: 527 -> 511 (-3.04%); split: +0.38%, -3.42%
Instrs: 84940 -> 86681 (+2.05%); split: -0.36%, +2.41%
Cycles: 11946844 -> 11783708 (-1.37%); split: -1.40%, +0.04%
VMEM: 9403 -> 10357 (+10.15%); split: +11.59%, -1.45%
SMEM: 3003 -> 3025 (+0.73%); split: +1.07%, -0.33%
VClause: 1756 -> 1997 (+13.72%); split: -0.11%, +13.84%
SClause: 2914 -> 2915 (+0.03%); split: -0.10%, +0.14%
Copies: 6426 -> 6768 (+5.32%); split: -4.14%, +9.46%
Branches: 2105 -> 2102 (-0.14%); split: -1.66%, +1.52%
PreSGPRs: 2921 -> 2909 (-0.41%); split: -0.55%, +0.14%
PreVGPRs: 4151 -> 4179 (+0.67%); split: -0.24%, +0.92%

cc: mesa-stable

Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8123>
(cherry picked from commit 8513b12590cf65f77c16a59774de2d268e5de5f9)

---

 .pick_status.json             |   2 +-
 src/compiler/nir/nir_opt_if.c | 160 ++++++++++++++++++++++++------------------
 2 files changed, 93 insertions(+), 69 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 2f50b7030d7..f26284ababa 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -373,7 +373,7 @@
         "description": "nir/opt_if: split ALU from Phi more aggressively",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index 31195b12bb7..e25c66f21b8 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -295,6 +295,39 @@ alu_instr_is_type_conversion(const nir_alu_instr *alu)
           nir_op_infos[alu->op].output_type != nir_op_infos[alu->op].input_types[0];
 }
 
+static bool
+is_trivial_bcsel(const nir_instr *instr, bool allow_non_phi_src)
+{
+   if (instr->type != nir_instr_type_alu)
+      return false;
+
+   nir_alu_instr *const bcsel = nir_instr_as_alu(instr);
+   if (bcsel->op != nir_op_bcsel &&
+       bcsel->op != nir_op_b32csel &&
+       bcsel->op != nir_op_fcsel)
+      return false;
+
+   for (unsigned i = 0; i < 3; i++) {
+      if (!nir_alu_src_is_trivial_ssa(bcsel, i) ||
+          bcsel->src[i].src.ssa->parent_instr->block != instr->block)
+         return false;
+
+      if (bcsel->src[i].src.ssa->parent_instr->type != nir_instr_type_phi) {
+         /* opt_split_alu_of_phi() is able to peel that src from the loop */
+         if (i == 0 || !allow_non_phi_src)
+            return false;
+         allow_non_phi_src = false;
+      }
+   }
+
+   nir_foreach_phi_src(src, nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr)) {
+      if (!nir_src_is_const(src->src))
+         return false;
+   }
+
+   return true;
+}
+
 /**
  * Splits ALU instructions that have a source that is a phi node
  *
@@ -306,12 +339,13 @@ alu_instr_is_type_conversion(const nir_alu_instr *alu)
  *
  * - At least one source of the instruction is a phi node from the header block.
  *
- * - The phi node selects a constant or undef from the block before the loop.
- *
  * - Any non-phi sources of the ALU instruction come from a block that
  *   dominates the block before the loop.  The most common failure mode for
  *   this check is sources that are generated in the loop header block.
  *
+ * - The phi node selects a constant or undef from the block before the loop or
+ *   the only ALU user is a trivial bcsel that gets removed by peeling the ALU
+ *
  * The split process splits the original ALU instruction into two, one at the
  * bottom of the loop and one at the block before the loop. The instruction
  * before the loop computes the value on the first iteration, and the
@@ -450,60 +484,72 @@ opt_split_alu_of_phi(nir_builder *b, nir_loop *loop)
          }
       }
 
-      if (has_phi_src_from_prev_block && all_non_phi_exist_in_prev_block &&
-          (is_prev_result_undef || is_prev_result_const)) {
-         nir_block *const continue_block = find_continue_block(loop);
+      if (!has_phi_src_from_prev_block || !all_non_phi_exist_in_prev_block)
+         continue;
 
-         b->cursor = nir_after_block(prev_block);
-         nir_ssa_def *prev_value = clone_alu_and_replace_src_defs(b, alu, prev_srcs);
+      if (!is_prev_result_undef && !is_prev_result_const) {
+         /* check if the only user is a trivial bcsel */
+         if (!list_is_empty(&alu->dest.dest.ssa.if_uses) ||
+             !list_is_singular(&alu->dest.dest.ssa.uses))
+            continue;
 
-         /* Make a copy of the original ALU instruction.  Replace the sources
-          * of the new instruction that read a phi with an undef source from
-          * prev_block with the non-undef source of that phi.
-          *
-          * Insert the new instruction at the end of the continue block.
-          */
-         b->cursor = nir_after_block_before_jump(continue_block);
+         nir_src *use = list_first_entry(&alu->dest.dest.ssa.uses, nir_src, use_link);
+         if (!is_trivial_bcsel(use->parent_instr, true))
+            continue;
+      }
+
+      /* Split ALU of Phi */
+      nir_block *const continue_block = find_continue_block(loop);
 
-         nir_ssa_def *const alu_copy =
-            clone_alu_and_replace_src_defs(b, alu, continue_srcs);
+      b->cursor = nir_after_block(prev_block);
+      nir_ssa_def *prev_value = clone_alu_and_replace_src_defs(b, alu, prev_srcs);
 
-         /* Make a new phi node that selects a value from prev_block and the
-          * result of the new instruction from continue_block.
-          */
-         nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
-         nir_phi_src *phi_src;
+      /* Make a copy of the original ALU instruction.  Replace the sources
+       * of the new instruction that read a phi with an undef source from
+       * prev_block with the non-undef source of that phi.
+       *
+       * Insert the new instruction at the end of the continue block.
+       */
+      b->cursor = nir_after_block_before_jump(continue_block);
+
+      nir_ssa_def *const alu_copy =
+         clone_alu_and_replace_src_defs(b, alu, continue_srcs);
+
+      /* Make a new phi node that selects a value from prev_block and the
+       * result of the new instruction from continue_block.
+       */
+      nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
+      nir_phi_src *phi_src;
 
-         phi_src = ralloc(phi, nir_phi_src);
-         phi_src->pred = prev_block;
-         phi_src->src = nir_src_for_ssa(prev_value);
-         exec_list_push_tail(&phi->srcs, &phi_src->node);
+      phi_src = ralloc(phi, nir_phi_src);
+      phi_src->pred = prev_block;
+      phi_src->src = nir_src_for_ssa(prev_value);
+      exec_list_push_tail(&phi->srcs, &phi_src->node);
 
-         phi_src = ralloc(phi, nir_phi_src);
-         phi_src->pred = continue_block;
-         phi_src->src = nir_src_for_ssa(alu_copy);
-         exec_list_push_tail(&phi->srcs, &phi_src->node);
+      phi_src = ralloc(phi, nir_phi_src);
+      phi_src->pred = continue_block;
+      phi_src->src = nir_src_for_ssa(alu_copy);
+      exec_list_push_tail(&phi->srcs, &phi_src->node);
 
-         nir_ssa_dest_init(&phi->instr, &phi->dest,
-                           alu_copy->num_components, alu_copy->bit_size, NULL);
+      nir_ssa_dest_init(&phi->instr, &phi->dest,
+                        alu_copy->num_components, alu_copy->bit_size, NULL);
 
-         b->cursor = nir_after_phis(header_block);
-         nir_builder_instr_insert(b, &phi->instr);
+      b->cursor = nir_after_phis(header_block);
+      nir_builder_instr_insert(b, &phi->instr);
 
-         /* Modify all readers of the original ALU instruction to read the
-          * result of the phi.
-          */
-         nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
-                                  nir_src_for_ssa(&phi->dest.ssa));
+      /* Modify all readers of the original ALU instruction to read the
+       * result of the phi.
+       */
+      nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
+                               nir_src_for_ssa(&phi->dest.ssa));
 
-         /* Since the original ALU instruction no longer has any readers, just
-          * remove it.
-          */
-         nir_instr_remove_v(&alu->instr);
-         ralloc_free(alu);
+      /* Since the original ALU instruction no longer has any readers, just
+       * remove it.
+       */
+      nir_instr_remove_v(&alu->instr);
+      ralloc_free(alu);
 
-         progress = true;
-      }
+      progress = true;
    }
 
    return progress;
@@ -624,32 +670,10 @@ opt_simplify_bcsel_of_phi(nir_builder *b, nir_loop *loop)
     * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/170#note_110305
     */
    nir_foreach_instr_safe(instr, header_block) {
-      if (instr->type != nir_instr_type_alu)
+      if (!is_trivial_bcsel(instr, false))
          continue;
 
       nir_alu_instr *const bcsel = nir_instr_as_alu(instr);
-      if (bcsel->op != nir_op_bcsel &&
-          bcsel->op != nir_op_b32csel &&
-          bcsel->op != nir_op_fcsel)
-         continue;
-
-      bool match = true;
-      for (unsigned i = 0; i < 3; i++) {
-         /* FINISHME: The abs, negate and swizzled cases could be handled by
-          * adding move instructions at the bottom of the continue block and
-          * more phi nodes in the header_block.
-          */
-         if (!nir_alu_src_is_trivial_ssa(bcsel, i) ||
-             bcsel->src[i].src.ssa->parent_instr->type != nir_instr_type_phi ||
-             bcsel->src[i].src.ssa->parent_instr->block != header_block) {
-            match = false;
-            break;
-         }
-      }
-
-      if (!match)
-         continue;
-
       nir_phi_instr *const cond_phi =
          nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr);
 



More information about the mesa-commit mailing list