[Mesa-dev] [PATCH 5/6] nir: Add a pass for moving SPIR-V continue blocks to the ends of loops

Tue Dec 20 04:11:51 UTC 2016

When shaders come in from SPIR-V, we handle continue blocks by placing
the contents of the continue inside of a "if (!first_iteration)".  We do
this so that we can properly handle the fact that continues in SPIR-V
jump to the continue block at the end of the loop rather than jumping
directly to the top of the loop like they do in NIR.  In particular, the
increment step of a simple for loop ends up in the continue block.  This
pass looks for this case in loops that don't actually have any continues
and moves the continue contents to the end of the loop instead.  We need
this because loop unrolling doesn't work if the increment is inside of a
condition.
---
 src/compiler/Makefile.sources |   1 +
 src/compiler/nir/nir.h        |   2 +
 src/compiler/nir/nir_opt_if.c | 253 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 256 insertions(+)
 create mode 100644 src/compiler/nir/nir_opt_if.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index a0abede..6f701af 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -239,6 +239,7 @@ NIR_FILES = \
 	nir/nir_opt_dead_cf.c \
 	nir/nir_opt_gcm.c \
 	nir/nir_opt_global_to_local.c \
+	nir/nir_opt_if.c \
 	nir/nir_opt_loop_unroll.c \
 	nir/nir_opt_peephole_select.c \
 	nir/nir_opt_remove_phis.c \
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index a6c8956..7b582a6 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2557,6 +2557,8 @@ bool nir_opt_dead_cf(nir_shader *shader);
 
 bool nir_opt_gcm(nir_shader *shader, bool value_number);
 
+bool nir_opt_if(nir_shader *shader);
+
 bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask);
 
 bool nir_opt_peephole_select(nir_shader *shader, unsigned limit);
diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
new file mode 100644
index 0000000..5590684
--- /dev/null
+++ b/src/compiler/nir/nir_opt_if.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_control_flow.h"
+
+/**
+ * This optimization detects if statements at the tops of loops where the
+ * condition is a phi node of two constants and moves half of the if to above
+ * the loop and the other half of the if to the end of the loop.  A simple for
+ * loop "for (int i = 0; i < 4; i++)", when run through the SPIR-V front-end,
+ * ends up looking something like this:
+ *
+ * vec1 32 ssa_0 = load_const (0x00000000)
+ * vec1 32 ssa_1 = load_const (0xffffffff)
+ * loop {
+ *    block block_1:
+ *    vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5
+ *    vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
+ *    if ssa_2 {
+ *       block block_2:
+ *       vec1 32 ssa_4 = load_const (0x00000001)
+ *       vec1 32 ssa_5 = iadd ssa_2, ssa_4
+ *    } else {
+ *       block block_3:
+ *    }
+ *    block block_4:
+ *    vec1 32 ssa_6 = load_const (0x00000004)
+ *    vec1 32 ssa_7 = ilt ssa_5, ssa_6
+ *    if ssa_7 {
+ *       block block_5:
+ *    } else {
+ *       block block_6:
+ *       break
+ *    }
+ *    block block_7:
+ * }
+ *
+ * This turns it into something like this:
+ *
+ * // Stuff from block 1
+ * // Stuff from block 3
+ * loop {
+ *    block block_1:
+ *    vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
+ *    vec1 32 ssa_6 = load_const (0x00000004)
+ *    vec1 32 ssa_7 = ilt ssa_5, ssa_6
+ *    if ssa_7 {
+ *       block block_5:
+ *    } else {
+ *       block block_6:
+ *       break
+ *    }
+ *    block block_7:
+ *    // Stuff from block 1
+ *    // Stuff from block 2
+ *    vec1 32 ssa_4 = load_const (0x00000001)
+ *    vec1 32 ssa_5 = iadd ssa_2, ssa_4
+ * }
+ */
+static bool
+opt_peel_loop_initial_if(nir_loop *loop)
+{
+   nir_block *header_block = nir_loop_first_block(loop);
+   nir_block *prev_block =
+      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
+
+   /* It would be insane if this were not true */
+   assert(_mesa_set_search(header_block->predecessors, prev_block));
+   /* It must have exactly one "continue" */
+   if (header_block->predecessors->entries != 2)
+      return false;
+
+   nir_block *cont_block = NULL;
+   struct set_entry *pred_entry;
+   set_foreach(header_block->predecessors, pred_entry) {
+      if (pred_entry->key != prev_block)
+         cont_block = (void *)pred_entry->key;
+   }
+
+   nir_cf_node *if_node = nir_cf_node_next(&header_block->cf_node);
+   if (!if_node || if_node->type != nir_cf_node_if)
+      return false;
+
+   nir_if *nif = nir_cf_node_as_if(if_node);
+   assert(nif->condition.is_ssa);
+
+   nir_ssa_def *cond = nif->condition.ssa;
+   if (cond->parent_instr->type != nir_instr_type_phi)
+      return false;
+
+   nir_phi_instr *cond_phi = nir_instr_as_phi(cond->parent_instr);
+   if (cond->parent_instr->block != header_block)
+      return false;
+
+   /* We already know we have exactly one continue */
+   uint32_t entry_val, cont_val;
+   assert(exec_list_length(&cond_phi->srcs) == 2);
+   nir_foreach_phi_src(src, cond_phi) {
+      assert(src->src.is_ssa);
+      nir_const_value *const_src = nir_src_as_const_value(src->src);
+      if (!const_src)
+         return false;
+
+      if (src->pred == cont_block) {
+         cont_val = const_src->u32[0];
+      } else {
+         assert(src->pred == prev_block);
+         entry_val = const_src->u32[0];
+      }
+   }
+
+   /* If they both execute or both don't execute, this is a job for
+    * nir_dead_cf, not this pass.
+    */
+   if ((entry_val && cont_val) || (!entry_val && !cont_val))
+      return false;
+
+   struct exec_list *cont_list, *entry_list;
+   if (cont_val) {
+      cont_list = &nif->then_list;
+      entry_list = &nif->else_list;
+   } else {
+      cont_list = &nif->else_list;
+      entry_list = &nif->then_list;
+   }
+
+   /* We want to be moving the contents of entry_list to above the loop so it
+    * can't contain any break or continue instructions.
+    */
+   foreach_list_typed(nir_cf_node, cf_node, node, entry_list) {
+      nir_foreach_block_in_cf_node(block, cf_node) {
+         nir_instr *last_instr = nir_block_last_instr(block);
+         if (last_instr && last_instr->type == nir_instr_type_jump)
+            return false;
+      }
+   }
+
+   /* Before we do anything, convert the loop to LCSSA.  We're about to
+    * replace a bunch of SSA defs with registers and this will prevent any of
+    * it from leaking outside the loop.
+    */
+   nir_convert_loop_to_lcssa(loop);
+
+   nir_block *after_if_block =
+      nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
+
+   /* Get rid of phis in the header block since we will be duplicating it */
+   nir_lower_phis_to_regs_block(header_block);
+   /* Get rid of phis after the if since dominance will change */
+   nir_lower_phis_to_regs_block(after_if_block);
+
+   /* Get rid of SSA defs in the pieces we're about to move around */
+   nir_lower_ssa_defs_to_regs_block(header_block);
+   nir_foreach_block_in_cf_node(block, &nif->cf_node)
+      nir_lower_ssa_defs_to_regs_block(block);
+
+   nir_cf_list header, tmp;
+   nir_cf_extract(&header, nir_before_block(header_block),
+                           nir_after_block(header_block));
+
+   nir_cf_list_clone(&tmp, &header, &loop->cf_node, NULL);
+   nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));
+   nir_cf_extract(&tmp, nir_before_cf_list(entry_list),
+                        nir_after_cf_list(entry_list));
+   nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));
+
+   nir_cf_list_clone(&tmp, &header, &loop->cf_node, NULL);
+   nir_cf_reinsert(&tmp, nir_after_block_before_jump(cont_block));
+   nir_cf_extract(&tmp, nir_before_cf_list(cont_list),
+                        nir_after_cf_list(cont_list));
+   nir_cf_reinsert(&tmp, nir_after_block_before_jump(cont_block));
+
+   nir_cf_delete(&header);
+   nir_cf_node_remove(&nif->cf_node);
+
+   return true;
+}
+
+static bool
+opt_if_cf_list(struct exec_list *cf_list)
+{
+   bool progress = false;
+   foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
+      switch (cf_node->type) {
+      case nir_cf_node_block:
+         break;
+
+      case nir_cf_node_if: {
+         nir_if *nif = nir_cf_node_as_if(cf_node);
+         progress |= opt_if_cf_list(&nif->then_list);
+         progress |= opt_if_cf_list(&nif->else_list);
+         break;
+      }
+
+      case nir_cf_node_loop: {
+         nir_loop *loop = nir_cf_node_as_loop(cf_node);
+         progress |= opt_if_cf_list(&loop->body);
+         progress |= opt_peel_loop_initial_if(loop);
+         break;
+      }
+
+      case nir_cf_node_function:
+         unreachable("Invalid cf type");
+      }
+   }
+
+   return progress;
+}
+
+bool
+nir_opt_if(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_function(function, shader) {
+      if (function->impl == NULL)
+         continue;
+
+      if (opt_if_cf_list(&function->impl->body)) {
+         nir_metadata_preserve(function->impl, nir_metadata_none);
+
+         /* If that made progress, we're no longer really in SSA form.  We
+          * need to convert registers back into SSA defs and clean up SSA defs
+          * that don't dominate their uses.
+          */
+         nir_convert_to_ssa_impl(function->impl);
+         progress = true;
+      }
+   }
+
+   return progress;
+}
-- 
2.5.0.400.gff86faf