[Mesa-dev] [PATCH 10/13] nir: add a loop unrolling pass

Timothy Arceri timothy.arceri at collabora.com
Thu Dec 22 02:26:13 UTC 2016


V2:
- tidy ups suggested by Connor.
- tidy up cloning logic and handle copy propagation
 based of suggestion by Connor.
- use nir_ssa_def_rewrite_uses to fix up lcssa phis
  suggested by Connor.
- add support for complex loop unrolling (two terminators)
- handle case were the ssa defs use outside the loop is already a phi
- support unrolling loops with multiple terminators when trip count
  is know for each terminator

V3:
- set correct num_components when creating phi in complex unroll
- rewrite update remap table based on Jasons suggestions.
- remove unrequired extract_loop_body() helper as suggested by Jason.
- simplify the lcssa phi fix up code for simple loops as per Jasons suggestions.
- use mem context to keep track of hash table memory as suggested by Jason.
- move is_{complex,simple}_loop helpers to the unroll code
- require nir_metadata_block_index
- partially rewrote complex unroll to be simpler and easier to follow.

V4:
- use rzalloc() when creating nir_phi_src but not setting pred right away
 fixes regression cause by ralloc() no longer zeroing memory.

V5:
- simplify calling of complex_unroll()
- use new loop terminator fields to get the break/continue from blocks
  and simplify loop unrolling code
- handle slightly less trivial loop terminators. if branches can
  now have instructions but can only contain a single block.
- use nir print type IR snippets in unroll function descriptions
- add better explanation and variable for why we need to clone
  additional times when the second terminator it the limiting
  terminator.
- partially convert out of ssa before unrolling loops (suggested by Jason)

v6:
- remove unused nir_builder
- use Jasons new from ssa helper
- tidy/fixup cursor use
- unroll terminators that contain control flow correctly
- unroll complex loops with control flow before the terminators
  correctly
---
 src/compiler/Makefile.sources          |   1 +
 src/compiler/nir/nir.h                 |   2 +
 src/compiler/nir/nir_opt_loop_unroll.c | 566 +++++++++++++++++++++++++++++++++
 3 files changed, 569 insertions(+)
 create mode 100644 src/compiler/nir/nir_opt_loop_unroll.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index f551449..566e246 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -241,6 +241,7 @@ NIR_FILES = \
 	nir/nir_opt_dead_cf.c \
 	nir/nir_opt_gcm.c \
 	nir/nir_opt_global_to_local.c \
+	nir/nir_opt_loop_unroll.c \
 	nir/nir_opt_peephole_select.c \
 	nir/nir_opt_remove_phis.c \
 	nir/nir_opt_undef.c \
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index fd35bd0..0fd6a77 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2555,6 +2555,8 @@ bool nir_opt_dead_cf(nir_shader *shader);
 
 bool nir_opt_gcm(nir_shader *shader, bool value_number);
 
+bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask);
+
 bool nir_opt_peephole_select(nir_shader *shader, unsigned limit);
 
 bool nir_opt_remove_phis(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c
new file mode 100644
index 0000000..6d27c6b
--- /dev/null
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -0,0 +1,566 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_control_flow.h"
+#include "nir_loop_analyze.h"
+
+/* Convert the phis from the loops first block and the block that follows the
+ * loop into regs.  Partially converting out of SSA allows us to unroll the
+ * loop without having to keep track of and update phis along the way which
+ * gets tricky and doesn't add much value over conveting to regs.
+ */
+static void
+convert_loop_phis_to_reg(nir_loop *loop,
+                         nir_block *header_blk)
+{
+   nir_lower_phis_to_regs_block(header_blk);
+
+   nir_block *block_after_loop =
+      nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
+
+   nir_lower_phis_to_regs_block(block_after_loop);
+}
+
+static void
+get_first_blocks_in_terminator(nir_loop_terminator *term,
+                               nir_block **first_break_block,
+                               nir_block **first_continue_block)
+{
+   if (term->continue_from_then) {
+      *first_continue_block = nir_if_first_then_block(term->nif);
+      *first_break_block = nir_if_first_else_block(term->nif);
+   } else {
+      *first_continue_block = nir_if_first_else_block(term->nif);
+      *first_break_block = nir_if_first_then_block(term->nif);
+   }
+}
+
+/**
+ * Unroll a loop where we know exactly how many iterations there are and there
+ * is only a single exit point.  Note here we can unroll loops with multiple
+ * theoretical exits that only have a single terminating exit that we always
+ * know is the "real" exit.
+ *
+ *     loop {
+ *         ...instrs...
+ *     }
+ *
+ * And the iteration count is 3, the output will be:
+ *
+ *     ...instrs... ...instrs... ...instrs...
+ */
+static void
+simple_unroll(nir_loop *loop)
+{
+   nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
+   assert(nir_is_trivial_loop_if(limiting_term->nif,
+                                 limiting_term->break_block));
+
+   nir_convert_loop_to_lcssa(loop);
+
+   nir_block *header_blk = nir_loop_first_block(loop);
+   convert_loop_phis_to_reg(loop, header_blk);
+
+   /* Skip over loop terminator and get the loop body. */
+   list_for_each_entry(nir_loop_terminator, terminator,
+                       &loop->info->loop_terminator_list,
+                       loop_terminator_link) {
+
+      /* Remove all but the limiting terminator as we know the other exit
+       * conditions can never be met. Note we need to extract any instructions
+       * in the continue from branch and insert then into the loop body before
+       * removing it.
+       */
+      if (terminator->nif != limiting_term->nif) {
+         nir_block *first_break_block;
+         nir_block *first_continue_block;
+         get_first_blocks_in_terminator(terminator, &first_break_block,
+                                        &first_continue_block);
+
+         assert(nir_is_trivial_loop_if(terminator->nif,
+                                       terminator->break_block));
+
+         nir_cf_list continue_from_lst;
+         nir_cf_extract(&continue_from_lst,
+                        nir_before_block(first_continue_block),
+                        nir_after_block(terminator->continue_from_block));
+         nir_cf_reinsert(&continue_from_lst,
+                         nir_after_cf_node(&terminator->nif->cf_node));
+
+         nir_cf_node_remove(&terminator->nif->cf_node);
+      }
+   }
+
+   nir_block *first_break_block;
+   nir_block *first_continue_block;
+   get_first_blocks_in_terminator(limiting_term, &first_break_block,
+                                  &first_continue_block);
+
+   /* Pluck out the loop header */
+   nir_cf_list lp_header;
+   nir_cf_extract(&lp_header, nir_before_block(header_blk),
+                  nir_before_cf_node(&limiting_term->nif->cf_node));
+
+   /* Add the continue from block of the limiting terminator to the loop body
+    */
+   nir_cf_list continue_from_lst;
+   nir_cf_extract(&continue_from_lst, nir_before_block(first_continue_block),
+                  nir_after_block(limiting_term->continue_from_block));
+   nir_cf_reinsert(&continue_from_lst,
+                   nir_after_cf_node(&limiting_term->nif->cf_node));
+
+   /* Pluck out the loop body */
+   nir_cf_list loop_body;
+   nir_cf_extract(&loop_body, nir_after_cf_node(&limiting_term->nif->cf_node),
+                  nir_after_block(nir_loop_last_block(loop)));
+
+   struct hash_table *remap_table =
+      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                              _mesa_key_pointer_equal);
+
+   /* Clone the loop header */
+   nir_cf_list cloned_header;
+   nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
+                     remap_table);
+
+   /* Insert cloned loop header before the loop */
+   nir_cf_reinsert(&cloned_header, nir_before_cf_node(&loop->cf_node));
+
+   /* Temp list to store the cloned loop body as we unroll */
+   nir_cf_list unrolled_lp_body;
+
+   /* Clone loop header and append to the loop body */
+   for (unsigned i = 0; i < loop->info->trip_count; i++) {
+      /* Clone loop body */
+      nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop->cf_node.parent,
+                        remap_table);
+
+      /* Insert unrolled loop body before the loop */
+      nir_cf_reinsert(&unrolled_lp_body, nir_before_cf_node(&loop->cf_node));
+
+      /* Clone loop header */
+      nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
+                        remap_table);
+
+      /* Insert loop header after loop body */
+      nir_cf_reinsert(&cloned_header, nir_before_cf_node(&loop->cf_node));
+   }
+
+   /* Remove the break from the loop terminator and add instructions from
+    * the break block after the unrolled loop.
+    */
+   nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
+   nir_instr_remove(break_instr);
+   nir_cf_list break_list;
+   nir_cf_extract(&break_list, nir_before_block(first_break_block),
+                  nir_after_block(limiting_term->break_block));
+
+   /* Clone so things get properly remapped */
+   nir_cf_list cloned_break_list;
+   nir_cf_list_clone(&cloned_break_list, &break_list, loop->cf_node.parent,
+                     remap_table);
+
+   nir_cf_reinsert(&cloned_break_list, nir_before_cf_node(&loop->cf_node));
+
+   /* Remove the loop */
+   nir_cf_node_remove(&loop->cf_node);
+
+   /* Delete the original loop body, break block & header */
+   nir_cf_delete(&lp_header);
+   nir_cf_delete(&loop_body);
+   nir_cf_delete(&break_list);
+
+   _mesa_hash_table_destroy(remap_table, NULL);
+}
+
+static void
+move_cf_list_into_loop_term(nir_cf_list *lst, nir_loop_terminator *term)
+{
+   /* Move the rest of the loop inside the continue-from-block */
+   nir_cf_reinsert(lst, nir_after_block(term->continue_from_block));
+
+   /* Remove the break */
+   nir_instr_remove(nir_block_last_instr(term->break_block));
+}
+
+static nir_cursor
+get_complex_unroll_insert_location(nir_cf_node *node, bool continue_from_then)
+{
+   if (node->type == nir_cf_node_loop) {
+      return nir_before_cf_node(node);
+   } else {
+      nir_if *if_stmt = nir_cf_node_as_if(node);
+      if (continue_from_then) {
+         return nir_after_block(nir_if_last_then_block(if_stmt));
+      } else {
+         return nir_after_block(nir_if_last_else_block(if_stmt));
+      }
+   }
+}
+
+/**
+ * Unroll a loop with two exists when the trip count of one of the exits is
+ * unknown.  If continue_from_then is true, the loop is repeated only when the
+ * "then" branch of the if is taken; otherwise it is repeated only
+ * when the "else" branch of the if is taken.
+ *
+ * For example, if the input is:
+ *
+ *      loop {
+ *         ...phis/condition...
+ *         if condition {
+ *            ...then instructions...
+ *         } else {
+ *            ...continue instructions...
+ *            break
+ *         }
+ *         ...body...
+ *      }
+ *
+ * And the iteration count is 3, and unlimit_term->continue_from_then is true,
+ * then the output will be:
+ *
+ *      ...condition...
+ *      if condition {
+ *         ...then instructions...
+ *         ...body...
+ *         if condition {
+ *            ...then instructions...
+ *            ...body...
+ *            if condition {
+ *               ...then instructions...
+ *               ...body...
+ *            } else {
+ *               ...continue instructions...
+ *            }
+ *         } else {
+ *            ...continue instructions...
+ *         }
+ *      } else {
+ *         ...continue instructions...
+ *      }
+ */
+static void
+complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
+               bool limiting_term_second)
+{
+   assert(nir_is_trivial_loop_if(unlimit_term->nif,
+                                 unlimit_term->break_block));
+
+   nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
+   assert(nir_is_trivial_loop_if(limiting_term->nif,
+                                 limiting_term->break_block));
+
+   nir_convert_loop_to_lcssa(loop);
+
+   nir_block *header_blk = nir_loop_first_block(loop);
+   convert_loop_phis_to_reg(loop, header_blk);
+
+   nir_cf_list lp_header;
+   nir_cf_list limit_break_list;
+   unsigned num_times_to_clone;
+   if (limiting_term_second) {
+      /* Pluck out the loop header */
+      nir_cf_extract(&lp_header, nir_before_block(header_blk),
+                     nir_before_cf_node(&unlimit_term->nif->cf_node));
+
+      /* We need some special handling when its the second terminator causing
+       * us to exit the loop for example:
+       *
+       *   for (int i = 0; i < uniform_lp_count; i++) {
+       *      colour = vec4(0.0, 1.0, 0.0, 1.0);
+       *
+       *      if (i == 1) {
+       *         break;
+       *      }
+       *      ... any further code is unreachable after i == 1 ...
+       *   }
+       */
+      nir_cf_list after_lt;
+      nir_if *limit_if = limiting_term->nif;
+      nir_cf_extract(&after_lt, nir_after_cf_node(&limit_if->cf_node),
+                     nir_after_block(nir_loop_last_block(loop)));
+      move_cf_list_into_loop_term(&after_lt, limiting_term);
+
+      /* Because the trip count is the number of times we pass over the entire
+       * loop before hitting a break when the second terminator is the
+       * limiting terminator we can actually execute code inside the loop when
+       * trip count == 0 e.g. the code above the break.  So we need to bump
+       * the trip_count in order for the code below to clone anything.  When
+       * trip count == 1 we execute the code above the break twice and the
+       * code below it once so we need clone things twice and so on.
+       */
+      num_times_to_clone = loop->info->trip_count + 1;
+   } else {
+      /* Pluck out the loop header */
+      nir_cf_extract(&lp_header, nir_before_block(header_blk),
+                     nir_before_cf_node(&limiting_term->nif->cf_node));
+
+      nir_block *first_break_block;
+      nir_block *first_continue_block;
+      get_first_blocks_in_terminator(limiting_term, &first_break_block,
+                                     &first_continue_block);
+
+      /* Remove the break then extract instructions from the break block so we
+       * can insert them in the innermost else of the unrolled loop.
+       */
+      nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
+      nir_instr_remove(break_instr);
+      nir_cf_extract(&limit_break_list, nir_before_block(first_break_block),
+                     nir_after_block(limiting_term->break_block));
+
+      nir_cf_list continue_list;
+      nir_cf_extract(&continue_list, nir_before_block(first_continue_block),
+                     nir_after_block(limiting_term->continue_from_block));
+
+      nir_cf_reinsert(&continue_list,
+                      nir_after_cf_node(&limiting_term->nif->cf_node));
+
+      nir_cf_node_remove(&limiting_term->nif->cf_node);
+
+      num_times_to_clone = loop->info->trip_count;
+   }
+
+   /* In the terminator that we have no trip count for move everything after
+    * the terminator into the continue from branch.
+    */
+   nir_cf_list loop_end;
+   nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node),
+                  nir_after_block(nir_loop_last_block(loop)));
+   move_cf_list_into_loop_term(&loop_end, unlimit_term);
+
+   /* Pluck out the loop body. */
+   nir_cf_list loop_body;
+   nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)),
+                  nir_after_block(nir_loop_last_block(loop)));
+
+   struct hash_table *remap_table =
+      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                              _mesa_key_pointer_equal);
+
+   /* Set unroll_loc to the loop as we will insert the unrolled loop before it
+    */
+   nir_cf_node *unroll_loc = &loop->cf_node;
+
+   /* Temp lists to store the cloned loop as we unroll */
+   nir_cf_list unrolled_lp_body;
+   nir_cf_list cloned_header;
+
+   for (unsigned i = 0; i < num_times_to_clone; i++) {
+      /* Clone loop header */
+      nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
+                        remap_table);
+
+      nir_cursor cursor =
+         get_complex_unroll_insert_location(unroll_loc,
+                                            unlimit_term->continue_from_then);
+
+      /* Insert cloned loop header */
+      nir_cf_reinsert(&cloned_header, cursor);
+
+      cursor =
+         get_complex_unroll_insert_location(unroll_loc,
+                                            unlimit_term->continue_from_then);
+
+      /* Clone loop body */
+      nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop->cf_node.parent,
+                        remap_table);
+
+      unroll_loc = exec_node_data(nir_cf_node,
+                                  exec_list_get_tail(&unrolled_lp_body.list),
+                                  node);
+      assert(unroll_loc->type == nir_cf_node_block &&
+             exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
+
+      /* Get the unrolled if node */
+      unroll_loc = nir_cf_node_prev(unroll_loc);
+
+      /* Insert unrolled loop body */
+      nir_cf_reinsert(&unrolled_lp_body, cursor);
+   }
+
+   if (!limiting_term_second) {
+      assert(unroll_loc->type == nir_cf_node_if);
+
+      nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
+                        remap_table);
+
+      nir_cursor cursor =
+         get_complex_unroll_insert_location(unroll_loc,
+                                            unlimit_term->continue_from_then);
+
+      /* Insert cloned loop header */
+      nir_cf_reinsert(&cloned_header, cursor);
+
+      /* Clone so things get properly remapped, and insert break block from
+       * the limiting terminator.
+       */
+      nir_cf_list cloned_break_blk;
+      nir_cf_list_clone(&cloned_break_blk, &limit_break_list,
+                        loop->cf_node.parent, remap_table);
+
+      cursor =
+         get_complex_unroll_insert_location(unroll_loc,
+                                            unlimit_term->continue_from_then);
+
+      nir_cf_reinsert(&cloned_break_blk, cursor);
+      nir_cf_delete(&limit_break_list);
+   }
+
+   /* The loop has been unrolled so remove it. */
+   nir_cf_node_remove(&loop->cf_node);
+
+   /* Delete the original loop header and body */
+   nir_cf_delete(&lp_header);
+   nir_cf_delete(&loop_body);
+
+   _mesa_hash_table_destroy(remap_table, NULL);
+}
+
+static bool
+is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li)
+{
+   unsigned max_iter = shader->options->max_unroll_iterations;
+
+   if (li->trip_count > max_iter)
+      return false;
+
+   if (li->force_unroll)
+      return true;
+
+   bool loop_not_too_large =
+      li->num_instructions * li->trip_count <= max_iter * 25;
+
+   return loop_not_too_large;
+}
+
+static bool
+process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *innermost_loop)
+{
+   bool progress = false;
+   nir_loop *loop;
+
+   switch (cf_node->type) {
+   case nir_cf_node_block:
+      return progress;
+   case nir_cf_node_if: {
+      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
+      foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->then_list)
+         progress |= process_loops(sh, nested_node, innermost_loop);
+      foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->else_list)
+         progress |= process_loops(sh, nested_node, innermost_loop);
+      return progress;
+   }
+   case nir_cf_node_loop: {
+      loop = nir_cf_node_as_loop(cf_node);
+      foreach_list_typed_safe(nir_cf_node, nested_node, node, &loop->body)
+         progress |= process_loops(sh, nested_node, innermost_loop);
+      break;
+   }
+   default:
+      unreachable("unknown cf node type");
+   }
+
+   if (*innermost_loop) {
+      /* Don't attempt to unroll outer loops or a second inner loop in
+       * this pass wait until the next pass as we have altered the cf.
+       */
+      *innermost_loop = false;
+
+      if (loop->info->limiting_terminator == NULL)
+         return progress;
+
+      if (!is_loop_small_enough_to_unroll(sh, loop->info))
+         return progress;
+
+      if (loop->info->is_trip_count_known) {
+         simple_unroll(loop);
+         progress = true;
+      } else {
+         /* Attempt to unroll loops with two terminators. */
+         unsigned num_lt = list_length(&loop->info->loop_terminator_list);
+         if (num_lt == 2) {
+            bool limiting_term_second = true;
+            nir_loop_terminator *terminator =
+               list_last_entry(&loop->info->loop_terminator_list,
+                                nir_loop_terminator, loop_terminator_link);
+
+
+            if (terminator->nif == loop->info->limiting_terminator->nif) {
+               limiting_term_second = false;
+               terminator =
+                  list_first_entry(&loop->info->loop_terminator_list,
+                                  nir_loop_terminator, loop_terminator_link);
+            }
+
+            /* If the first terminator has a trip count of zero and is the
+             * limiting terminator just do a simple unroll as the second
+             * terminator can never be reached.
+             */
+            if (loop->info->trip_count == 0 && !limiting_term_second) {
+               simple_unroll(loop);
+            } else {
+               complex_unroll(loop, terminator, limiting_term_second);
+            }
+            progress = true;
+         }
+      }
+   }
+
+   return progress;
+}
+
+static bool
+nir_opt_loop_unroll_impl(nir_function_impl *impl,
+                         nir_variable_mode indirect_mask)
+{
+   bool progress = false;
+   nir_metadata_require(impl, nir_metadata_loop_analysis, indirect_mask);
+   nir_metadata_require(impl, nir_metadata_block_index);
+
+   foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) {
+      bool innermost_loop = true;
+      progress |= process_loops(impl->function->shader, node,
+                                &innermost_loop);
+   }
+
+   if (progress)
+      nir_convert_to_ssa_impl(impl);
+
+   return progress;
+}
+
+bool
+nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask)
+{
+   bool progress = false;
+
+   nir_foreach_function(function, shader) {
+      if (function->impl) {
+         progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask);
+      }
+   }
+   return progress;
+}
-- 
2.9.3



More information about the mesa-dev mailing list