[Mesa-dev] [PATCH 11/12] nir: add a loop unrolling pass

Timothy Arceri timothy.arceri at collabora.com
Sat Aug 27 06:03:33 UTC 2016


---
 src/compiler/Makefile.sources          |   1 +
 src/compiler/nir/nir.h                 |   2 +
 src/compiler/nir/nir_opt_loop_unroll.c | 443 +++++++++++++++++++++++++++++++++
 3 files changed, 446 insertions(+)
 create mode 100644 src/compiler/nir/nir_opt_loop_unroll.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 55eb399..a455a99 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -231,6 +231,7 @@ NIR_FILES = \
 	nir/nir_opt_dead_cf.c \
 	nir/nir_opt_gcm.c \
 	nir/nir_opt_global_to_local.c \
+	nir/nir_opt_loop_unroll.c \
 	nir/nir_opt_peephole_select.c \
 	nir/nir_opt_remove_phis.c \
 	nir/nir_opt_undef.c \
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 0f35a9e..ad7fab3 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2663,6 +2663,8 @@ bool nir_opt_dead_cf(nir_shader *shader);
 
 void nir_opt_gcm(nir_shader *shader);
 
+bool nir_opt_loop_unroll(nir_shader *shader);
+
 bool nir_opt_peephole_select(nir_shader *shader);
 
 bool nir_opt_remove_phis(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c
new file mode 100644
index 0000000..a34ca56
--- /dev/null
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -0,0 +1,443 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_control_flow.h"
+
+typedef struct {
+   /* Array of loops */
+   nir_loop *loops;
+
+   /* Array of unroll factors */
+   int *factors;
+} unroll_vector;
+
+typedef struct {
+   /* Array of loop infos for the loop nest */
+   nir_loop_info *li;
+
+   /* List of unroll vectors */
+   struct list_head unroll_vectors;
+
+   nir_shader_compiler_options *options;
+} loop_unroll_state;
+
+static void
+extract_loop_body(nir_cf_list *extracted, nir_cf_node *node, nir_shader *ns)
+{
+   nir_cf_node *end = node;
+   while (!nir_cf_node_is_last(end))
+      end = nir_cf_node_next(end);
+
+   nir_cf_loop_list_extract(extracted, node, end);
+}
+
+static bool
+replace_ssa_def_uses(nir_ssa_def *def, void *void_impl)
+{
+   nir_function_impl *impl = void_impl;
+   void *mem_ctx = ralloc_parent(impl);
+
+   nir_ssa_undef_instr *undef =
+      nir_ssa_undef_instr_create(mem_ctx, def->num_components,
+                                 def->bit_size);
+   nir_instr_insert_before_cf_list(&impl->body, &undef->instr);
+   nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(&undef->def));
+   return true;
+}
+
+static void
+cleanup_cf_node(nir_cf_node *node, nir_function_impl *impl)
+{
+   switch (node->type) {
+   case nir_cf_node_block: {
+      nir_block *block = nir_cf_node_as_block(node);
+      /* We need to walk the instructions and clean up defs/uses */
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type == nir_instr_type_jump) {
+            exec_node_remove(&instr->node);
+         } else {
+            nir_foreach_ssa_def(instr, replace_ssa_def_uses, impl);
+            nir_instr_remove(instr);
+         }
+      }
+      break;
+   }
+   case nir_cf_node_if: {
+      nir_if *if_stmt = nir_cf_node_as_if(node);
+      foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list)
+         cleanup_cf_node(child, impl);
+      foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list)
+         cleanup_cf_node(child, impl);
+
+      list_del(&if_stmt->condition.use_link);
+      break;
+   }
+   default:
+      unreachable("Invalid CF node type");
+   }
+}
+
+static void
+clone_list(nir_shader *ns, nir_loop *loop, nir_cf_list *src_cf_list,
+           nir_cf_list *cloned_cf_list, struct hash_table *remap_table,
+           struct hash_table *phi_remap)
+{
+   /* Dest list needs to at least have one block */
+   nir_block *nblk = nir_block_create(ns);
+   nblk->cf_node.parent = loop->cf_node.parent;
+   exec_list_push_tail(&cloned_cf_list->list, &nblk->cf_node.node);
+
+   nir_clone_loop_list(&cloned_cf_list->list, &src_cf_list->list,
+                       remap_table, phi_remap, ns);
+}
+
+static void
+remove_unrolled_loop(nir_cf_node *loop, nir_block *block_before_loop,
+                     struct hash_table *remap_table,
+                     struct hash_table *phi_remap,
+                     nir_function_impl *impl)
+{
+   /* Fixup LCSSA-phi srcs */
+   nir_block *prev_block = nir_cf_node_as_block(nir_cf_node_prev(loop));
+   nir_cf_node *cf_node = nir_cf_node_next(loop);
+   assert(cf_node->type == nir_cf_node_block);
+
+   nir_block *block = nir_cf_node_as_block(cf_node);
+   nir_foreach_instr_safe(instr, block) {
+      if (instr->type == nir_instr_type_phi) {
+         nir_phi_instr *phi = nir_instr_as_phi(instr);
+         assert(phi->is_lcssa_phi);
+
+         if (nir_cf_node_as_loop(loop)->info->trip_count != 0) {
+            nir_foreach_phi_src_safe(src, phi) {
+               /* Update predecessor */
+               src->pred = prev_block;
+
+               /* Update src */
+               struct hash_entry *hte =
+                  _mesa_hash_table_search(phi_remap, src->src.ssa);
+               if (hte) {
+                  nir_src new_src = nir_src_for_ssa((nir_ssa_def *) hte->data);
+                  nir_instr_rewrite_src(instr, &src->src, new_src);
+               } else {
+                  /* If the LCSSA src was not a phi fallback to following the
+                   * the remappings in the remap table.
+                   */
+                  nir_ssa_def *ssa_def = src->src.ssa;
+                  while ((hte = _mesa_hash_table_search(remap_table,
+                                                        ssa_def))) {
+                     ssa_def = (nir_ssa_def *) hte->data;
+                  }
+
+                  if (ssa_def != src->src.ssa){
+                     nir_src new_src = nir_src_for_ssa(ssa_def);
+                     nir_instr_rewrite_src(instr, &src->src, new_src);
+                  }
+               }
+            }
+         }
+         nir_opt_remove_phi(phi);
+      } else {
+         /* There should be no more LCSSA-phis */
+         break;
+      }
+   }
+
+   /* After cloning the loop and fixing up the LCSSA-phis we need to remove
+    * any obsolete phis at the beginning of the new unrolled block. To avoid
+    * failing validation.
+    */
+   nir_foreach_instr_safe(instr, block_before_loop) {
+      if (instr->type != nir_instr_type_phi)
+         continue;
+
+      /* Replace phi */
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      nir_opt_remove_phi(phi);
+   }
+
+   /* Remove the loop */
+   nir_loop *nir_loop = nir_cf_node_as_loop(loop);
+   foreach_list_typed(nir_cf_node, child, node, &nir_loop->body)
+      cleanup_cf_node(child, impl);
+
+   exec_node_remove(&loop->node);
+
+   stitch_blocks(prev_block, block);
+}
+
+/**
+ * Unroll a loop which does not contain any jumps.  For example, if the input
+ * is:
+ *
+ *     (loop (...) ...instrs...)
+ *
+ * And the iteration count is 3, the output will be:
+ *
+ *     ...instrs... ...instrs... ...instrs...
+ */
+static void
+simple_unroll(nir_function *fn, nir_loop *loop, nir_builder *b)
+{
+   nir_shader *ns = fn->shader;
+   nir_block *prev_block =
+      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
+
+   struct exec_node *loop_node = exec_list_get_head(&loop->body);
+   nir_cf_node *lp_header_cf_node = exec_node_data(nir_cf_node, loop_node,
+                                                   node);
+
+   struct hash_table *remap_table =
+      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                              _mesa_key_pointer_equal);
+
+   struct hash_table *phi_remap =
+      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                              _mesa_key_pointer_equal);
+
+   /* Get the loop header this contains a bunch of phis and the loops
+    * conditional.
+    */
+   assert(lp_header_cf_node->type == nir_cf_node_block);
+   nir_block *loop_header_blk = nir_cf_node_as_block(lp_header_cf_node);
+
+   /* Remove any phi preds that are from the loop itself nir_opt_remove_phi()
+    * will clean this up further for us.
+    */
+   nir_foreach_instr_safe(instr, loop_header_blk) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      nir_foreach_phi_src_safe(src, phi) {
+         /* Is the pred from the block itself? */
+         if (src->pred->index > phi->instr.block->index &&
+             src->pred->cf_node.parent == &loop->cf_node) {
+
+            /* Store the phi dest and src to be used for remaping */
+            _mesa_hash_table_insert(phi_remap, &phi->dest.ssa, src->src.ssa);
+
+            list_del(&src->src.use_link);
+            exec_node_remove(&src->node);
+         }
+      }
+   }
+
+   /* Skip over if node (loop condition) and get the real loop body we need
+    * to do this now because we are about to start messing with the control
+    * flow.
+    */
+   nir_cf_node *if_node = nir_cf_node_next(lp_header_cf_node);
+   nir_cf_node *cf_node = nir_cf_node_next(if_node);
+   assert(cf_node->type == nir_cf_node_block);
+
+   /* Pluck out the loop header */
+   nir_cf_list lp_header;
+   nir_cf_extract(&lp_header, nir_before_cf_node(lp_header_cf_node),
+                  nir_after_cf_node(lp_header_cf_node));
+
+   loop_node = exec_list_get_head(&loop->body);
+   lp_header_cf_node = exec_node_data(nir_cf_node, loop_node, node);
+
+   /* Extract phis from loop header and place before the loop */
+   nir_cf_list lp_header_phis;
+   nir_cf_loop_list_extract(&lp_header_phis, lp_header_cf_node,
+                            lp_header_cf_node);
+   b->cursor = nir_before_cf_node(&loop->cf_node);
+   nir_cf_reinsert(&lp_header_phis, b->cursor);
+
+   /* Pluck out the loop body */
+   nir_cf_list loop_body;
+   extract_loop_body(&loop_body, cf_node, ns);
+
+   /* Initialise remap table with phi remap table */
+   struct hash_entry *hte;
+   hash_table_foreach(phi_remap, hte) {
+      _mesa_hash_table_insert(remap_table, hte->key, hte->data);
+   }
+
+   /* Clone loop header and append to the loop body */
+   if (loop->info->trip_count > 0) {
+      nir_cf_list cloned_header;
+      exec_list_make_empty(&cloned_header.list);
+      cloned_header.impl = loop_body.impl;
+
+      clone_list(ns, loop, &lp_header, &cloned_header, remap_table, phi_remap);
+
+      /* Append loop body with header instructions */
+      struct exec_node *lp_body_node = exec_list_get_head(&loop_body.list);
+
+      nir_cf_node *end = exec_node_data(nir_cf_node, lp_body_node, node);
+      while (!nir_cf_node_is_last(end))
+         end = nir_cf_node_next(end);
+
+      nir_block *lp_body_blk = nir_cf_node_as_block(end);
+
+      struct exec_node *lp_header_node =
+         exec_list_get_head(&cloned_header.list);
+      nir_block *lp_header_blk =
+         nir_cf_node_as_block(exec_node_data(nir_cf_node, lp_header_node,
+                                             node));
+
+      foreach_list_typed(nir_instr, instr, node, &lp_header_blk->instr_list) {
+         instr->block = lp_body_blk;
+      }
+
+      exec_list_append(&lp_body_blk->instr_list, &lp_header_blk->instr_list);
+   }
+
+   /* Reinsert original header before the loop */
+   nir_cf_reinsert(&lp_header, b->cursor);
+
+   nir_cf_list *prev_lp = &loop_body;
+
+   /* Create temp array to store the cloned loop body as we unroll */
+   nir_cf_list unrolled_lp_body[2];
+   exec_list_make_empty(&unrolled_lp_body[0].list);
+   exec_list_make_empty(&unrolled_lp_body[1].list);
+   unrolled_lp_body[0].impl = loop_body.impl;
+   unrolled_lp_body[1].impl = loop_body.impl;
+
+   for (unsigned i = 1; i < loop->info->trip_count; i++) {
+
+      /* Clone the loop body */
+      if (exec_list_is_empty(&unrolled_lp_body[0].list)){
+         clone_list(ns, loop, prev_lp, &unrolled_lp_body[0], remap_table,
+                    phi_remap);
+
+         /* Insert unrolled loop body before the loop. */
+         b->cursor = nir_before_cf_node(&loop->cf_node);
+         nir_cf_reinsert(prev_lp, b->cursor);
+
+         prev_lp = &unrolled_lp_body[0];
+      } else {
+         clone_list(ns, loop, prev_lp, &unrolled_lp_body[1], remap_table,
+                    phi_remap);
+
+         /* Insert unrolled loop body before the loop. */
+         b->cursor = nir_before_cf_node(&loop->cf_node);
+         nir_cf_reinsert(prev_lp, b->cursor);
+
+         prev_lp = &unrolled_lp_body[1];
+      }
+   }
+
+   b->cursor = nir_before_cf_node(&loop->cf_node);
+   if (loop->info->trip_count != 0) {
+      nir_cf_reinsert(prev_lp, b->cursor);
+   } else {
+      /* Its possible to end up with a trip count of zero. For example:
+       *
+       *    int 0;
+       *    do res = res.yzwx;  while (++i < 1);
+       *
+       * In this case we simple delete the loop and fix up the LCSSA phis.
+       */
+      nir_cf_delete(prev_lp);
+   }
+
+   /* The loop has been unrolled so remove it. */
+   remove_unrolled_loop(&loop->cf_node, prev_block, remap_table, phi_remap,
+                        fn->impl);
+
+   _mesa_hash_table_destroy(remap_table, NULL);
+   _mesa_hash_table_destroy(phi_remap, NULL);
+}
+
+static bool
+process_loops(nir_cf_node *cf_node, nir_builder *b, bool *innermost_loop)
+{
+   bool progress = false;
+   nir_loop *loop;
+
+   switch (cf_node->type) {
+   case nir_cf_node_block:
+      return progress;
+   case nir_cf_node_if: {
+      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
+      foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->then_list)
+         progress |= process_loops(nested_node, b, innermost_loop);
+      foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->else_list)
+         progress |= process_loops(nested_node, b, innermost_loop);
+      return progress;
+   }
+   case nir_cf_node_loop: {
+      loop = nir_cf_node_as_loop(cf_node);
+      foreach_list_typed_safe(nir_cf_node, nested_node, node, &loop->body)
+         progress |= process_loops(nested_node, b, innermost_loop);
+      break;
+   }
+   default:
+      unreachable("unknown cf node type");
+   }
+
+   if (*innermost_loop) {
+      nir_function *fn = nir_cf_node_get_function(&loop->cf_node)->function;
+      if (is_simple_for_loop(fn->shader, loop->info)) {
+         simple_unroll(fn, loop, b);
+         progress = true;
+         /* Don't unroll outer loops loops until after loop analysis have run
+          * again.
+          */
+         *innermost_loop = false;
+      } else {
+         /* We couldn't unroll the innermost loop so don't try to unroll the
+          * outerloop.
+          */
+         *innermost_loop = false;
+      }
+   }
+
+   return progress;
+}
+
+static bool
+nir_opt_loop_unroll_impl(nir_function_impl *impl)
+{
+   bool progress = false;
+   nir_metadata_require(impl, nir_metadata_loop_analysis);
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) {
+      bool innermost_loop = true;
+      progress |= process_loops(node, &b, &innermost_loop);
+   }
+
+   return progress;
+}
+
+bool
+nir_opt_loop_unroll(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_function(function, shader) {
+      if (function->impl)
+         progress |= nir_opt_loop_unroll_impl(function->impl);
+   }
+   return false;
+}
-- 
2.7.4



More information about the mesa-dev mailing list