[Mesa-dev] [PATCH 08/10] nir: add a loop unrolling pass
Timothy Arceri
timothy.arceri at collabora.com
Tue Dec 6 01:12:26 UTC 2016
V2:
- tidy ups suggested by Connor.
- tidy up cloning logic and handle copy propagation
based of suggestion by Connor.
- use nir_ssa_def_rewrite_uses to fix up lcssa phis
suggested by Connor.
- add support for complex loop unrolling (two terminators)
- handle case were the ssa defs use outside the loop is already a phi
- support unrolling loops with multiple terminators when trip count
is know for each terminator
V3:
- set correct num_components when creating phi in complex unroll
- rewrite update remap table based on Jasons suggestions.
- remove unrequired extract_loop_body() helper as suggested by Jason.
- simplify the lcssa phi fix up code for simple loops as per Jasons suggestions.
- use mem context to keep track of hash table memory as suggested by Jason.
- move is_{complex,simple}_loop helpers to the unroll code
- require nir_metadata_block_index
- partially rewrote complex unroll to be simpler and easier to follow.
V4:
- use rzalloc() when creating nir_phi_src but not setting pred right away
fixes regression cause by ralloc() no longer zeroing memory.
---
src/compiler/Makefile.sources | 1 +
src/compiler/nir/nir.h | 2 +
src/compiler/nir/nir_opt_loop_unroll.c | 729 +++++++++++++++++++++++++++++++++
3 files changed, 732 insertions(+)
create mode 100644 src/compiler/nir/nir_opt_loop_unroll.c
diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index d3e158a..799fb38 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -238,6 +238,7 @@ NIR_FILES = \
nir/nir_opt_dead_cf.c \
nir/nir_opt_gcm.c \
nir/nir_opt_global_to_local.c \
+ nir/nir_opt_loop_unroll.c \
nir/nir_opt_peephole_select.c \
nir/nir_opt_remove_phis.c \
nir/nir_opt_undef.c \
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index d948e97..b8813e4 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2573,6 +2573,8 @@ bool nir_opt_dead_cf(nir_shader *shader);
bool nir_opt_gcm(nir_shader *shader, bool value_number);
+bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask);
+
bool nir_opt_peephole_select(nir_shader *shader, unsigned limit);
bool nir_opt_remove_phis(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c
new file mode 100644
index 0000000..8715757
--- /dev/null
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -0,0 +1,729 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_control_flow.h"
+
+static bool
+is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li)
+{
+ unsigned max_iter = shader->options->max_unroll_iterations;
+
+ if (li->trip_count > max_iter)
+ return false;
+
+ if (li->force_unroll)
+ return true;
+
+ bool loop_not_too_large =
+ li->num_instructions * li->trip_count <= max_iter * 25;
+
+ return loop_not_too_large;
+}
+
+static bool
+is_complex_loop(nir_shader *shader, nir_loop_info *li)
+{
+ unsigned num_lt = list_length(&li->loop_terminator_list);
+ return is_loop_small_enough_to_unroll(shader, li) && num_lt == 2;
+}
+
+static bool
+is_simple_loop(nir_shader *shader, nir_loop_info *li)
+{
+ return li->is_trip_count_known &&
+ is_loop_small_enough_to_unroll(shader, li);
+}
+
+static void
+move_cf_list_into_if(nir_cf_list *lst, nir_cf_node *if_node,
+ nir_block *last_blk, bool continue_from_then_branch)
+{
+ nir_if *if_stmt = nir_cf_node_as_if(if_node);
+ if (continue_from_then_branch) {
+ /* Move the rest of the loop inside the then */
+ nir_cf_reinsert(lst, nir_after_block(nir_if_last_then_block(if_stmt)));
+ } else {
+ /* Move the rest of the loop inside the else */
+ nir_cf_reinsert(lst, nir_after_block(nir_if_last_else_block(if_stmt)));
+ }
+
+ /* Remove the break */
+ nir_instr_remove(nir_block_last_instr(last_blk));
+}
+
+static void
+create_remap_tables(void *mem_ctx, nir_loop *loop, nir_block *loop_header_blk,
+ struct hash_table **remap_table,
+ struct hash_table **phi_remap)
+{
+ *remap_table = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ *phi_remap = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ /* Build table to be used for remapping as we unroll. */
+ nir_foreach_instr(instr, loop_header_blk) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+ nir_foreach_phi_src(src, phi) {
+ /* Add src to remap table if it's pred is from before the loop, these
+ * srcs will be used in the first iteration of cloning the loop.
+ */
+ if (src->pred->index < phi->instr.block->index) {
+ _mesa_hash_table_insert(*remap_table, &phi->dest.ssa,
+ src->src.ssa);
+ }
+ }
+ }
+}
+
+static void
+update_remap_table(nir_loop *loop, struct hash_table *remap_table,
+ struct hash_table *phi_remap)
+{
+ nir_block *header_block = nir_loop_first_block(loop);
+ nir_foreach_instr(instr, header_block) {
+
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+ nir_foreach_phi_src(src, phi) {
+ if (src->pred->index < phi->instr.block->index)
+ continue;
+
+ struct hash_entry *remap_he =
+ _mesa_hash_table_search(remap_table, src->src.ssa);
+ if (remap_he) {
+ _mesa_hash_table_insert(phi_remap, &phi->dest.ssa, remap_he->data);
+ } else {
+ _mesa_hash_table_insert(phi_remap, &phi->dest.ssa, src->src.ssa);
+ }
+ }
+ }
+
+ struct hash_entry *phi_entry;
+ hash_table_foreach(phi_remap, phi_entry)
+ _mesa_hash_table_insert(remap_table, phi_entry->key, phi_entry->data);
+}
+
+static void
+insert_phi_and_set_block_on_uses(nir_builder *b, nir_phi_instr *phi_instr)
+{
+ nir_instr_insert(b->cursor, &phi_instr->instr);
+
+ /* Now that we have inserted the phi fix up the block for its uses. */
+ nir_foreach_use_safe(use_src, &phi_instr->dest.ssa) {
+ nir_phi_instr *use_phi = nir_instr_as_phi(use_src->parent_instr);
+
+ foreach_list_typed(nir_phi_src, src, node, &use_phi->srcs) {
+ if (!src->pred)
+ src->pred = phi_instr->dest.ssa.parent_instr->block;
+ }
+ }
+}
+
+static nir_phi_instr *
+create_complex_unroll_phi(nir_shader *ns, nir_phi_instr *prev_phi_instr)
+{
+ nir_phi_instr *new_phi = nir_phi_instr_create(ns);
+ nir_ssa_dest_init(&new_phi->instr, &new_phi->dest,
+ prev_phi_instr->dest.ssa.num_components,
+ prev_phi_instr->dest.ssa.bit_size, NULL);
+
+ /* Add the new phi as a src to the phi from the previous iteration */
+ nir_phi_src *new_src = rzalloc(prev_phi_instr, nir_phi_src);
+ new_src->src = nir_src_for_ssa(&new_phi->dest.ssa);
+ new_src->src.parent_instr = &prev_phi_instr->instr;
+ exec_list_push_tail(&prev_phi_instr->srcs, &new_src->node);
+ list_addtail(&new_src->src.use_link, &new_src->src.ssa->uses);
+
+ return new_phi;
+}
+
+static void
+add_complex_unroll_phi_src(nir_ssa_def *phi_src, nir_phi_instr *phi_instr,
+ struct hash_table *remap_table, nir_block *blk)
+{
+ struct hash_entry *hte =
+ _mesa_hash_table_search(remap_table, phi_src);
+
+ nir_phi_src *new_src = ralloc(phi_instr, nir_phi_src);
+ nir_ssa_def *ssa_def = hte ? (nir_ssa_def *) hte->data : phi_src;
+ new_src->pred = blk;
+ new_src->src = nir_src_for_ssa(ssa_def);
+ new_src->src.parent_instr = &phi_instr->instr;
+ list_addtail(&new_src->src.use_link, &new_src->src.ssa->uses);
+
+ exec_list_push_tail(&phi_instr->srcs, &new_src->node);
+}
+
+static void
+simple_loop_fix_lcssa_phis(nir_cf_node *loop, struct hash_table *remap_table)
+{
+ nir_block *prev_block = nir_cf_node_as_block(nir_cf_node_prev(loop));
+ nir_cf_node *cf_node = nir_cf_node_next(loop);
+ assert(cf_node->type == nir_cf_node_block);
+
+ nir_block *block = nir_cf_node_as_block(cf_node);
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_phi) {
+ /* There should be no more phis */
+ break;
+ }
+
+ /* Simple loops should only ever have a single exit point so we can
+ * simply rewrite the phis uses to whatever is in the remap table or
+ * the src before the loop (if it had 0 iterations) and be done.
+ */
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+ nir_foreach_phi_src(src, phi) {
+ /* Update predecessor */
+ src->pred = prev_block;
+
+ struct hash_entry *hte =
+ _mesa_hash_table_search(remap_table, src->src.ssa);
+ if (hte) {
+ nir_ssa_def_rewrite_uses(&phi->dest.ssa,
+ nir_src_for_ssa((nir_ssa_def *) hte->data));
+ } else {
+ nir_ssa_def_rewrite_uses(&phi->dest.ssa, src->src);
+ }
+ }
+ assert(list_empty(&phi->dest.ssa.uses));
+ }
+}
+
+static bool
+ends_in_break(nir_block *block)
+{
+ if (exec_list_is_empty(&block->instr_list))
+ return false;
+
+ nir_instr *instr = nir_block_last_instr(block);
+ return instr->type == nir_instr_type_jump &&
+ nir_instr_as_jump(instr)->type == nir_jump_break;
+}
+
+/**
+ * Unroll a loop which does not contain any jumps. For example, if the input
+ * is:
+ *
+ * (loop (...) ...instrs...)
+ *
+ * And the iteration count is 3, the output will be:
+ *
+ * ...instrs... ...instrs... ...instrs...
+ */
+static void
+simple_unroll(nir_loop *loop, nir_builder *b)
+{
+ void *mem_ctx = ralloc_context(NULL);
+
+ nir_convert_loop_to_lcssa(loop);
+
+ /* Get the loop header this contains a bunch of phis and the loops
+ * conditional.
+ */
+ nir_block *header_blk = nir_loop_first_block(loop);
+
+ struct hash_table *remap_table;
+ struct hash_table *phi_remap;
+ create_remap_tables(mem_ctx, loop, header_blk, &remap_table, &phi_remap);
+
+ /* Skip over loop terminator and get the loop body. */
+ nir_cf_node *if_node = &loop->info->limiting_terminator->nif->cf_node;
+ list_for_each_entry(nir_loop_terminator, terminator,
+ &loop->info->loop_terminator_list, loop_terminator_link) {
+ nir_cf_node *loop_node = &terminator->nif->cf_node;
+
+ /* Remove all but the limiting terminator as we know the other exit
+ * conditions can never be met.
+ */
+ if (loop_node != &loop->info->limiting_terminator->nif->cf_node) {
+ nir_cf_node_remove(loop_node);
+ }
+ }
+
+ /* Pluck out the loop header */
+ nir_cf_list lp_header;
+ nir_cf_extract(&lp_header, nir_before_block(header_blk),
+ nir_before_cf_node(if_node));
+
+ /* Pluck out the loop body */
+ nir_cf_list loop_body;
+ nir_cf_extract(&loop_body, nir_after_cf_node(if_node),
+ nir_after_block(nir_loop_last_block(loop)));
+
+ /* Clone the loop header */
+ nir_cf_list cloned_header;
+ nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
+ remap_table);
+
+ /* Insert cloned loop header before the loop */
+ b->cursor = nir_before_cf_node(&loop->cf_node);
+ nir_cf_reinsert(&cloned_header, b->cursor);
+
+ /* Temp list to store the cloned loop body as we unroll */
+ nir_cf_list unrolled_lp_body;
+
+ /* Clone loop header and append to the loop body */
+ for (unsigned i = 0; i < loop->info->trip_count; i++) {
+ /* Clone loop body */
+ nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop->cf_node.parent,
+ remap_table);
+
+ update_remap_table(loop, remap_table, phi_remap);
+
+ /* Insert unrolled loop body before the loop */
+ b->cursor = nir_before_cf_node(&loop->cf_node);
+ nir_cf_reinsert(&unrolled_lp_body, b->cursor);
+
+ /* Clone loop header */
+ nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
+ remap_table);
+
+ /* Insert loop header after loop body */
+ b->cursor = nir_before_cf_node(&loop->cf_node);
+ nir_cf_reinsert(&cloned_header, b->cursor);
+ }
+
+ /* The loop has been unrolled so remove it. */
+ simple_loop_fix_lcssa_phis(&loop->cf_node, remap_table);
+
+ /* Remove the loop */
+ nir_cf_node_remove(&loop->cf_node);
+
+ /* Delete the original loop body & header */
+ nir_cf_delete(&lp_header);
+ nir_cf_delete(&loop_body);
+
+ ralloc_free(mem_ctx);
+}
+
+typedef struct {
+ /* This field is updated at each interation as we unroll. */
+ nir_phi_instr *phi;
+
+ /* This is the phi src of the loop terminator with an unknown trip count */
+ nir_ssa_def *phi_src_def;
+
+ /* These fields are used to fix things up after we have unrolled. */
+ nir_phi_instr *old_loop_phi;
+ nir_ssa_def *outermost_phi_def;
+ nir_ssa_def *innermost_phi_src_def;
+
+ struct list_head list_link;
+} loop_term_phi_info;
+
+/**
+ * Unroll a loop with two exists when the trip count of one of the exits is
+ * unknown. If continue_from_then_branch is true, the loop is repeated only
+ * when the "then" branch of the if is taken; otherwise it is repeated only
+ * when the "else" branch of the if is taken.
+ *
+ * For example, if the input is:
+ *
+ * (loop (...)
+ * ...body...
+ * (if (cond)
+ * (...then_instrs...)
+ * (...else_instrs...)))
+ *
+ * And the iteration count is 3, and \c continue_from_then_branch is true,
+ * then the output will be:
+ *
+ * ...body...
+ * (if (cond)
+ * (...then_instrs...
+ * ...body...
+ * (if (cond)
+ * (...then_instrs...
+ * ...body...
+ * (if (cond)
+ * (...then_instrs...)
+ * (...else_instrs...)))
+ * (...else_instrs...)))
+ * (...else_instrs))
+ */
+static void
+complex_unroll(nir_function *fn, nir_loop *loop, nir_builder *b,
+ nir_cf_node *if_node, nir_block *last_blk,
+ bool continue_from_then_branch, bool limiting_term_second)
+{
+ void *mem_ctx = ralloc_context(NULL);
+
+ nir_convert_loop_to_lcssa(loop);
+
+ nir_shader *ns = fn->shader;
+ nir_cf_node *limiting_trm = &loop->info->limiting_terminator->nif->cf_node;
+ nir_block *header_blk = nir_loop_first_block(loop);
+
+ struct hash_table *remap_table;
+ struct hash_table *phi_remap;
+ create_remap_tables(mem_ctx, loop, header_blk, &remap_table, &phi_remap);
+
+ struct hash_table *final_term_phis =
+ _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ struct list_head loop_term_phis;
+ list_inithead(&loop_term_phis);
+
+ /* Build a list of phis */
+ nir_if *loop_term_if = loop->info->limiting_terminator->nif;
+ nir_cf_node *after_loop = nir_cf_node_next(&loop->cf_node);
+ nir_foreach_instr(instr, nir_cf_node_as_block(after_loop)) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+ nir_foreach_phi_src(src, phi) {
+ nir_block *then_blk = nir_if_last_then_block(loop_term_if);
+ nir_block *else_blk = nir_if_last_else_block(loop_term_if);
+
+ if (src->pred == then_blk || src->pred == else_blk) {
+ _mesa_hash_table_insert(final_term_phis, phi, src->src.ssa);
+ } else {
+ /* Add to list of phis with an unknown iteration count */
+ loop_term_phi_info *ltpi = ralloc(mem_ctx, loop_term_phi_info);
+ ltpi->phi = phi;
+ ltpi->old_loop_phi = phi;
+ ltpi->phi_src_def = src->src.ssa;
+ list_addtail(<pi->list_link, &loop_term_phis);
+ }
+ }
+ }
+
+ if (limiting_term_second) {
+ /* We need some special handling when its the second terminator causing
+ * us to exit the loop for example:
+ *
+ * for (int i = 0; i < uniform_lp_count; i++) {
+ * colour = vec4(0.0, 1.0, 0.0, 1.0);
+ *
+ * if (i == 1)
+ * break;
+ * }
+ * ... any further code is unreachable after i == 1 ...
+ * }
+ *
+ * Bump the trip count by one so we actually clone something. Also
+ * extract everything after the limiting terminator and insert it into
+ * the branch we will continue from.
+ */
+ loop->info->trip_count++;
+
+ nir_cf_list after_lt;
+ nir_cf_extract(&after_lt, nir_after_cf_node(limiting_trm),
+ nir_after_block(nir_loop_last_block(loop)));
+
+ nir_if *if_stmt = loop->info->limiting_terminator->nif;
+ nir_block *last_then = nir_if_last_then_block(if_stmt);
+ if (ends_in_break(last_then)) {
+ move_cf_list_into_if(&after_lt, limiting_trm, last_then, false);
+ } else {
+ nir_block *last_else = nir_if_last_else_block(if_stmt);
+ if (ends_in_break(last_else)) {
+ move_cf_list_into_if(&after_lt, limiting_trm, last_else, true);
+ }
+ }
+ } else {
+ /* Remove the limiting terminator. Loop analysis will only find a
+ * terminator for trival if statments (then only contains break, else
+ * is empty) so its safe to remove the whole thing.
+ */
+ nir_cf_node_remove(limiting_trm);
+ }
+
+ list_for_each_entry(loop_term_phi_info, ltpi, &loop_term_phis, list_link) {
+ nir_phi_instr *new_phi = create_complex_unroll_phi(ns, ltpi->phi);
+
+ /* Update outermost_phi_def to point to the replacement phi to be used
+ * when rewritting the loops existing phis.
+ */
+ ltpi->outermost_phi_def = &new_phi->dest.ssa;
+
+ /* Get the src for when exiting by the loop terminator */
+ struct hash_entry *final_term_hte =
+ _mesa_hash_table_search(final_term_phis, ltpi->phi);
+ if (final_term_hte) {
+ ltpi->innermost_phi_src_def = (nir_ssa_def *) final_term_hte->data;
+ } else {
+ ltpi->innermost_phi_src_def = NULL;
+ }
+
+ /* Update phi to be used in next iteration */
+ ltpi->phi = new_phi;
+ }
+
+ /* Move everything after the terminator we don't have a trip count for
+ * inside the if.
+ */
+ nir_cf_list loop_end;
+ nir_cf_extract(&loop_end, nir_after_cf_node(if_node),
+ nir_after_block(nir_loop_last_block(loop)));
+ nir_if *if_stmt = nir_cf_node_as_if(if_node);
+ move_cf_list_into_if(&loop_end, if_node, last_blk,
+ continue_from_then_branch);
+
+ /* Pluck out the loop body. Unlike the simple unroll pass there are no
+ * breaks remaining in the loop so we do not have the concept of a loop
+ * header and a loop body, instead we just extract everything.
+ */
+ nir_cf_list loop_body;
+ nir_cf_extract(&loop_body, nir_before_cf_node(&header_blk->cf_node),
+ nir_after_block(nir_loop_last_block(loop)));
+
+ /* Temp list to store the cloned loop body as we unroll */
+ nir_cf_list unrolled_lp_body;
+
+ /* Set the cursor to before the loop */
+ b->cursor = nir_before_cf_node(&loop->cf_node);
+
+ nir_block *continue_from_blk = NULL;
+ for (unsigned i = 0; i < loop->info->trip_count; i++) {
+ /* Clone loop body */
+ nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop->cf_node.parent,
+ remap_table);
+
+ nir_cf_node *last_node =
+ exec_node_data(nir_cf_node,
+ exec_list_get_tail(&unrolled_lp_body.list), node);
+ assert(last_node->type == nir_cf_node_block &&
+ exec_list_is_empty(&nir_cf_node_as_block(last_node)->instr_list));
+
+ /* Insert unrolled loop body */
+ nir_cf_reinsert(&unrolled_lp_body, b->cursor);
+
+ nir_cf_node *if_node = nir_cf_node_prev(last_node);
+ assert(if_node->type == nir_cf_node_if);
+ if_stmt = nir_cf_node_as_if(if_node);
+
+ nir_block *exit_from_blk;
+ if (continue_from_then_branch) {
+ continue_from_blk = nir_if_last_then_block(if_stmt);
+ exit_from_blk = nir_if_last_else_block(if_stmt);
+ } else {
+ exit_from_blk = nir_if_last_then_block(if_stmt);
+ continue_from_blk = nir_if_last_else_block(if_stmt);
+ }
+
+ b->cursor = nir_after_cf_node(if_node);
+ if (i < loop->info->trip_count - 1) {
+ list_for_each_entry(loop_term_phi_info, ltpi,
+ &loop_term_phis, list_link) {
+ /* Insert phi created in previous iteration */
+ insert_phi_and_set_block_on_uses(b, ltpi->phi);
+ add_complex_unroll_phi_src(ltpi->phi_src_def, ltpi->phi,
+ remap_table, exit_from_blk);
+
+ /* Create phi to be fixed up by next iteration */
+ nir_phi_instr *new_phi = create_complex_unroll_phi(ns, ltpi->phi);
+ ltpi->phi = new_phi;
+ }
+ } else {
+ list_for_each_entry(loop_term_phi_info, ltpi,
+ &loop_term_phis, list_link) {
+ /* Insert phi created in previous iteration */
+ insert_phi_and_set_block_on_uses(b, ltpi->phi);
+ add_complex_unroll_phi_src(ltpi->phi_src_def, ltpi->phi,
+ remap_table, exit_from_blk);
+ }
+ }
+
+ /* Ready the remap table for the next iteration */
+ update_remap_table(loop, remap_table, phi_remap);
+
+ /* Set the cursor to the last if in the loop body we just unrolled ready
+ * for the next iteration.
+ */
+ b->cursor = nir_after_block(continue_from_blk);
+ }
+
+
+ list_for_each_entry(loop_term_phi_info, ltpi, &loop_term_phis, list_link) {
+ /* Now that the remap table is updated add the second src to the
+ * innermost phis.
+ */
+ nir_ssa_def *phi_src = ltpi->innermost_phi_src_def ?
+ ltpi->innermost_phi_src_def : ltpi->phi_src_def;
+
+ assert(exec_list_length(<pi->phi->srcs) == 1);
+
+ add_complex_unroll_phi_src(phi_src, ltpi->phi, remap_table,
+ continue_from_blk);
+
+ /* Rewrite the uses of the old loop phis */
+ nir_foreach_use_safe(use_src, <pi->old_loop_phi->dest.ssa) {
+ nir_src new_src = nir_src_for_ssa(ltpi->outermost_phi_def);
+ nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
+ }
+
+ nir_foreach_if_use_safe(use_src, <pi->old_loop_phi->dest.ssa) {
+ nir_src new_src = nir_src_for_ssa(ltpi->outermost_phi_def);
+ nir_if_rewrite_condition(use_src->parent_if, new_src);
+ }
+ }
+
+ /* The loop has been unrolled so remove it. */
+ nir_cf_node_remove(&loop->cf_node);
+
+ /* Delete the original loop body */
+ nir_cf_delete(&loop_body);
+
+ ralloc_free(mem_ctx);
+}
+
+static bool
+process_loops(nir_cf_node *cf_node, nir_builder *b, bool *innermost_loop)
+{
+ bool progress = false;
+ nir_loop *loop;
+
+ switch (cf_node->type) {
+ case nir_cf_node_block:
+ return progress;
+ case nir_cf_node_if: {
+ nir_if *if_stmt = nir_cf_node_as_if(cf_node);
+ foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->then_list)
+ progress |= process_loops(nested_node, b, innermost_loop);
+ foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->else_list)
+ progress |= process_loops(nested_node, b, innermost_loop);
+ return progress;
+ }
+ case nir_cf_node_loop: {
+ loop = nir_cf_node_as_loop(cf_node);
+ foreach_list_typed_safe(nir_cf_node, nested_node, node, &loop->body)
+ progress |= process_loops(nested_node, b, innermost_loop);
+ break;
+ }
+ default:
+ unreachable("unknown cf node type");
+ }
+
+ if (*innermost_loop) {
+ nir_function *fn = nir_cf_node_get_function(&loop->cf_node)->function;
+
+ /* Don't attempt to unroll outer loops or a second inner loop in
+ * this pass wait until the next pass as we have altered the cf.
+ */
+ *innermost_loop = false;
+
+ if (loop->info->limiting_terminator == NULL) {
+ return progress;
+ }
+
+ if (is_simple_loop(fn->shader, loop->info)) {
+ simple_unroll(loop, b);
+ progress = true;
+ } else {
+ /* Attempt to unroll loops with two terminators. */
+ if (is_complex_loop(fn->shader, loop->info)) {
+ bool first_terminator = true;
+ list_for_each_entry(nir_loop_terminator, terminator,
+ &loop->info->loop_terminator_list,
+ loop_terminator_link) {
+
+ nir_cf_node *if_node = &terminator->nif->cf_node;
+
+ if (if_node == &loop->info->limiting_terminator->nif->cf_node) {
+ first_terminator = false;
+ continue;
+ }
+
+ /* If the first terminator has a trip count of zero just do a
+ * simple unroll as the second terminator can never be reached.
+ */
+ if (loop->info->trip_count == 0 && first_terminator) {
+ simple_unroll(loop, b);
+ progress = true;
+ break;
+ }
+
+ nir_if *if_stmt = nir_cf_node_as_if(if_node);
+
+ /* Determine which if-statement branch, if any, ends with a
+ * break. Note that since predicted_num_loop_jumps == 1, it is
+ * impossible for both branches to end with a break.
+ */
+ nir_block *last_then = nir_if_last_then_block(if_stmt);
+ if (ends_in_break(last_then)) {
+ complex_unroll(fn, loop, b, if_node, last_then, false,
+ !first_terminator);
+
+ progress = true;
+ break;
+ } else {
+ nir_block *last_else = nir_if_last_else_block(if_stmt);
+ if (ends_in_break(last_else)) {
+ complex_unroll(fn, loop, b, if_node, last_else, true,
+ !first_terminator);
+
+ progress = true;
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return progress;
+}
+
+static bool
+nir_opt_loop_unroll_impl(nir_function_impl *impl,
+ nir_variable_mode indirect_mask)
+{
+ bool progress = false;
+ nir_metadata_require(impl, nir_metadata_loop_analysis, indirect_mask);
+ nir_metadata_require(impl, nir_metadata_block_index);
+
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) {
+ bool innermost_loop = true;
+ progress |= process_loops(node, &b, &innermost_loop);
+ }
+
+ return progress;
+}
+
+bool
+nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask)
+{
+ bool progress = false;
+
+ nir_foreach_function(function, shader) {
+ if (function->impl) {
+ progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask);
+ }
+ }
+ return false;
+}
--
2.7.4
More information about the mesa-dev
mailing list