[Mesa-dev] [PATCH 08/10] nir: add a loop unrolling pass

Jason Ekstrand jason at jlekstrand.net
Thu Oct 6 17:33:24 UTC 2016


On Thu, Oct 6, 2016 at 10:22 AM, Jason Ekstrand <jason at jlekstrand.net>
wrote:
>
>
> On Wed, Oct 5, 2016 at 7:25 PM, Timothy Arceri <
> timothy.arceri at collabora.com> wrote:
>
>> Just
>>
>> On Wed, 2016-10-05 at 16:23 -0700, Jason Ekstrand wrote:
>> >
>> >
>> > On Thu, Sep 15, 2016 at 12:03 AM, Timothy Arceri <timothy.arceri at coll
>> > abora.com> wrote:
>> > > V2:
>> > > - tidy ups suggested by Connor.
>> > > - tidy up cloning logic and handle copy propagation
>> > >  based of suggestion by Connor.
>> > > - use nir_ssa_def_rewrite_uses to fix up lcssa phis
>> > >   suggested by Connor.
>> > > - add support for complex loop unrolling (two terminators)
>> > > - handle case were the ssa defs use outside the loop is already a
>> > > phi
>> > > - support unrolling loops with multiple terminators when trip count
>> > >   is know for each terminator
>> > > ---
>> > >  src/compiler/Makefile.sources          |   1 +
>> > >  src/compiler/nir/nir.h                 |   2 +
>> > >  src/compiler/nir/nir_opt_loop_unroll.c | 820
>> > > +++++++++++++++++++++++++++++++++
>> > >  3 files changed, 823 insertions(+)
>> > >  create mode 100644 src/compiler/nir/nir_opt_loop_unroll.c
>> > >
>> > > diff --git a/src/compiler/Makefile.sources
>> > > b/src/compiler/Makefile.sources
>> > > index 8ef6080..b3512bb 100644
>> > > --- a/src/compiler/Makefile.sources
>> > > +++ b/src/compiler/Makefile.sources
>> > > @@ -233,6 +233,7 @@ NIR_FILES = \
>> > >         nir/nir_opt_dead_cf.c \
>> > >         nir/nir_opt_gcm.c \
>> > >         nir/nir_opt_global_to_local.c \
>> > > +       nir/nir_opt_loop_unroll.c \
>> > >         nir/nir_opt_peephole_select.c \
>> > >         nir/nir_opt_remove_phis.c \
>> > >         nir/nir_opt_undef.c \
>> > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> > > index 9887432..0513d81 100644
>> > > --- a/src/compiler/nir/nir.h
>> > > +++ b/src/compiler/nir/nir.h
>> > > @@ -2661,6 +2661,8 @@ bool nir_opt_dead_cf(nir_shader *shader);
>> > >
>> > >  bool nir_opt_gcm(nir_shader *shader, bool value_number);
>> > >
>> > > +bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode
>> > > indirect_mask);
>> > > +
>> > >  bool nir_opt_peephole_select(nir_shader *shader);
>> > >
>> > >  bool nir_opt_remove_phis(nir_shader *shader);
>> > > diff --git a/src/compiler/nir/nir_opt_loop_unroll.c
>> > > b/src/compiler/nir/nir_opt_loop_unroll.c
>> > > new file mode 100644
>> > > index 0000000..1de02f6
>> > > --- /dev/null
>> > > +++ b/src/compiler/nir/nir_opt_loop_unroll.c
>> > > @@ -0,0 +1,820 @@
>> > > +/*
>> > > + * Copyright © 2016 Intel Corporation
>> > > + *
>> > > + * Permission is hereby granted, free of charge, to any person
>> > > obtaining a
>> > > + * copy of this software and associated documentation files (the
>> > > "Software"),
>> > > + * to deal in the Software without restriction, including without
>> > > limitation
>> > > + * the rights to use, copy, modify, merge, publish, distribute,
>> > > sublicense,
>> > > + * and/or sell copies of the Software, and to permit persons to
>> > > whom the
>> > > + * Software is furnished to do so, subject to the following
>> > > conditions:
>> > > + *
>> > > + * The above copyright notice and this permission notice
>> > > (including the next
>> > > + * paragraph) shall be included in all copies or substantial
>> > > portions of the
>> > > + * Software.
>> > > + *
>> > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> > > EXPRESS OR
>> > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> > > MERCHANTABILITY,
>> > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
>> > > EVENT SHALL
>> > > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
>> > > DAMAGES OR OTHER
>> > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> > > ARISING
>> > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> > > OTHER
>> > > + * DEALINGS IN THE SOFTWARE.
>> > > + */
>> > > +
>> > > +#include "nir.h"
>> > > +#include "nir_builder.h"
>> > > +#include "nir_control_flow.h"
>> > > +
>> > > +static void
>> > > +extract_loop_body(nir_cf_list *extracted, nir_cf_node *node)
>> >
>> > "node" is not particularly descriptive.  Perhaps "start_node" or
>> > something like that.
>> >
>> > > +{
>> > > +   nir_cf_node *end = node;
>> > > +   while (!nir_cf_node_is_last(end))
>> > > +      end = nir_cf_node_next(end);
>> >
>> > This bit of iteration seems unfortunate.  If you have the loop
>> > pointer, you can just do
>> >
>> > nir_cf_extract(extracted, nir_before_cf_node(node),
>> > nir_after_cf_node(nir_loop_last_cf_node(loop))
>> >
>> > For that matter, is the helper even needed?  If you don't want to
>> > type that much and want to keep the helper, you could easily get the
>> > loop from node->parent.
>>
>> Sure this was one of the first bits I wrote before finding the nir
>> helpers. Will see if I can just remove it.
>>
>> >
>> > > +
>> > > +   nir_cf_extract(extracted, nir_before_cf_node(node),
>> > > +                  nir_after_cf_node(end));
>> > > +}
>> > > +
>> > > +static void
>> > > +clone_list(nir_shader *ns, nir_loop *loop, nir_cf_list
>> > > *src_cf_list,
>> > > +           nir_cf_list *cloned_cf_list, struct hash_table
>> > > *remap_table)
>> > > +{
>> > > +   /* Dest list needs to at least have one block */
>> > > +   nir_block *nblk = nir_block_create(ns);
>> > > +   nblk->cf_node.parent = loop->cf_node.parent;
>> > > +   exec_list_push_tail(&cloned_cf_list->list, &nblk-
>> > > >cf_node.node);
>> >
>> > Do you always want to do this or should it be guarded by "if
>> > (exec_list_empty(&src_cf_list->list))"?
>>
>> I think I was relying on the clone to do that check but makes sense to
>> do it here. Will add.
>>
>> >
>> > > +
>> > > +   nir_clone_loop_list(&cloned_cf_list->list, &src_cf_list->list,
>> > > +                       remap_table, ns);
>> > > +}
>> > > +
>> > > +static void
>> > > +move_cf_list_into_if(nir_cf_list *lst, nir_cf_node *if_node,
>> > > +                     nir_cf_node *last_node, bool
>> > > continue_from_then_branch)
>> > > +{
>> > > +   nir_if *if_stmt = nir_cf_node_as_if(if_node);
>> > > +   if (continue_from_then_branch) {
>> > > +      /* Move the rest of the loop inside the then */
>> > > +      nir_cf_reinsert(lst,
>> > > nir_after_cf_node(nir_if_last_then_node(if_stmt)));
>> > > +   } else {
>> > > +      /* Move the rest of the loop inside the else */
>> > > +      nir_cf_reinsert(lst,
>> > > nir_after_cf_node(nir_if_last_else_node(if_stmt)));
>> > > +   }
>> > > +
>> > > +   /* Remove the break */
>> > > +
>> > >  nir_instr_remove(nir_block_last_instr(nir_cf_node_as_block(last_no
>> > > de)));
>> > > +}
>> > > +
>> > > +static bool
>> > > +is_phi_src_phi_from_loop_header(nir_ssa_def *def, nir_ssa_def
>> > > *src)
>> > > +{
>> > > +   return def->parent_instr->type == nir_instr_type_phi &&
>> > > +      src->parent_instr->type == nir_instr_type_phi &&
>> > > +      nir_instr_as_phi(def->parent_instr)->instr.block->index ==
>> > > +      nir_instr_as_phi(src->parent_instr)->instr.block->index;
>> >
>> > This last check can be shortened to "def->parent_instr->block == src-
>> > >parent_instr->block".  Converting to a phi and then doing ->instr is
>> > just redundant.  Also, I'd rather compre block pointers than indices
>> > if we can.
>>
>> Sure.
>>
>> >
>> > > +}
>> > > +
>> > > +static void
>> > > +get_table_of_lcssa_and_loop_term_phis(nir_cf_node *loop,
>> > > +                                      struct hash_table
>> > > **lcssa_phis,
>> > > +                                      struct hash_table
>> > > **loop_term_phis,
>> > > +                                      nir_if *loop_term_if)
>> > > +{
>> > > +   *lcssa_phis = _mesa_hash_table_create(NULL,
>> > > _mesa_hash_pointer,
>> > > +                                         _mesa_key_pointer_equal);
>> > > +   *loop_term_phis = _mesa_hash_table_create(NULL,
>> > > _mesa_hash_pointer,
>> > > +
>> > >  _mesa_key_pointer_equal);
>> > > +
>> > > +   nir_cf_node *cf_node = nir_cf_node_next(loop);
>> > > +   nir_block *block = nir_cf_node_as_block(cf_node);
>> > > +   nir_foreach_instr(instr, block) {
>> > > +      if (instr->type == nir_instr_type_phi) {
>> > > +         nir_phi_instr *phi = nir_instr_as_phi(instr);
>> > > +
>> > > +         nir_foreach_phi_src(src, phi) {
>> > > +            nir_block *then_blk =
>> > > +
>> > >  nir_cf_node_as_block(nir_if_last_then_node(loop_term_if));
>> > > +            nir_block *else_blk =
>> > > +
>> > >  nir_cf_node_as_block(nir_if_last_else_node(loop_term_if));
>> > > +
>> > > +            if (src->pred == then_blk || src->pred == else_blk) {
>> > > +               _mesa_hash_table_insert(*loop_term_phis, phi, src-
>> > > >src.ssa);
>> > > +            } else {
>> > > +               _mesa_hash_table_insert(*lcssa_phis, phi, src-
>> > > >src.ssa);
>> > > +            }
>> > > +         }
>> > > +      } else {
>> > > +         /* There should be no more phis */
>> > > +         break;
>> > > +      }
>> > > +   }
>> > > +}
>> > > +
>> > > +static void
>> > > +create_remap_tables(nir_loop *loop, nir_block *loop_header_blk,
>> > > +                    struct hash_table **remap_table,
>> > > +                    struct hash_table **phi_remap,
>> > > +                    struct hash_table **src_before_loop,
>> > > +                    struct hash_table **src_after_loop)
>> > > +{
>> > > +   *remap_table = _mesa_hash_table_create(NULL,
>> > > _mesa_hash_pointer,
>> > > +
>> > > _mesa_key_pointer_equal);
>> > > +   *phi_remap = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
>> > > +                                        _mesa_key_pointer_equal);
>> > > +   *src_before_loop = _mesa_hash_table_create(NULL,
>> > > _mesa_hash_pointer,
>> > > +
>> > > _mesa_key_pointer_equal);
>> > > +   *src_after_loop = _mesa_hash_table_create(NULL,
>> > > _mesa_hash_pointer,
>> > > +
>> > >  _mesa_key_pointer_equal);
>> > > +
>> > > +   /* Build hash tables used for remapping as we unroll. */
>> > > +   nir_foreach_instr(instr, loop_header_blk) {
>> > > +      if (instr->type != nir_instr_type_phi)
>> > > +         break;
>> > > +
>> > > +      nir_phi_instr *phi = nir_instr_as_phi(instr);
>> > > +      nir_foreach_phi_src(src, phi) {
>> > > +         /* Is the pred from the block itself? */
>> >
>> > I'm not sure what this comment is trying to say.  The logic below
>> > determines whether or not the predecessor is from some block inside
>> > the loop but not the header block.
>>
>> I'll update this to make a bit more sense.
>>
>> >
>> > > +         if (src->pred->index > phi->instr.block->index &&
>> > > +             src->pred->cf_node.parent == &loop->cf_node) {
>> > > +
>> > > +            _mesa_hash_table_insert(*phi_remap, &phi->dest.ssa,
>> > > src->src.ssa);
>> > > +            _mesa_hash_table_insert(*src_after_loop, &phi-
>> > > >dest.ssa,
>> > > +                                    src->src.ssa);
>> > > +         } else {
>> >
>> > Perhaps assert(src->pred == start_bloc || src->pred ==
>> > block_before_loop)
>>
>> Sure.
>>
>> >
>> > > +            _mesa_hash_table_insert(*remap_table, &phi->dest.ssa,
>> > > +                                    src->src.ssa);
>> > > +            _mesa_hash_table_insert(*src_before_loop, &phi-
>> > > >dest.ssa,
>> > > +                                    src->src.ssa);
>> > > +         }
>> > > +      }
>> > > +   }
>> > > +}
>> > > +
>> > > +static void
>> > > +update_remap_tables(bool is_first_iteration, struct hash_table
>> > > *remap_table,
>> > > +                    struct hash_table *phi_remap,
>> > > +                    struct hash_table *src_before_loop,
>> > > +                    struct hash_table *src_after_loop)
>> > > +{
>> > > +   struct hash_entry *phi_hte;
>> > > +   hash_table_foreach(phi_remap, phi_hte) {
>> > > +      struct hash_entry *remap_hte =
>> > > +         _mesa_hash_table_search(remap_table, phi_hte->data);
>> > > +
>> > > +      nir_ssa_def *phi_def = (nir_ssa_def *) phi_hte->key;
>> > > +      nir_ssa_def *phi_src = (nir_ssa_def *) phi_hte->data;
>> > > +
>> > > +      if (!remap_hte && is_first_iteration) {
>> > > +         _mesa_hash_table_insert(remap_table, phi_hte->key,
>> > > phi_hte->data);
>> > > +         continue;
>> > > +      }
>> > > +
>> > > +      if (is_phi_src_phi_from_loop_header(phi_def, phi_src)) {
>> > > +          /* After copy propagation we can end up with phis inside
>> > > loops
>> > > +           * that look like this:
>> > > +           *
>> > > +           *    vec1 32 ssa_14 = phi block_0: ssa_9, block_4:
>> > > ssa_13
>> > > +           *    vec1 32 ssa_13 = phi block_0: ssa_8, block_4:
>> > > ssa_12
>> > > +           *    vec1 32 ssa_12 = phi block_0: ssa_7, block_4:
>> > > ssa_11
>> > > +           *    vec1 32 ssa_11 = phi block_0: ssa_6, block_4:
>> > > ssa_14
>> > > +           *
>> > > +           * For each iteration of the loop we need to update the
>> > > phi and
>> > > +           * cloning remap tables so that we use the correct src
>> > > for the
>> > > +           * next iteration.
>> > > +           */
>> > > +          struct hash_entry *sbl_hte =
>> > > +             _mesa_hash_table_search(src_before_loop, phi_hte-
>> > > >data);
>> > > +          _mesa_hash_table_insert(remap_table, phi_hte->key,
>> > > sbl_hte->data);
>> > > +
>> > > +          struct hash_entry *sal_hte =
>> > > +             _mesa_hash_table_search(src_after_loop, phi_hte-
>> > > >data);
>> > > +          phi_hte->data = sal_hte->data;
>> > > +      } else if (remap_hte) {
>> > > +          _mesa_hash_table_insert(remap_table, phi_hte->key,
>> > > remap_hte->data);
>> > > +      }
>> >
>> > Hrm... I understand the problem but I would have thought we could
>> > have done it with fewer remap tables...  My naeve idea would be to do
>> > as follows:
>> >
>> > In create_remap_tables, populate the remap table with phi_dst ->
>> > phi_src where phi_src is the source with pred == block_before_loop.
>> >
>> > In update_remap_tables, populate the remap table with phi_dst ->
>> > remap(phi_src) where phi_src is the soure with pred == continue_block
>> >
>> > Am I just over-simplifying things here?
>>
>> I believe you are. The remap table is updated by the cloning process
>> our job here is to simply make sure it is in the right state before we
>> do the next clone. The key thing is that we are always cloning the
>> original header/body (were are not cloning the previous clone).
>>
>
> Ah, that last point was a big part of what I was missing.
>
>
>> So the above code is acting more as a reset, or initialisation of the
>> remap table for the next iteration of the loop (although we want to
>> keep what it already contains).
>>
>> Essentially the code is doing what you are suggesting you are just
>> leaving out the part where updating phi_src with the source from pred
>> == continue_block only works for the first iteration. For the next
>> iteration we use the value already in the remap table after the clone,
>> or the case where there is no mov i.e no clone we need to set things up
>> manually using the src_before_loop/src_after_loop hash tables.
>>
>> Does this make sense?
>>
>
> Yes, but I still think you could make it simpler and/or more obvious if
> you did it in terms of the phi nodes themselves rather than these tables
> that you generate from the phi nodes (which is really just a less
> deterministic list of nodes).  Something like this perhaps:
>
> nir_foreach_instr(instr, header_block) {
>    if (instr->type != nir_instr_type_phi)
>       break;
>
>    nir_phi_instr *phi = nir_instr_as_phi(instr);
>
>    nir_foreach_phi_src(src, phi) {
>       if (src->pred != continue_block)
>          continue;
>
>       struct hash_entry *src_remap = _mesa_hash_table_search(remap,
> src->src.ssa);
>       if (src_remap) {
>          _mesa_hash_table_insert(remap, &phi->dst.ssa, src_remap->data);
>       } else {
>          _mesa_hash_table_insert(remap, &phi->dst.ssa, src->src.ssa);
>       }
>    }
> }
>
> That seems much more obvious than hash tables which are actually lists.
> Sure, you could argue that walking the sources of phis looks inefficient
> but, given that this is the simple case, they're guaranteed to have exactly
> 2 sources, so meh.
>
> It's entirely possible that I'm still missing something...
>

And... I just realized what I'm missing... :-)  The phis may end up mapping
to other phis whose entries in the remap table we've already mutated.
Really, we need to do it in two steps:  One to compute the set of remaps
and then a second step to apply them.  So my algorithm would have to be
modified as follows:

Add a phi_remap table and, in the above code, do the
_mesa_hash_table_insert operations on the phi_remap.  Then, do

struct hash_entry *phi_entry;
hash_table_foreach(phi_entry, phi_remap)
   _mesa_hash_table_insert(remap, phi_entry->key, phi_entry->data);

to apply the remaps.  Still, I think that's more obvious than the 4 tables
approach.

--Jason


> It would be nice if we could distill this into an easy to follow
>> comment to put on the function itself, happy for your input to help get
>> to something that not only I understand :)
>>
>> How about something like:
>>
>> /*
>>  * The remap_table is updated by the cloning process, our job here is
>>  * to simply make sure it is in the right state before we do the next
>>  * clone. The key thing is that we are always cloning the original
>>  * header/body (were are not cloning the previous clone).
>>  *
>>  * Since the cloning process is updating the remap_table we can think
>>  * of this function as acting more as a reset, or initialisation of the
>>  * remap table for the next iteration of the loop (although we want to
>>  * keep what it already contains).
>>  *
>>  * After the first iteration we use the value already in remap_table to
>>  * update the phi_src with the new src from the previous clone, or in
>>  * the case where there is no mov i.e no clone, we update the
>>  * remap_table using the src_before_loop/src_after_loop hash tables.
>>  */
>>
>> >
>> > > +   }
>> > > +}
>> > > +
>> > > +static void
>> > > +insert_phi_and_set_block_on_uses(nir_builder *b, nir_phi_instr
>> > > *phi_instr)
>> > > +{
>> > > +   nir_instr_insert(b->cursor, &phi_instr->instr);
>> > > +
>> > > +   /* Now that we have inserted the phi fix up the block for its
>> > > uses. */
>> > > +   nir_foreach_use_safe(use_src, &phi_instr->dest.ssa) {
>> > > +      nir_phi_instr *use_phi = nir_instr_as_phi(use_src-
>> > > >parent_instr);
>> > > +
>> > > +      foreach_list_typed(nir_phi_src, src, node, &use_phi->srcs) {
>> > > +         if (!src->pred)
>> > > +            src->pred = phi_instr->dest.ssa.parent_instr->block;
>> > > +      }
>> > > +   }
>> > > +}
>> > > +
>> > > +static nir_phi_instr *
>> > > +create_complex_unroll_phi(nir_shader *ns, nir_phi_instr
>> > > *prev_phi_instr)
>> > > +{
>> > > +   nir_phi_instr *new_phi = nir_phi_instr_create(ns);
>> > > +   nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1,
>> > > +                     prev_phi_instr->dest.ssa.bit_size, NULL);
>> > > +
>> > > +   /* Add the new phi as a src to the phi from the previous
>> > > iteration */
>> > > +   nir_phi_src *new_src = ralloc(prev_phi_instr, nir_phi_src);
>> > > +   new_src->src = nir_src_for_ssa(&new_phi->dest.ssa);
>> > > +   new_src->src.parent_instr = &prev_phi_instr->instr;
>> > > +   exec_list_push_tail(&prev_phi_instr->srcs, &new_src->node);
>> > > +   list_addtail(&new_src->src.use_link, &new_src->src.ssa->uses);
>> > > +
>> > > +   return new_phi;
>> > > +}
>> > > +
>> > > +static void
>> > > +add_complex_unroll_phi_src(nir_ssa_def *phi_src, nir_phi_instr
>> > > *phi_instr,
>> > > +                           struct hash_table *remap_table,
>> > > nir_block *blk)
>> > > +{
>> > > +   struct hash_entry *hte =
>> > > +      _mesa_hash_table_search(remap_table, phi_src);
>> > > +
>> > > +   nir_phi_src *new_src = ralloc(phi_instr, nir_phi_src);
>> > > +   nir_ssa_def *ssa_def = hte ? (nir_ssa_def *) hte->data :
>> > > phi_src;
>> > > +   new_src->pred = blk;
>> > > +   new_src->src = nir_src_for_ssa(ssa_def);
>> > > +   new_src->src.parent_instr = &phi_instr->instr;
>> > > +   list_addtail(&new_src->src.use_link, &new_src->src.ssa->uses);
>> > > +
>> > > +   exec_list_push_tail(&phi_instr->srcs, &new_src->node);
>> > > +}
>> > > +
>> > > +static void
>> > > +simple_loop_fix_lcssa_phis(nir_cf_node *loop, struct hash_table
>> > > *remap_table)
>> > > +{
>> > > +   nir_block *prev_block =
>> > > nir_cf_node_as_block(nir_cf_node_prev(loop));
>> > > +   nir_cf_node *cf_node = nir_cf_node_next(loop);
>> > > +   assert(cf_node->type == nir_cf_node_block);
>> > > +
>> > > +   nir_block *block = nir_cf_node_as_block(cf_node);
>> > > +   nir_foreach_instr_safe(instr, block) {
>> > > +      if (instr->type == nir_instr_type_phi) {
>> > > +         nir_phi_instr *phi = nir_instr_as_phi(instr);
>> > > +
>> > > +         nir_foreach_phi_src_safe(src, phi) {
>> > > +            /* Update predecessor */
>> > > +            src->pred = prev_block;
>> > > +
>> > > +            /* Update src */
>> > > +            struct hash_entry *hte =
>> > > +               _mesa_hash_table_search(remap_table, src->src.ssa);
>> > > +            assert(hte || !phi->is_lcssa_phi);
>> > > +            if (hte) {
>> > > +               nir_src new_src = nir_src_for_ssa((nir_ssa_def *)
>> > > hte->data);
>> > > +               if (phi->is_lcssa_phi || exec_list_length(&phi-
>> > > >srcs) == 1) {
>> > > +                  nir_ssa_def_rewrite_uses(&phi->dest.ssa,
>> > > new_src);
>> > > +               } else {
>> > > +                  nir_instr_rewrite_src(instr, &src->src,
>> > > new_src);
>> > > +               }
>> > > +            } else {
>> > > +               /* If a non lcssa phi now only has 1 src rewrite
>> > > its uses here.
>> > > +                * This avoids the src getting rewritten to an
>> > > undefined def,
>> > > +                * which appears to be done in nir_cf_node_remove()
>> > > when
>> > > +                * removing the loop.
>> > > +                */
>> > > +               if (exec_list_length(&phi->srcs) == 1) {
>> > > +                  struct exec_node *head =
>> > > exec_list_get_head(&phi->srcs);
>> > > +                  nir_phi_src *phi_src =
>> > > exec_node_data(nir_phi_src, head, node);
>> > > +                  nir_ssa_def_rewrite_uses(&phi->dest.ssa,
>> > > phi_src->src);
>> > > +               }
>> > > +            }
>> > > +         }
>> > > +         if (phi->is_lcssa_phi || exec_list_length(&phi->srcs) ==
>> > > 1)
>> > > +            nir_instr_remove(&phi->instr);
>> >
>> > I'm not convinced that this logic is at all what we want.  It seems
>> > like what we want is more something along these lines:
>> >
>> > nir_foreach_phi_src(src, phi) {
>> >    if (src->pred != block_with_break_in_terminator)
>> >       continue;
>>
>> I was under the impression NIR never delays the use of phis so no phi
>> following a loop should ever not meet this condition right?
>>
>
> Right.  For simple loops they should all be lcssa phis with exactly one
> source.
>
>
>> >
>> >    struct hash_entry *hte = _mesa_hash_table_search(remap_table, src-
>> > >src.ssa);
>> >    if (hte) {
>> >       nir_ssa_def_rewrite_uses(&phi->dest.ssa,
>> > nir_src_for_ssa((nir_ssa_def *)hte->data));
>> >    } else {
>> >       nir_ssa_def_rewrite_uses(&phi->dest.ssa, src->src);
>> >    }
>> > }
>> > assert(list_is_empty(&phi->dest.ssa.uses));
>> >
>> > What we really want for the phis after the loop is to take the value
>> > that it would have taken coming from the break.
>>
>> Right. Which is what the code does but for simple loops there should
>> only ever be a single exit point so your simpler code *should* work.
>> I'll give it a try thanks.
>>
>> >
>> > > +      } else {
>> > > +         /* There should be no more LCSSA-phis */
>> > > +         break;
>> > > +      }
>> > > +   }
>> > > +}
>> > > +
>> > > +static bool
>> > > +ends_in_break(nir_block *block)
>> > > +{
>> > > +   if (exec_list_is_empty(&block->instr_list))
>> > > +      return false;
>> > > +
>> > > +   nir_instr *instr = nir_block_last_instr(block);
>> > > +   return instr->type == nir_instr_type_jump &&
>> > > +      nir_instr_as_jump(instr)->type == nir_jump_break;
>> > > +}
>> > > +
>> > > +/**
>> > > + * Unroll a loop which does not contain any jumps.  For example,
>> > > if the input
>> > > + * is:
>> > > + *
>> > > + *     (loop (...) ...instrs...)
>> > > + *
>> > > + * And the iteration count is 3, the output will be:
>> > > + *
>> > > + *     ...instrs... ...instrs... ...instrs...
>> > > + */
>> > > +static void
>> > > +simple_unroll(nir_function *fn, nir_loop *loop, nir_builder *b)
>> > > +{
>> > > +   nir_shader *ns = fn->shader;
>> > > +
>> > > +   /* Get the loop header this contains a bunch of phis and the
>> > > loops
>> > > +    * conditional.
>> > > +    */
>> > > +   nir_cf_node *lp_header_cf_node = nir_loop_first_cf_node(loop);
>> > > +   nir_block *loop_header_blk =
>> > > nir_cf_node_as_block(lp_header_cf_node);
>> > > +
>> > > +   struct hash_table *remap_table;
>> > > +   struct hash_table *phi_remap;
>> > > +   struct hash_table *src_before_loop;
>> > > +   struct hash_table *src_after_loop;
>> > > +   create_remap_tables(loop, loop_header_blk, &remap_table,
>> > > &phi_remap,
>> > > +                       &src_before_loop, &src_after_loop);
>> > > +
>> > > +   /* Skip over loop terminator and get the loop body. */
>> > > +   nir_cf_node *if_node = &loop->info->limiting_terminator->nif-
>> > > >cf_node;
>> > > +   list_for_each_entry(nir_loop_terminator, terminator,
>> > > +                       &loop->info->loop_terminator_list,
>> > > loop_terminator_link) {
>> > > +       nir_cf_node *loop_node = &terminator->nif->cf_node;
>> > > +
>> > > +      /* Remove all but the limiting terminator as we know the
>> > > other exit
>> > > +       * conditions can never be met.
>> > > +       */
>> > > +      if (loop_node != &loop->info->limiting_terminator->nif-
>> > > >cf_node) {
>> > > +         nir_cf_node_remove(loop_node);
>> >
>> > Can we really do this?  What happens if the terminator is
>> >
>> > if (...) {
>> >    /*stuff */
>> >    break;
>> > } else {
>> >    /* other stuff */
>> > }
>> >
>> > Answer (from IRC): That can never happen since loop analysis will
>> > bail in that case.
>> >
>> > Also, is it actually necessary to delete the other terminators?  Can
>> > we just delete the breaks from them and trust dead_cf to clean them
>> > up properly?
>>
>> I *think* we can do this (and I did a some stage) but to what ends? The
>> code to do so would be more complex and it means we are cloning a whole
>> bunch of extra if staments.
>>
>
> Just thinking through things. :-)  If we want to deal with non-simple
> if's, that might be a good way to go.
>
>
>> >
>> > > +      }
>> > > +   }
>> > > +
>> > > +   nir_cf_node *cf_node = nir_cf_node_next(if_node);
>> > > +
>> > > +   /* Pluck out the loop header */
>> > > +   nir_cf_list lp_header;
>> > > +   nir_cf_extract(&lp_header,
>> > > nir_before_cf_node(lp_header_cf_node),
>> > > +                  nir_before_cf_node(if_node));
>> > > +
>> > > +   /* Pluck out the loop body */
>> > > +   nir_cf_list loop_body;
>> > > +   extract_loop_body(&loop_body, cf_node);
>> >
>> > It might be clearer if this were
>> >
>> > nir_cf_extract(&loop_body, nir_after_cf_node(if_node),
>> > nir_after_cf_node(nir_loop_last_cf_node(loop)));
>> >
>> > I'm not sure the extract_loop_body helper is actually helping you.
>>
>> Sure.
>>
>> >
>> > > +
>> > > +   /* Clone the loop header */
>> > > +   nir_cf_list cloned_header;
>> > > +   exec_list_make_empty(&cloned_header.list);
>> > > +   cloned_header.impl = loop_body.impl;
>> > > +
>> > > +   clone_list(ns, loop, &lp_header, &cloned_header, remap_table);
>> >
>> > It would be cool if these 4 lines were just
>> >
>> > nir_cf_list_clone(&lp_header, &cloned_header, remap_table);
>>
>> Hmm. I'm not sure thats better. It would still be at least 2 lines:
>>
>> nir_cf_list cloned_header;
>> nir_cf_list_clone(&lp_header, &cloned_header, remap_table);
>>
>
> Sure, but it would also simplify the clone implementation a bit and a
> cf_list is a well-defined thing in NIR where as an arbitrary exec_list with
> things that we've been promissed are cf_nodes isn't so much.
>
>
>> But now we are calling:
>>
>> exec_list_make_empty(&cloned_header.list);
>> cloned_header.impl = loop_body.impl;
>>
>> Inside the loop below when we don't need to. Although I guess it's
>> normally not going to be a huge number of iterations so I guess that
>> its worth it for cleaner code. Ok I'm convinced I'll change it.
>>
>> >
>> > > +
>> > > +   /* Insert cloned loop header before the loop */
>> > > +   b->cursor = nir_before_cf_node(&loop->cf_node);
>> > > +   nir_cf_reinsert(&cloned_header, b->cursor);
>> > > +
>> > > +   /* Create temp block to store the cloned loop body as we unroll
>> > > */
>> > > +   nir_cf_list unrolled_lp_body;
>> > > +   exec_list_make_empty(&unrolled_lp_body.list);
>> > > +   unrolled_lp_body.impl = loop_body.impl;
>> > > +
>> > > +   /* Clone loop header and append to the loop body */
>> > > +   for (unsigned i = 0; i < loop->info->trip_count; i++) {
>> > > +      /* Clone loop body */
>> > > +      clone_list(ns, loop, &loop_body, &unrolled_lp_body,
>> > > remap_table);
>> > > +
>> > > +      update_remap_tables(i == 0, remap_table, phi_remap,
>> > > src_before_loop,
>> > > +                          src_after_loop);
>> > > +
>> > > +      /* Insert unrolled loop body before the loop */
>> > > +      b->cursor = nir_before_cf_node(&loop->cf_node);
>> > > +      nir_cf_reinsert(&unrolled_lp_body, b->cursor);
>> > > +
>> > > +      /* Clone loop header */
>> > > +      clone_list(ns, loop, &lp_header, &cloned_header,
>> > > remap_table);
>> > > +
>> > > +      /* Insert loop header after loop body */
>> > > +      b->cursor = nir_before_cf_node(&loop->cf_node);
>> > > +      nir_cf_reinsert(&cloned_header, b->cursor);
>> > > +   }
>> > > +
>> > > +   /* The loop has been unrolled so remove it. */
>> > > +   simple_loop_fix_lcssa_phis(&loop->cf_node, remap_table);
>> > > +
>> > > +   /* Remove the loop */
>> > > +   nir_cf_node_remove(&loop->cf_node);
>> > > +
>> > > +   /* Delete the original loop body & header */
>> > > +   nir_cf_delete(&lp_header);
>> > > +   nir_cf_delete(&loop_body);
>> > > +
>> > > +   _mesa_hash_table_destroy(remap_table, NULL);
>> > > +   _mesa_hash_table_destroy(phi_remap, NULL);
>> > > +   _mesa_hash_table_destroy(src_before_loop, NULL);
>> > > +   _mesa_hash_table_destroy(src_after_loop, NULL);
>> >
>> > I think I would be mildly happier if you used a ralloc context rather
>> > than destroying them all manually.
>>
>> Yeah that would have saved a few crashes when writing the complex
>> unroll code. Will do.
>>
>> >
>> > > +}
>> > > +
>> > > +/**
>> > > + * Unroll a loop with two exists when the trip count of one of the
>> > > exits is
>> > > + * unknown.  If continue_from_then_branch is true, the loop is
>> > > repeated only
>> > > + * when the "then" branch of the if is taken; otherwise it is
>> > > repeated only
>> > > + * when the "else" branch of the if is taken.
>> > > + *
>> > > + * For example, if the input is:
>> > > + *
>> > > + *     (loop (...)
>> > > + *      ...body...
>> > > + *      (if (cond)
>> > > + *          (...then_instrs...)
>> > > + *        (...else_instrs...)))
>> > > + *
>> > > + * And the iteration count is 3, and \c continue_from_then_branch
>> > > is true,
>> > > + * then the output will be:
>> > > + *
>> > > + *     ...body...
>> > > + *     (if (cond)
>> > > + *         (...then_instrs...
>> > > + *          ...body...
>> > > + *          (if (cond)
>> > > + *              (...then_instrs...
>> > > + *               ...body...
>> > > + *               (if (cond)
>> > > + *                   (...then_instrs...)
>> > > + *                 (...else_instrs...)))
>> > > + *            (...else_instrs...)))
>> > > + *       (...else_instrs))
>> > > + */
>> > > +static void
>> > > +complex_unroll(nir_function *fn, nir_loop *loop, nir_builder *b,
>> > > +               nir_cf_node *if_node, nir_cf_node *last_node,
>> > > +               bool continue_from_then_branch, bool
>> > > limiting_term_second)
>> > > +{
>> > > +   nir_cf_node *limiting_trm = &loop->info->limiting_terminator-
>> > > >nif->cf_node;
>> > > +   nir_cf_node *lp_header_cf_node = nir_loop_first_cf_node(loop);
>> > > +   nir_block *loop_header_blk =
>> > > nir_cf_node_as_block(lp_header_cf_node);
>> > > +
>> > > +   struct hash_table *remap_table;
>> > > +   struct hash_table *phi_remap;
>> > > +   struct hash_table *src_before_loop;
>> > > +   struct hash_table *src_after_loop;
>> > > +   create_remap_tables(loop, loop_header_blk, &remap_table,
>> > > &phi_remap,
>> > > +                       &src_before_loop, &src_after_loop);
>> > > +
>> > > +   struct hash_table *loop_phis;
>> > > +   struct hash_table *loop_term_phis;
>> > > +   get_table_of_lcssa_and_loop_term_phis(&loop->cf_node,
>> > > &loop_phis,
>> > > +                                         &loop_term_phis,
>> > > +                                         loop->info-
>> > > >limiting_terminator->nif);
>> > > +
>> > > +   if (limiting_term_second) {
>> > > +      /* We need some special handling when its the second
>> > > terminator causing
>> > > +       * us to exit the loop for example:
>> > > +       *
>> > > +       *   for (int i = 0; i < uniform_lp_count; i++) {
>> > > +       *      colour = vec4(0.0, 1.0, 0.0, 1.0);
>> > > +       *
>> > > +       *      if (i == 1)
>> > > +       *         break;
>> > > +       *      }
>> > > +       *      ... any further code is unreachable after i == 1 ...
>> > > +       *   }
>> > > +       *
>> > > +       * Bump the trip count by one so we actually clone
>> > > something. Also
>> > > +       * extract everything after the limiting terminator and
>> > > insert it into
>> > > +       * the branch we will continue from.
>> > > +       */
>> > > +      loop->info->trip_count++;
>> > > +
>> > > +      nir_cf_list after_lt;
>> > > +      extract_loop_body(&after_lt,
>> > > nir_cf_node_next(limiting_trm));
>> > > +
>> > > +      nir_if *if_stmt = loop->info->limiting_terminator->nif;
>> > > +      nir_cf_node *last_then = nir_if_last_then_node(if_stmt);
>> > > +      if (last_then->type == nir_cf_node_block &&
>> > > +          ends_in_break(nir_cf_node_as_block(last_then))) {
>> > > +         move_cf_list_into_if(&after_lt, limiting_trm, last_then,
>> > > false);
>> > > +      } else {
>> > > +         nir_cf_node *last_else = nir_if_last_else_node(if_stmt);
>> > > +         if (last_else->type == nir_cf_node_block &&
>> > > +             ends_in_break(nir_cf_node_as_block(last_else))) {
>> > > +            move_cf_list_into_if(&after_lt, limiting_trm,
>> > > last_else, true);
>> > > +         }
>> > > +      }
>> > > +   } else {
>> > > +      /* Remove the limiting terminator.  Loop analysis will only
>> > > find a
>> > > +       * terminator for trival if statments (then only contains
>> > > break, else
>> > > +       * is empty) so its safe to remove the whole thing.
>> > > +       */
>> > > +      nir_cf_node_remove(limiting_trm);
>> > > +   }
>> > > +
>> > > +   nir_shader *ns = fn->shader;
>> > > +   struct hash_table *lcssa_phis =
>> > > +      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
>> > > +                              _mesa_key_pointer_equal);
>> > > +
>> > > +   /* Create phis to be used post-if (replacements for the post-
>> > > loop phis) */
>> > > +   struct hash_entry *phi_hte;
>> > > +   hash_table_foreach(loop_phis, phi_hte) {
>> > > +      nir_phi_instr *phi_instr = (nir_phi_instr *) phi_hte->key;
>> > > +      nir_phi_instr *new_phi = create_complex_unroll_phi(ns,
>> > > phi_instr);
>> > > +
>> > > +      nir_ssa_def *ssa_def = (nir_ssa_def *) phi_hte->data;
>> > > +      _mesa_hash_table_insert(lcssa_phis, new_phi, ssa_def);
>> > > +
>> > > +      /* Update loop_phis to point to the replacement phi */
>> > > +      phi_hte->data = &new_phi->dest.ssa;
>> > > +
>> > > +      struct hash_entry *loop_term_hte =
>> > > +         _mesa_hash_table_search(loop_term_phis, phi_hte->key);
>> > > +      if (loop_term_hte) {
>> > > +         _mesa_hash_table_insert(loop_term_phis, new_phi,
>> > > loop_term_hte->data);
>> > > +         _mesa_hash_table_remove(loop_term_phis, loop_term_hte);
>> > > +      }
>> > > +   }
>> > > +
>> > > +   /* Move everything after the terminator we don't have a trip
>> > > count for
>> > > +    * inside the if.
>> > > +    */
>> > > +   nir_cf_list loop_end;
>> > > +   extract_loop_body(&loop_end, nir_cf_node_next(if_node));
>> > > +   nir_if *if_stmt = nir_cf_node_as_if(if_node);
>> > > +   move_cf_list_into_if(&loop_end, if_node, last_node,
>> > > +                        continue_from_then_branch);
>> > > +
>> > > +   /* Pluck out the loop body. Unlike the simple unroll pass there
>> > > are no
>> > > +    * breaks remaining in the loop so we do not have the concept
>> > > of a loop
>> > > +    * header and a loop body, instead we just extract everything.
>> > > +    */
>> > > +   nir_cf_list loop_body;
>> > > +   extract_loop_body(&loop_body, lp_header_cf_node);
>> > > +
>> > > +   /* Create temp block to store the cloned loop body as we unroll
>> > > */
>> > > +   nir_cf_list unrolled_lp_body;
>> > > +   exec_list_make_empty(&unrolled_lp_body.list);
>> > > +   unrolled_lp_body.impl = loop_body.impl;
>> > > +
>> > > +   /* Set the cursor to before the loop */
>> > > +   b->cursor = nir_before_cf_node(&loop->cf_node);
>> > > +
>> > > +   nir_cf_node *continue_from_node = NULL;
>> > > +   for (unsigned i = 0; i < loop->info->trip_count; i++) {
>> > > +      /* Clone loop body */
>> > > +      clone_list(ns, loop, &loop_body, &unrolled_lp_body,
>> > > remap_table);
>> > > +
>> > > +      nir_cf_node *last_node =
>> > > +         exec_node_data(nir_cf_node,
>> > > +
>> > > exec_list_get_tail(&unrolled_lp_body.list), node);
>> > > +      assert(last_node->type == nir_cf_node_block &&
>> > > +             exec_list_is_empty(&nir_cf_node_as_block(last_node)-
>> > > >instr_list));
>> > > +
>> > > +      /* Insert unrolled loop body */
>> > > +      nir_cf_reinsert(&unrolled_lp_body, b->cursor);
>> > > +
>> > > +      nir_cf_node *if_node = nir_cf_node_prev(last_node);
>> > > +      assert(if_node->type == nir_cf_node_if);
>> > > +      if_stmt = nir_cf_node_as_if(if_node);
>> > > +
>> > > +      nir_cf_node *exit_from_node;
>> > > +      if (continue_from_then_branch) {
>> > > +         continue_from_node = nir_if_last_then_node(if_stmt);
>> > > +         exit_from_node = nir_if_last_else_node(if_stmt);
>> > > +      } else {
>> > > +         exit_from_node = nir_if_last_then_node(if_stmt);
>> > > +         continue_from_node = nir_if_last_else_node(if_stmt);
>> > > +      }
>> > > +
>> > > +      b->cursor = nir_after_cf_node(if_node);
>> > > +      if (i < loop->info->trip_count - 1) {
>> > > +         struct hash_table *tmp =
>> > > +            _mesa_hash_table_create(NULL, _mesa_hash_pointer,
>> > > +                                    _mesa_key_pointer_equal);
>> > > +
>> > > +         struct hash_entry *phi_hte;
>> > > +         hash_table_foreach(lcssa_phis, phi_hte) {
>> > > +            /* Insert phi created in previous iteration */
>> > > +            nir_phi_instr *phi_instr = (nir_phi_instr *) phi_hte-
>> > > >key;
>> > > +            insert_phi_and_set_block_on_uses(b, phi_instr);
>> > > +
>> > > +            nir_ssa_def *ssa_def = (nir_ssa_def *) phi_hte->data;
>> > > +            add_complex_unroll_phi_src(ssa_def, phi_instr,
>> > > remap_table,
>> > > +
>> > >  nir_cf_node_as_block(exit_from_node));
>> > > +
>> > > +            /* Create phi to be fixed up by next iteration */
>> > > +            nir_phi_instr *new_phi = create_complex_unroll_phi(ns,
>> > > phi_instr);
>> > > +            _mesa_hash_table_insert(tmp, new_phi, ssa_def);
>> > > +
>> > > +            struct hash_entry *loop_term_hte =
>> > > +               _mesa_hash_table_search(loop_term_phis, phi_hte-
>> > > >key);
>> > > +            if (loop_term_hte) {
>> > > +               _mesa_hash_table_insert(loop_term_phis, new_phi,
>> > > +                                       loop_term_hte->data);
>> > > +               _mesa_hash_table_remove(loop_term_phis,
>> > > loop_term_hte);
>> > > +            }
>> > > +         }
>> > > +
>> > > +         /* Now that the phis have been processed replace the
>> > > table with the
>> > > +          * phis to be fixed up in the next iteration.
>> > > +          */
>> > > +         _mesa_hash_table_destroy(lcssa_phis, NULL);
>> > > +         lcssa_phis = tmp;
>> > > +      } else {
>> > > +         struct hash_entry *phi_hte;
>> > > +         hash_table_foreach(lcssa_phis, phi_hte) {
>> > > +            /* Insert phi created in previous iteration */
>> > > +            nir_phi_instr *phi_instr = (nir_phi_instr *) phi_hte-
>> > > >key;
>> > > +            insert_phi_and_set_block_on_uses(b, phi_instr);
>> > > +
>> > > +            nir_ssa_def *ssa_def = (nir_ssa_def *) phi_hte->data;
>> > > +            add_complex_unroll_phi_src(ssa_def, phi_instr,
>> > > remap_table,
>> > > +
>> > >  nir_cf_node_as_block(exit_from_node));
>> > > +         }
>> > > +      }
>> > > +
>> > > +      /* Ready the remap tables for the next iteration */
>> > > +      update_remap_tables(i == 0, remap_table, phi_remap,
>> > > src_before_loop,
>> > > +                          src_after_loop);
>> > > +
>> > > +      /* Set the cursor to the last if in the loop body we just
>> > > unrolled ready
>> > > +       * for the next iteration.
>> > > +       */
>> > > +      b->cursor = nir_after_cf_node(continue_from_node);
>> > > +   }
>> > > +
>> > > +   /* Now that the remap table is updated add the second src to
>> > > the innermost
>> > > +    * phis.
>> > > +    */
>> > > +   hash_table_foreach(lcssa_phis, phi_hte) {
>> > > +      nir_phi_instr *phi_instr = (nir_phi_instr *) phi_hte->key;
>> > > +      nir_ssa_def *phi_src = (nir_ssa_def *) phi_hte->data;
>> > > +
>> > > +      assert(exec_list_length(&phi_instr->srcs) == 1);
>> > > +
>> > > +      /* Get the src for when exiting by the loop terminator */
>> > > +      struct hash_entry *loop_term_hte =
>> > > +         _mesa_hash_table_search(loop_term_phis, phi_instr);
>> > > +      if (loop_term_hte)
>> > > +         phi_src = (nir_ssa_def *) loop_term_hte->data;
>> > > +
>> > > +      add_complex_unroll_phi_src(phi_src, phi_instr, remap_table,
>> > > +
>> > >  nir_cf_node_as_block(continue_from_node));
>> > > +   }
>> > > +
>> > > +   /* Rewrite the uses of the old loop phis */
>> > > +   hash_table_foreach(loop_phis, phi_hte) {
>> > > +      nir_phi_instr *phi_instr = (nir_phi_instr *) phi_hte->key;
>> > > +
>> > > +      nir_foreach_use_safe(use_src, &phi_instr->dest.ssa) {
>> > > +         nir_src new_src = nir_src_for_ssa((nir_ssa_def *)
>> > > phi_hte->data);
>> > > +         nir_instr_rewrite_src(use_src->parent_instr, use_src,
>> > > new_src);
>> > > +      }
>> > > +
>> > > +      nir_foreach_if_use_safe(use_src, &phi_instr->dest.ssa) {
>> > > +         nir_src new_src = nir_src_for_ssa((nir_ssa_def *)
>> > > phi_hte->data);
>> > > +         nir_if_rewrite_condition(use_src->parent_if, new_src);
>> > > +      }
>> > > +   }
>> > > +
>> > > +   /* The loop has been unrolled so remove it. */
>> > > +   nir_cf_node_remove(&loop->cf_node);
>> > > +
>> > > +   /* Delete the original loop body */
>> > > +   nir_cf_delete(&loop_body);
>> > > +
>> > > +   _mesa_hash_table_destroy(loop_phis, NULL);
>> > > +   _mesa_hash_table_destroy(loop_term_phis, NULL);
>> > > +   _mesa_hash_table_destroy(lcssa_phis, NULL);
>> > > +   _mesa_hash_table_destroy(remap_table, NULL);
>> > > +   _mesa_hash_table_destroy(phi_remap, NULL);
>> > > +   _mesa_hash_table_destroy(src_before_loop, NULL);
>> > > +   _mesa_hash_table_destroy(src_after_loop, NULL);
>> >
>> > ralloc context?
>> >
>> > > +}
>> > > +
>> > > +static bool
>> > > +process_loops(nir_cf_node *cf_node, nir_builder *b, bool
>> > > *innermost_loop)
>> > > +{
>> > > +   bool progress = false;
>> > > +   nir_loop *loop;
>> > > +
>> > > +   switch (cf_node->type) {
>> > > +   case nir_cf_node_block:
>> > > +      return progress;
>> > > +   case nir_cf_node_if: {
>> > > +      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
>> > > +      foreach_list_typed_safe(nir_cf_node, nested_node, node,
>> > > &if_stmt->then_list)
>> > > +         progress |= process_loops(nested_node, b,
>> > > innermost_loop);
>> > > +      foreach_list_typed_safe(nir_cf_node, nested_node, node,
>> > > &if_stmt->else_list)
>> > > +         progress |= process_loops(nested_node, b,
>> > > innermost_loop);
>> > > +      return progress;
>> > > +   }
>> > > +   case nir_cf_node_loop: {
>> > > +      loop = nir_cf_node_as_loop(cf_node);
>> > > +      foreach_list_typed_safe(nir_cf_node, nested_node, node,
>> > > &loop->body)
>> > > +         progress |= process_loops(nested_node, b,
>> > > innermost_loop);
>> > > +      break;
>> > > +   }
>> > > +   default:
>> > > +      unreachable("unknown cf node type");
>> > > +   }
>> > > +
>> > > +   if (*innermost_loop) {
>> > > +      nir_function *fn = nir_cf_node_get_function(&loop->cf_node)-
>> > > >function;
>> > > +
>> > > +      /* Don't attempt to unroll outer loops or a second inner
>> > > loop in
>> > > +       * this pass wait until the next pass as we have altered the
>> > > cf.
>> > > +       */
>> > > +      *innermost_loop = false;
>> > > +
>> > > +      if (loop->info->limiting_terminator == NULL) {
>> > > +         return progress;
>> > > +      }
>> > > +
>> > > +      if (is_simple_loop(fn->shader, loop->info)) {
>> > > +         simple_unroll(fn, loop, b);
>> > > +         progress = true;
>> > > +      } else {
>> > > +         /* Attempt to unroll loops with two terminators. */
>> > > +         if (is_complex_loop(fn->shader, loop->info)) {
>> > > +            bool first_terminator = true;
>> > > +            list_for_each_entry(nir_loop_terminator, terminator,
>> > > +                                &loop->info->loop_terminator_list,
>> > > +                                loop_terminator_link) {
>> > > +
>> > > +               nir_cf_node *if_node = &terminator->nif->cf_node;
>> > > +
>> > > +               if (if_node == &loop->info->limiting_terminator-
>> > > >nif->cf_node) {
>> > > +                  first_terminator = false;
>> > > +                  continue;
>> > > +               }
>> > > +
>> > > +               /* If the first terminator has a trip count of zero
>> > > just do a
>> > > +                * simple unroll as the second terminator can never
>> > > be reached.
>> > > +                */
>> > > +               if (loop->info->trip_count == 0 &&
>> > > first_terminator) {
>> > > +                  simple_unroll(fn, loop, b);
>> > > +                  progress = true;
>> > > +                  break;
>> > > +               }
>> > > +
>> > > +               nir_if *if_stmt = nir_cf_node_as_if(if_node);
>> > > +
>> > > +               /* Determine which if-statement branch, if any,
>> > > ends with a
>> > > +                * break. Note that since predicted_num_loop_jumps
>> > > == 1, it is
>> > > +                * impossible for both branches to end with a
>> > > break.
>> > > +                */
>> > > +               nir_cf_node *last_then =
>> > > nir_if_last_then_node(if_stmt);
>> > > +               if (last_then->type == nir_cf_node_block &&
>> > > +                   ends_in_break(nir_cf_node_as_block(last_then)))
>> > > {
>> > > +
>> > > +                  complex_unroll(fn, loop, b, if_node, last_then,
>> > > false,
>> > > +                                 !first_terminator);
>> > > +
>> > > +                  progress = true;
>> > > +                  break;
>> > > +               } else {
>> > > +                  nir_cf_node *last_else =
>> > > nir_if_last_else_node(if_stmt);
>> > > +                  if (last_else->type == nir_cf_node_block &&
>> > > +
>> > > ends_in_break(nir_cf_node_as_block(last_else))) {
>> > > +
>> > > +                     complex_unroll(fn, loop, b, if_node,
>> > > last_else, true,
>> > > +                                    !first_terminator);
>> > > +
>> > > +                     progress = true;
>> > > +                     break;
>> > > +                  }
>> > > +               }
>> > > +            }
>> > > +         }
>> > > +      }
>> > > +   }
>> > > +
>> > > +   return progress;
>> > > +}
>> > > +
>> > > +static bool
>> > > +nir_opt_loop_unroll_impl(nir_function_impl *impl,
>> > > +                         nir_variable_mode indirect_mask)
>> > > +{
>> > > +   bool progress = false;
>> > > +   nir_metadata_require(impl, nir_metadata_loop_analysis,
>> > > indirect_mask);
>> > > +
>> > > +   nir_builder b;
>> > > +   nir_builder_init(&b, impl);
>> > > +
>> > > +   foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) {
>> > > +      bool innermost_loop = true;
>> > > +      progress |= process_loops(node, &b, &innermost_loop);
>> > > +   }
>> > > +
>> > > +   return progress;
>> > > +}
>> > > +
>> > > +bool
>> > > +nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode
>> > > indirect_mask)
>> > > +{
>> > > +   bool progress = false;
>> > > +
>> > > +   nir_foreach_function(function, shader) {
>> > > +      if (function->impl) {
>> > > +         progress |= nir_opt_loop_unroll_impl(function->impl,
>> > > indirect_mask);
>> > > +      }
>> > > +   }
>> > > +   return false;
>> > > +}
>> > > --
>> > > 2.7.4
>> > >
>> > > _______________________________________________
>> > > mesa-dev mailing list
>> > > mesa-dev at lists.freedesktop.org
>> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>> > >
>> >
>> > _______________________________________________
>> > mesa-dev mailing list
>> > mesa-dev at lists.freedesktop.org
>> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20161006/987d23f9/attachment-0001.html>


More information about the mesa-dev mailing list