[Mesa-dev] [PATCH 08/10] nir: add a loop unrolling pass

Timothy Arceri timothy.arceri at collabora.com
Thu Oct 6 23:07:17 UTC 2016


On Thu, 2016-10-06 at 10:33 -0700, Jason Ekstrand wrote:
> > 
> > On Wed, Oct 5, 2016 at 7:25 PM, Timothy Arceri <timothy.arceri at coll
> > abora.com> wrote:
> > > Just 
> > > 
> > > On Wed, 2016-10-05 at 16:23 -0700, Jason Ekstrand wrote:
> > > >
> > > >
> > > > On Thu, Sep 15, 2016 at 12:03 AM, Timothy Arceri <timothy.arcer
> > > i at coll
> > > > abora.com> wrote:
> > > > > V2:
> > > > > - tidy ups suggested by Connor.
> > > > > - tidy up cloning logic and handle copy propagation
> > > > >  based of suggestion by Connor.
> > > > > - use nir_ssa_def_rewrite_uses to fix up lcssa phis
> > > > >   suggested by Connor.
> > > > > - add support for complex loop unrolling (two terminators)
> > > > > - handle case were the ssa defs use outside the loop is
> > > already a
> > > > > phi
> > > > > - support unrolling loops with multiple terminators when trip
> > > count
> > > > >   is know for each terminator
> > > > > ---
> > > > >  src/compiler/Makefile.sources          |   1 +
> > > > >  src/compiler/nir/nir.h                 |   2 +
> > > > >  src/compiler/nir/nir_opt_loop_unroll.c | 820
> > > > > +++++++++++++++++++++++++++++++++
> > > > >  3 files changed, 823 insertions(+)
> > > > >  create mode 100644 src/compiler/nir/nir_opt_loop_unroll.c
> > > > >
> > > > > diff --git a/src/compiler/Makefile.sources
> > > > > b/src/compiler/Makefile.sources
> > > > > index 8ef6080..b3512bb 100644
> > > > > --- a/src/compiler/Makefile.sources
> > > > > +++ b/src/compiler/Makefile.sources
> > > > > @@ -233,6 +233,7 @@ NIR_FILES = \
> > > > >         nir/nir_opt_dead_cf.c \
> > > > >         nir/nir_opt_gcm.c \
> > > > >         nir/nir_opt_global_to_local.c \
> > > > > +       nir/nir_opt_loop_unroll.c \
> > > > >         nir/nir_opt_peephole_select.c \
> > > > >         nir/nir_opt_remove_phis.c \
> > > > >         nir/nir_opt_undef.c \
> > > > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > > > > index 9887432..0513d81 100644
> > > > > --- a/src/compiler/nir/nir.h
> > > > > +++ b/src/compiler/nir/nir.h
> > > > > @@ -2661,6 +2661,8 @@ bool nir_opt_dead_cf(nir_shader
> > > *shader);
> > > > >
> > > > >  bool nir_opt_gcm(nir_shader *shader, bool value_number);
> > > > >
> > > > > +bool nir_opt_loop_unroll(nir_shader *shader,
> > > nir_variable_mode
> > > > > indirect_mask);
> > > > > +
> > > > >  bool nir_opt_peephole_select(nir_shader *shader);
> > > > >
> > > > >  bool nir_opt_remove_phis(nir_shader *shader);
> > > > > diff --git a/src/compiler/nir/nir_opt_loop_unroll.c
> > > > > b/src/compiler/nir/nir_opt_loop_unroll.c
> > > > > new file mode 100644
> > > > > index 0000000..1de02f6
> > > > > --- /dev/null
> > > > > +++ b/src/compiler/nir/nir_opt_loop_unroll.c
> > > > > @@ -0,0 +1,820 @@
> > > > > +/*
> > > > > + * Copyright © 2016 Intel Corporation
> > > > > + *
> > > > > + * Permission is hereby granted, free of charge, to any
> > > person
> > > > > obtaining a
> > > > > + * copy of this software and associated documentation files
> > > (the
> > > > > "Software"),
> > > > > + * to deal in the Software without restriction, including
> > > without
> > > > > limitation
> > > > > + * the rights to use, copy, modify, merge, publish,
> > > distribute,
> > > > > sublicense,
> > > > > + * and/or sell copies of the Software, and to permit persons
> > > to
> > > > > whom the
> > > > > + * Software is furnished to do so, subject to the following
> > > > > conditions:
> > > > > + *
> > > > > + * The above copyright notice and this permission notice
> > > > > (including the next
> > > > > + * paragraph) shall be included in all copies or substantial
> > > > > portions of the
> > > > > + * Software.
> > > > > + *
> > > > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
> > > KIND,
> > > > > EXPRESS OR
> > > > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> > > > > MERCHANTABILITY,
> > > > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN
> > > NO
> > > > > EVENT SHALL
> > > > > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
> > > > > DAMAGES OR OTHER
> > > > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> > > OTHERWISE,
> > > > > ARISING
> > > > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
> > > USE OR
> > > > > OTHER
> > > > > + * DEALINGS IN THE SOFTWARE.
> > > > > + */
> > > > > +
> > > > > +#include "nir.h"
> > > > > +#include "nir_builder.h"
> > > > > +#include "nir_control_flow.h"
> > > > > +
> > > > > +static void
> > > > > +extract_loop_body(nir_cf_list *extracted, nir_cf_node *node)
> > > >
> > > > "node" is not particularly descriptive.  Perhaps "start_node"
> > > or
> > > > something like that.
> > > >  
> > > > > +{
> > > > > +   nir_cf_node *end = node;
> > > > > +   while (!nir_cf_node_is_last(end))
> > > > > +      end = nir_cf_node_next(end);
> > > >
> > > > This bit of iteration seems unfortunate.  If you have the loop
> > > > pointer, you can just do
> > > >
> > > > nir_cf_extract(extracted, nir_before_cf_node(node),
> > > > nir_after_cf_node(nir_loop_last_cf_node(loop))
> > > >
> > > > For that matter, is the helper even needed?  If you don't want
> > > to
> > > > type that much and want to keep the helper, you could easily
> > > get the
> > > > loop from node->parent.
> > > 
> > > Sure this was one of the first bits I wrote before finding the
> > > nir
> > > helpers. Will see if I can just remove it.
> > > 
> > > >  
> > > > > +
> > > > > +   nir_cf_extract(extracted, nir_before_cf_node(node),
> > > > > +                  nir_after_cf_node(end));
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +clone_list(nir_shader *ns, nir_loop *loop, nir_cf_list
> > > > > *src_cf_list,
> > > > > +           nir_cf_list *cloned_cf_list, struct hash_table
> > > > > *remap_table)
> > > > > +{
> > > > > +   /* Dest list needs to at least have one block */
> > > > > +   nir_block *nblk = nir_block_create(ns);
> > > > > +   nblk->cf_node.parent = loop->cf_node.parent;
> > > > > +   exec_list_push_tail(&cloned_cf_list->list, &nblk-
> > > > > >cf_node.node);
> > > >
> > > > Do you always want to do this or should it be guarded by "if
> > > > (exec_list_empty(&src_cf_list->list))"?
> > > 
> > > I think I was relying on the clone to do that check but makes
> > > sense to
> > > do it here. Will add.
> > > 
> > > >  
> > > > > +
> > > > > +   nir_clone_loop_list(&cloned_cf_list->list, &src_cf_list-
> > > >list,
> > > > > +                       remap_table, ns);
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +move_cf_list_into_if(nir_cf_list *lst, nir_cf_node *if_node,
> > > > > +                     nir_cf_node *last_node, bool
> > > > > continue_from_then_branch)
> > > > > +{
> > > > > +   nir_if *if_stmt = nir_cf_node_as_if(if_node);
> > > > > +   if (continue_from_then_branch) {
> > > > > +      /* Move the rest of the loop inside the then */
> > > > > +      nir_cf_reinsert(lst,
> > > > > nir_after_cf_node(nir_if_last_then_node(if_stmt)));
> > > > > +   } else {
> > > > > +      /* Move the rest of the loop inside the else */
> > > > > +      nir_cf_reinsert(lst,
> > > > > nir_after_cf_node(nir_if_last_else_node(if_stmt)));
> > > > > +   }
> > > > > +
> > > > > +   /* Remove the break */
> > > > > + 
> > > > >
> > >  nir_instr_remove(nir_block_last_instr(nir_cf_node_as_block(last_
> > > no
> > > > > de)));
> > > > > +}
> > > > > +
> > > > > +static bool
> > > > > +is_phi_src_phi_from_loop_header(nir_ssa_def *def,
> > > nir_ssa_def
> > > > > *src)
> > > > > +{
> > > > > +   return def->parent_instr->type == nir_instr_type_phi &&
> > > > > +      src->parent_instr->type == nir_instr_type_phi &&
> > > > > +      nir_instr_as_phi(def->parent_instr)->instr.block-
> > > >index ==
> > > > > +      nir_instr_as_phi(src->parent_instr)->instr.block-
> > > >index;
> > > >
> > > > This last check can be shortened to "def->parent_instr->block
> > > == src-
> > > > >parent_instr->block".  Converting to a phi and then doing
> > > ->instr is
> > > > just redundant.  Also, I'd rather compre block pointers than
> > > indices
> > > > if we can.
> > > 
> > > Sure.
> > > 
> > > >
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +get_table_of_lcssa_and_loop_term_phis(nir_cf_node *loop,
> > > > > +                                      struct hash_table
> > > > > **lcssa_phis,
> > > > > +                                      struct hash_table
> > > > > **loop_term_phis,
> > > > > +                                      nir_if *loop_term_if)
> > > > > +{
> > > > > +   *lcssa_phis = _mesa_hash_table_create(NULL,
> > > > > _mesa_hash_pointer, 
> > > > > +                                       
> > >  _mesa_key_pointer_equal);
> > > > > +   *loop_term_phis = _mesa_hash_table_create(NULL,
> > > > > _mesa_hash_pointer,
> > > > > +                                           
> > > > >  _mesa_key_pointer_equal);
> > > > > +
> > > > > +   nir_cf_node *cf_node = nir_cf_node_next(loop);
> > > > > +   nir_block *block = nir_cf_node_as_block(cf_node);
> > > > > +   nir_foreach_instr(instr, block) {
> > > > > +      if (instr->type == nir_instr_type_phi) {
> > > > > +         nir_phi_instr *phi = nir_instr_as_phi(instr);
> > > > > +
> > > > > +         nir_foreach_phi_src(src, phi) {
> > > > > +            nir_block *then_blk =
> > > > > +             
> > > > >  nir_cf_node_as_block(nir_if_last_then_node(loop_term_if));
> > > > > +            nir_block *else_blk =
> > > > > +             
> > > > >  nir_cf_node_as_block(nir_if_last_else_node(loop_term_if));
> > > > > +
> > > > > +            if (src->pred == then_blk || src->pred ==
> > > else_blk) {
> > > > > +               _mesa_hash_table_insert(*loop_term_phis, phi,
> > > src-
> > > > > >src.ssa);
> > > > > +            } else {
> > > > > +               _mesa_hash_table_insert(*lcssa_phis, phi,
> > > src-
> > > > > >src.ssa);
> > > > > +            }
> > > > > +         }
> > > > > +      } else {
> > > > > +         /* There should be no more phis */
> > > > > +         break;
> > > > > +      }
> > > > > +   }
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +create_remap_tables(nir_loop *loop, nir_block
> > > *loop_header_blk,
> > > > > +                    struct hash_table **remap_table,
> > > > > +                    struct hash_table **phi_remap,
> > > > > +                    struct hash_table **src_before_loop,
> > > > > +                    struct hash_table **src_after_loop)
> > > > > +{
> > > > > +   *remap_table = _mesa_hash_table_create(NULL,
> > > > > _mesa_hash_pointer,
> > > > > +                                         
> > > > > _mesa_key_pointer_equal);
> > > > > +   *phi_remap = _mesa_hash_table_create(NULL,
> > > _mesa_hash_pointer,
> > > > > +                                       
> > > _mesa_key_pointer_equal);
> > > > > +   *src_before_loop = _mesa_hash_table_create(NULL,
> > > > > _mesa_hash_pointer,
> > > > > +                                             
> > > > > _mesa_key_pointer_equal);
> > > > > +   *src_after_loop = _mesa_hash_table_create(NULL,
> > > > > _mesa_hash_pointer,
> > > > > +                                           
> > > > >  _mesa_key_pointer_equal);
> > > > > +
> > > > > +   /* Build hash tables used for remapping as we unroll. */
> > > > > +   nir_foreach_instr(instr, loop_header_blk) {
> > > > > +      if (instr->type != nir_instr_type_phi)
> > > > > +         break;
> > > > > +
> > > > > +      nir_phi_instr *phi = nir_instr_as_phi(instr);
> > > > > +      nir_foreach_phi_src(src, phi) {
> > > > > +         /* Is the pred from the block itself? */
> > > >
> > > > I'm not sure what this comment is trying to say.  The logic
> > > below
> > > > determines whether or not the predecessor is from some block
> > > inside
> > > > the loop but not the header block.
> > > 
> > > I'll update this to make a bit more sense.
> > > 
> > > >  
> > > > > +         if (src->pred->index > phi->instr.block->index &&
> > > > > +             src->pred->cf_node.parent == &loop->cf_node) {
> > > > > +
> > > > > +            _mesa_hash_table_insert(*phi_remap, &phi-
> > > >dest.ssa,
> > > > > src->src.ssa);
> > > > > +            _mesa_hash_table_insert(*src_after_loop, &phi-
> > > > > >dest.ssa,
> > > > > +                                    src->src.ssa);
> > > > > +         } else {
> > > >
> > > > Perhaps assert(src->pred == start_bloc || src->pred ==
> > > > block_before_loop)
> > > 
> > > Sure.
> > > 
> > > >  
> > > > > +            _mesa_hash_table_insert(*remap_table, &phi-
> > > >dest.ssa,
> > > > > +                                    src->src.ssa);
> > > > > +            _mesa_hash_table_insert(*src_before_loop, &phi-
> > > > > >dest.ssa,
> > > > > +                                    src->src.ssa);
> > > > > +         }
> > > > > +      }
> > > > > +   }
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +update_remap_tables(bool is_first_iteration, struct
> > > hash_table
> > > > > *remap_table,
> > > > > +                    struct hash_table *phi_remap,
> > > > > +                    struct hash_table *src_before_loop,
> > > > > +                    struct hash_table *src_after_loop)
> > > > > +{
> > > > > +   struct hash_entry *phi_hte;
> > > > > +   hash_table_foreach(phi_remap, phi_hte) {
> > > > > +      struct hash_entry *remap_hte =
> > > > > +         _mesa_hash_table_search(remap_table, phi_hte-
> > > >data);
> > > > > +
> > > > > +      nir_ssa_def *phi_def = (nir_ssa_def *) phi_hte->key;
> > > > > +      nir_ssa_def *phi_src = (nir_ssa_def *) phi_hte->data;
> > > > > +
> > > > > +      if (!remap_hte && is_first_iteration) {
> > > > > +         _mesa_hash_table_insert(remap_table, phi_hte->key,
> > > > > phi_hte->data);
> > > > > +         continue;
> > > > > +      }
> > > > > +
> > > > > +      if (is_phi_src_phi_from_loop_header(phi_def, phi_src))
> > > {
> > > > > +          /* After copy propagation we can end up with phis
> > > inside
> > > > > loops
> > > > > +           * that look like this:
> > > > > +           *
> > > > > +           *    vec1 32 ssa_14 = phi block_0: ssa_9,
> > > block_4:
> > > > > ssa_13
> > > > > +           *    vec1 32 ssa_13 = phi block_0: ssa_8,
> > > block_4:
> > > > > ssa_12
> > > > > +           *    vec1 32 ssa_12 = phi block_0: ssa_7,
> > > block_4:
> > > > > ssa_11
> > > > > +           *    vec1 32 ssa_11 = phi block_0: ssa_6,
> > > block_4:
> > > > > ssa_14
> > > > > +           *
> > > > > +           * For each iteration of the loop we need to
> > > update the
> > > > > phi and
> > > > > +           * cloning remap tables so that we use the correct
> > > src
> > > > > for the
> > > > > +           * next iteration.
> > > > > +           */
> > > > > +          struct hash_entry *sbl_hte =
> > > > > +             _mesa_hash_table_search(src_before_loop,
> > > phi_hte-
> > > > > >data);
> > > > > +          _mesa_hash_table_insert(remap_table, phi_hte->key,
> > > > > sbl_hte->data);
> > > > > +
> > > > > +          struct hash_entry *sal_hte =
> > > > > +             _mesa_hash_table_search(src_after_loop,
> > > phi_hte-
> > > > > >data);
> > > > > +          phi_hte->data = sal_hte->data;
> > > > > +      } else if (remap_hte) {
> > > > > +          _mesa_hash_table_insert(remap_table, phi_hte->key,
> > > > > remap_hte->data);
> > > > > +      }
> > > >
> > > > Hrm... I understand the problem but I would have thought we
> > > could
> > > > have done it with fewer remap tables...  My naeve idea would be
> > > to do
> > > > as follows:
> > > >
> > > > In create_remap_tables, populate the remap table with phi_dst
> > > ->
> > > > phi_src where phi_src is the source with pred ==
> > > block_before_loop.
> > > >
> > > > In update_remap_tables, populate the remap table with phi_dst
> > > ->
> > > > remap(phi_src) where phi_src is the soure with pred ==
> > > continue_block
> > > >
> > > > Am I just over-simplifying things here?
> > > 
> > > I believe you are. The remap table is updated by the cloning
> > > process
> > > our job here is to simply make sure it is in the right state
> > > before we
> > > do the next clone. The key thing is that we are always cloning
> > > the
> > > original header/body (were are not cloning the previous clone).
> > 
> > Ah, that last point was a big part of what I was missing.
> >  
> > > So the above code is acting more as a reset, or initialisation of
> > > the
> > > remap table for the next iteration of the loop (although we want
> > > to
> > > keep what it already contains).
> > > 
> > > Essentially the code is doing what you are suggesting you are
> > > just
> > > leaving out the part where updating phi_src with the source from
> > > pred
> > > == continue_block only works for the first iteration. For the
> > > next
> > > iteration we use the value already in the remap table after the
> > > clone,
> > > or the case where there is no mov i.e no clone we need to set
> > > things up
> > > manually using the src_before_loop/src_after_loop hash tables.
> > > 
> > > Does this make sense? 
> > 
> > Yes, but I still think you could make it simpler and/or more
> > obvious if you did it in terms of the phi nodes themselves rather
> > than these tables that you generate from the phi nodes (which is
> > really just a less deterministic list of nodes).  Something like
> > this perhaps:
> > 
> > nir_foreach_instr(instr, header_block) {
> >    if (instr->type != nir_instr_type_phi)
> >       break;
> > 
> >    nir_phi_instr *phi = nir_instr_as_phi(instr);
> > 
> >    nir_foreach_phi_src(src, phi) {
> >       if (src->pred != continue_block)
> >          continue;
> > 
> >       struct hash_entry *src_remap = _mesa_hash_table_search(remap,
> > src->src.ssa);
> >       if (src_remap) {
> >          _mesa_hash_table_insert(remap, &phi->dst.ssa, src_remap-
> > >data);
> >       } else {
> >          _mesa_hash_table_insert(remap, &phi->dst.ssa, src-
> > >src.ssa);
> >       }
> >    }
> > }
> > 
> > That seems much more obvious than hash tables which are actually
> > lists.  Sure, you could argue that walking the sources of phis
> > looks inefficient but, given that this is the simple case, they're
> > guaranteed to have exactly 2 sources, so meh.
> > 
> > It's entirely possible that I'm still missing something...
> > 
> 
> And... I just realized what I'm missing... :-)  The phis may end up
> mapping to other phis whose entries in the remap table we've already
> mutated.  Really, we need to do it in two steps:  One to compute the
> set of remaps and then a second step to apply them.  So my algorithm
> would have to be modified as follows:
> 
> Add a phi_remap table and, in the above code, do the
> _mesa_hash_table_insert operations on the phi_remap.  Then, do
> 
> struct hash_entry *phi_entry;
> hash_table_foreach(phi_entry, phi_remap)
>    _mesa_hash_table_insert(remap, phi_entry->key, phi_entry->data);
> 
> to apply the remaps.  Still, I think that's more obvious than the 4
> tables approach.ir_foreach_phi_src(

I correct me if I'm wrong but I think your still missing a step. Here
you are only updating the remap_table. We also need to update phi_remap
because the original phis are only valid for the initial iteration. For
example if we have:

   vec1 32 ssa_14 = phi block_0: ssa_9, block_4: ssa_13
   vec1 32 ssa_13 = phi block_0: ssa_8, block_4: ssa_12
   vec1 32 ssa_12 = phi block_0: ssa_7, block_4: ssa_11
   vec1 32 ssa_11 = phi block_0: ssa_6, block_4: ssa_14

   ... do something with ssa_11 ...
   ... do something with ssa_12 ...
   ... do something with ssa_13 ...
   ... do something with ssa_14 ...

The following iterations would follow this pattern:

Iteration 1:              Iteration 2:

Remap table:              Remap table:

----------------------    ----------------------
| Key     | Value    |    | Key     | Value    |
----------------------    ----------------------
| ssa_14  | ssa_9    |    | ssa_14  | ssa_8    |
| ssa_13  | ssa_8    |    | ssa_13  | ssa_7    |
| ssa_12  | ssa_7    |    | ssa_12  | ssa_6    |
| ssa_11  | ssa_6    |    | ssa_11  | ssa_9    |
----------------------    ----------------------

Phi_remap table:         Phi_remap table:

----------------------    ----------------------
| Key     | Value    |
   | Key     | Value    |
----------------------    ------------------
----
| ssa_14  | ssa_13   |    | ssa_14  | ssa_12   |
| ssa_13  | ssa_12
  |    | ssa_13  | ssa_11   |
| ssa_12  | ssa_11   |    | ssa_12  |
ssa_14   |
| ssa_11  | ssa_14   |    | ssa_11  | ssa_13   |
-----------
-----------    ----------------------

Both tables are changing which is why we need the other two tables so
we can have something to refer back to, and use to update these tables
as we go.



> 
> --Jason
>  
> > > It would be nice if we could distill this into an easy to follow
> > > comment to put on the function itself, happy for your input to
> > > help get
> > > to something that not only I understand :)
> > > 
> > > How about something like:
> > > 
> > > /*
> > >  * The remap_table is updated by the cloning process, our job
> > > here is 
> > >  * to simply make sure it is in the right state before we do the
> > > next
> > >  * clone. The key thing is that we are always cloning the
> > > original 
> > >  * header/body (were are not cloning the previous clone).
> > >  *
> > >  * Since the cloning process is updating the remap_table we can
> > > think
> > >  * of this function as acting more as a reset, or initialisation
> > > of the
> > >  * remap table for the next iteration of the loop (although we
> > > want to
> > >  * keep what it already contains).
> > >  *
> > >  * After the first iteration we use the value already in
> > > remap_table to
> > >  * update the phi_src with the new src from the previous clone,
> > > or in
> > >  * the case where there is no mov i.e no clone, we update the 
> > >  * remap_table using the src_before_loop/src_after_loop hash
> > > tables.
> > >  */
> > > 
> > > >
> > > > > +   }
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +insert_phi_and_set_block_on_uses(nir_builder *b,
> > > nir_phi_instr
> > > > > *phi_instr)
> > > > > +{
> > > > > +   nir_instr_insert(b->cursor, &phi_instr->instr);
> > > > > +
> > > > > +   /* Now that we have inserted the phi fix up the block for
> > > its
> > > > > uses. */
> > > > > +   nir_foreach_use_safe(use_src, &phi_instr->dest.ssa) {
> > > > > +      nir_phi_instr *use_phi = nir_instr_as_phi(use_src-
> > > > > >parent_instr);
> > > > > +
> > > > > +      foreach_list_typed(nir_phi_src, src, node, &use_phi-
> > > >srcs) {
> > > > > +         if (!src->pred)
> > > > > +            src->pred = phi_instr->dest.ssa.parent_instr-
> > > >block;
> > > > > +      }
> > > > > +   }
> > > > > +}
> > > > > +
> > > > > +static nir_phi_instr *
> > > > > +create_complex_unroll_phi(nir_shader *ns, nir_phi_instr
> > > > > *prev_phi_instr)
> > > > > +{
> > > > > +   nir_phi_instr *new_phi = nir_phi_instr_create(ns);
> > > > > +   nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1,
> > > > > +                     prev_phi_instr->dest.ssa.bit_size,
> > > NULL);
> > > > > +
> > > > > +   /* Add the new phi as a src to the phi from the previous
> > > > > iteration */
> > > > > +   nir_phi_src *new_src = ralloc(prev_phi_instr,
> > > nir_phi_src);
> > > > > +   new_src->src = nir_src_for_ssa(&new_phi->dest.ssa);
> > > > > +   new_src->src.parent_instr = &prev_phi_instr->instr;
> > > > > +   exec_list_push_tail(&prev_phi_instr->srcs, &new_src-
> > > >node);
> > > > > +   list_addtail(&new_src->src.use_link, &new_src->src.ssa-
> > > >uses);
> > > > > +
> > > > > +   return new_phi;
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +add_complex_unroll_phi_src(nir_ssa_def *phi_src,
> > > nir_phi_instr
> > > > > *phi_instr,
> > > > > +                           struct hash_table *remap_table,
> > > > > nir_block *blk)
> > > > > +{
> > > > > +   struct hash_entry *hte =
> > > > > +      _mesa_hash_table_search(remap_table, phi_src);
> > > > > +
> > > > > +   nir_phi_src *new_src = ralloc(phi_instr, nir_phi_src);
> > > > > +   nir_ssa_def *ssa_def = hte ? (nir_ssa_def *) hte->data :
> > > > > phi_src;
> > > > > +   new_src->pred = blk;
> > > > > +   new_src->src = nir_src_for_ssa(ssa_def);
> > > > > +   new_src->src.parent_instr = &phi_instr->instr;
> > > > > +   list_addtail(&new_src->src.use_link, &new_src->src.ssa-
> > > >uses);
> > > > > +
> > > > > +   exec_list_push_tail(&phi_instr->srcs, &new_src->node);
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +simple_loop_fix_lcssa_phis(nir_cf_node *loop, struct
> > > hash_table
> > > > > *remap_table)
> > > > > +{
> > > > > +   nir_block *prev_block =
> > > > > nir_cf_node_as_block(nir_cf_node_prev(loop));
> > > > > +   nir_cf_node *cf_node = nir_cf_node_next(loop);
> > > > > +   assert(cf_node->type == nir_cf_node_block);
> > > > > +
> > > > > +   nir_block *block = nir_cf_node_as_block(cf_node);
> > > > > +   nir_foreach_instr_safe(instr, block) {
> > > > > +      if (instr->type == nir_instr_type_phi) {
> > > > > +         nir_phi_instr *phi = nir_instr_as_phi(instr);
> > > > > +
> > > > > +         nir_foreach_phi_src_safe(src, phi) {
> > > > > +            /* Update predecessor */
> > > > > +            src->pred = prev_block;
> > > > > +
> > > > > +            /* Update src */
> > > > > +            struct hash_entry *hte =
> > > > > +               _mesa_hash_table_search(remap_table, src-
> > > >src.ssa);
> > > > > +            assert(hte || !phi->is_lcssa_phi);
> > > > > +            if (hte) {
> > > > > +               nir_src new_src =
> > > nir_src_for_ssa((nir_ssa_def *)
> > > > > hte->data);
> > > > > +               if (phi->is_lcssa_phi ||
> > > exec_list_length(&phi-
> > > > > >srcs) == 1) {
> > > > > +                  nir_ssa_def_rewrite_uses(&phi->dest.ssa,
> > > > > new_src);
> > > > > +               } else {
> > > > > +                  nir_instr_rewrite_src(instr, &src->src,
> > > > > new_src);
> > > > > +               }
> > > > > +            } else {
> > > > > +               /* If a non lcssa phi now only has 1 src
> > > rewrite
> > > > > its uses here.
> > > > > +                * This avoids the src getting rewritten to
> > > an
> > > > > undefined def,
> > > > > +                * which appears to be done in
> > > nir_cf_node_remove()
> > > > > when
> > > > > +                * removing the loop.
> > > > > +                */
> > > > > +               if (exec_list_length(&phi->srcs) == 1) {
> > > > > +                  struct exec_node *head =
> > > > > exec_list_get_head(&phi->srcs);
> > > > > +                  nir_phi_src *phi_src =
> > > > > exec_node_data(nir_phi_src, head, node);
> > > > > +                  nir_ssa_def_rewrite_uses(&phi->dest.ssa,
> > > > > phi_src->src);
> > > > > +               }
> > > > > +            }
> > > > > +         }
> > > > > +         if (phi->is_lcssa_phi || exec_list_length(&phi-
> > > >srcs) ==
> > > > > 1)
> > > > > +            nir_instr_remove(&phi->instr);
> > > >
> > > > I'm not convinced that this logic is at all what we want.  It
> > > seems
> > > > like what we want is more something along these lines:
> > > >
> > > > nir_foreach_phi_src(src, phi) {
> > > >    if (src->pred != block_with_break_in_terminator)
> > > >       continue;
> > > 
> > > I was under the impression NIR never delays the use of phis so no
> > > phi
> > > following a loop should ever not meet this condition right?
> > > 
> > 
> > Right.  For simple loops they should all be lcssa phis with exactly
> > one source.
> >  
> > > >
> > > >    struct hash_entry *hte =
> > > _mesa_hash_table_search(remap_table, src-
> > > > >src.ssa);
> > > >    if (hte) {
> > > >       nir_ssa_def_rewrite_uses(&phi->dest.ssa,
> > > > nir_src_for_ssa((nir_ssa_def *)hte->data));
> > > >    } else {
> > > >       nir_ssa_def_rewrite_uses(&phi->dest.ssa, src->src);
> > > >    }
> > > > }
> > > > assert(list_is_empty(&phi->dest.ssa.uses));
> > > >
> > > > What we really want for the phis after the loop is to take the
> > > value
> > > > that it would have taken coming from the break.
> > > 
> > > Right. Which is what the code does but for simple loops there
> > > should
> > > only ever be a single exit point so your simpler code *should*
> > > work.
> > > I'll give it a try thanks.
> > > 
> > > >  
> > > > > +      } else {
> > > > > +         /* There should be no more LCSSA-phis */
> > > > > +         break;
> > > > > +      }
> > > > > +   }
> > > > > +}
> > > > > +
> > > > > +static bool
> > > > > +ends_in_break(nir_block *block)
> > > > > +{
> > > > > +   if (exec_list_is_empty(&block->instr_list))
> > > > > +      return false;
> > > > > +
> > > > > +   nir_instr *instr = nir_block_last_instr(block);
> > > > > +   return instr->type == nir_instr_type_jump &&
> > > > > +      nir_instr_as_jump(instr)->type == nir_jump_break;
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * Unroll a loop which does not contain any jumps.  For
> > > example,
> > > > > if the input
> > > > > + * is:
> > > > > + *
> > > > > + *     (loop (...) ...instrs...)
> > > > > + *
> > > > > + * And the iteration count is 3, the output will be:
> > > > > + *
> > > > > + *     ...instrs... ...instrs... ...instrs...
> > > > > + */
> > > > > +static void
> > > > > +simple_unroll(nir_function *fn, nir_loop *loop, nir_builder
> > > *b)
> > > > > +{
> > > > > +   nir_shader *ns = fn->shader;
> > > > > +
> > > > > +   /* Get the loop header this contains a bunch of phis and
> > > the
> > > > > loops
> > > > > +    * conditional.
> > > > > +    */
> > > > > +   nir_cf_node *lp_header_cf_node =
> > > nir_loop_first_cf_node(loop);
> > > > > +   nir_block *loop_header_blk =
> > > > > nir_cf_node_as_block(lp_header_cf_node);
> > > > > +
> > > > > +   struct hash_table *remap_table;
> > > > > +   struct hash_table *phi_remap;
> > > > > +   struct hash_table *src_before_loop;
> > > > > +   struct hash_table *src_after_loop;
> > > > > +   create_remap_tables(loop, loop_header_blk, &remap_table,
> > > > > &phi_remap,
> > > > > +                       &src_before_loop, &src_after_loop);
> > > > > +
> > > > > +   /* Skip over loop terminator and get the loop body. */
> > > > > +   nir_cf_node *if_node = &loop->info->limiting_terminator-
> > > >nif-
> > > > > >cf_node;
> > > > > +   list_for_each_entry(nir_loop_terminator, terminator,
> > > > > +                       &loop->info->loop_terminator_list,
> > > > > loop_terminator_link) {
> > > > > +       nir_cf_node *loop_node = &terminator->nif->cf_node;
> > > > > +
> > > > > +      /* Remove all but the limiting terminator as we know
> > > the
> > > > > other exit
> > > > > +       * conditions can never be met.
> > > > > +       */
> > > > > +      if (loop_node != &loop->info->limiting_terminator-
> > > >nif-
> > > > > >cf_node) {
> > > > > +         nir_cf_node_remove(loop_node);
> > > >
> > > > Can we really do this?  What happens if the terminator is
> > > >
> > > > if (...) {
> > > >    /*stuff */
> > > >    break;
> > > > } else {
> > > >    /* other stuff */
> > > > }
> > > >
> > > > Answer (from IRC): That can never happen since loop analysis
> > > will
> > > > bail in that case.
> > > >
> > > > Also, is it actually necessary to delete the other
> > > terminators?  Can
> > > > we just delete the breaks from them and trust dead_cf to clean
> > > them
> > > > up properly?
> > > 
> > > I *think* we can do this (and I did a some stage) but to what
> > > ends? The
> > > code to do so would be more complex and it means we are cloning a
> > > whole
> > > bunch of extra if staments.
> > > 
> > 
> > Just thinking through things. :-)  If we want to deal with non-
> > simple if's, that might be a good way to go.
> >  
> > > >  
> > > > > +      }
> > > > > +   }
> > > > > +
> > > > > +   nir_cf_node *cf_node = nir_cf_node_next(if_node);
> > > > > +
> > > > > +   /* Pluck out the loop header */
> > > > > +   nir_cf_list lp_header;
> > > > > +   nir_cf_extract(&lp_header,
> > > > > nir_before_cf_node(lp_header_cf_node),
> > > > > +                  nir_before_cf_node(if_node));
> > > > > +
> > > > > +   /* Pluck out the loop body */
> > > > > +   nir_cf_list loop_body;
> > > > > +   extract_loop_body(&loop_body, cf_node);
> > > >
> > > > It might be clearer if this were
> > > >
> > > > nir_cf_extract(&loop_body, nir_after_cf_node(if_node),
> > > > nir_after_cf_node(nir_loop_last_cf_node(loop)));
> > > >
> > > > I'm not sure the extract_loop_body helper is actually helping
> > > you.
> > > 
> > > Sure.
> > > 
> > > >  
> > > > > +
> > > > > +   /* Clone the loop header */
> > > > > +   nir_cf_list cloned_header;
> > > > > +   exec_list_make_empty(&cloned_header.list);
> > > > > +   cloned_header.impl = loop_body.impl; 
> > > > > +
> > > > > +   clone_list(ns, loop, &lp_header, &cloned_header,
> > > remap_table);
> > > >
> > > > It would be cool if these 4 lines were just
> > > >
> > > > nir_cf_list_clone(&lp_header, &cloned_header, remap_table);
> > > 
> > > Hmm. I'm not sure thats better. It would still be at least 2
> > > lines:
> > > 
> > > nir_cf_list cloned_header;   
> > > nir_cf_list_clone(&lp_header, &cloned_header, remap_table);
> > 
> > Sure, but it would also simplify the clone implementation a bit and
> > a cf_list is a well-defined thing in NIR where as an arbitrary
> > exec_list with things that we've been promissed are cf_nodes isn't
> > so much.
> >  
> > > But now we are calling:
> > > 
> > > exec_list_make_empty(&cloned_header.list);
> > > cloned_header.impl = loop_body.impl;
> > > 
> > > Inside the loop below when we don't need to. Although I guess
> > > it's
> > > normally not going to be a huge number of iterations so I guess
> > > that
> > > its worth it for cleaner code. Ok I'm convinced I'll change it.
> > > 
> > > >  
> > > > > +
> > > > > +   /* Insert cloned loop header before the loop */
> > > > > +   b->cursor = nir_before_cf_node(&loop->cf_node);
> > > > > +   nir_cf_reinsert(&cloned_header, b->cursor);
> > > > > +
> > > > > +   /* Create temp block to store the cloned loop body as we
> > > unroll
> > > > > */
> > > > > +   nir_cf_list unrolled_lp_body;
> > > > > +   exec_list_make_empty(&unrolled_lp_body.list);
> > > > > +   unrolled_lp_body.impl = loop_body.impl;
> > > > > +
> > > > > +   /* Clone loop header and append to the loop body */
> > > > > +   for (unsigned i = 0; i < loop->info->trip_count; i++) {
> > > > > +      /* Clone loop body */
> > > > > +      clone_list(ns, loop, &loop_body, &unrolled_lp_body,
> > > > > remap_table);
> > > > > +
> > > > > +      update_remap_tables(i == 0, remap_table, phi_remap,
> > > > > src_before_loop,
> > > > > +                          src_after_loop);
> > > > > +
> > > > > +      /* Insert unrolled loop body before the loop */
> > > > > +      b->cursor = nir_before_cf_node(&loop->cf_node);
> > > > > +      nir_cf_reinsert(&unrolled_lp_body, b->cursor);
> > > > > +
> > > > > +      /* Clone loop header */
> > > > > +      clone_list(ns, loop, &lp_header, &cloned_header,
> > > > > remap_table);
> > > > > +
> > > > > +      /* Insert loop header after loop body */
> > > > > +      b->cursor = nir_before_cf_node(&loop->cf_node);
> > > > > +      nir_cf_reinsert(&cloned_header, b->cursor);
> > > > > +   }
> > > > > +
> > > > > +   /* The loop has been unrolled so remove it. */
> > > > > +   simple_loop_fix_lcssa_phis(&loop->cf_node, remap_table);
> > > > > +
> > > > > +   /* Remove the loop */
> > > > > +   nir_cf_node_remove(&loop->cf_node);
> > > > > +
> > > > > +   /* Delete the original loop body & header */
> > > > > +   nir_cf_delete(&lp_header);
> > > > > +   nir_cf_delete(&loop_body);
> > > > > +
> > > > > +   _mesa_hash_table_destroy(remap_table, NULL);
> > > > > +   _mesa_hash_table_destroy(phi_remap, NULL);
> > > > > +   _mesa_hash_table_destroy(src_before_loop, NULL);
> > > > > +   _mesa_hash_table_destroy(src_after_loop, NULL);
> > > >
> > > > I think I would be mildly happier if you used a ralloc context
> > > rather
> > > > than destroying them all manually.
> > > 
> > > Yeah that would have saved a few crashes when writing the complex
> > > unroll code. Will do.
> > > 
> > > >  
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * Unroll a loop with two exists when the trip count of one
> > > of the
> > > > > exits is
> > > > > + * unknown.  If continue_from_then_branch is true, the loop
> > > is
> > > > > repeated only
> > > > > + * when the "then" branch of the if is taken; otherwise it
> > > is
> > > > > repeated only
> > > > > + * when the "else" branch of the if is taken.
> > > > > + *
> > > > > + * For example, if the input is:
> > > > > + *
> > > > > + *     (loop (...)
> > > > > + *      ...body...
> > > > > + *      (if (cond)
> > > > > + *          (...then_instrs...)
> > > > > + *        (...else_instrs...)))
> > > > > + *
> > > > > + * And the iteration count is 3, and \c
> > > continue_from_then_branch
> > > > > is true,
> > > > > + * then the output will be:
> > > > > + *
> > > > > + *     ...body...
> > > > > + *     (if (cond)
> > > > > + *         (...then_instrs...
> > > > > + *          ...body...
> > > > > + *          (if (cond)
> > > > > + *              (...then_instrs...
> > > > > + *               ...body...
> > > > > + *               (if (cond)
> > > > > + *                   (...then_instrs...)
> > > > > + *                 (...else_instrs...)))
> > > > > + *            (...else_instrs...)))
> > > > > + *       (...else_instrs))
> > > > > + */
> > > > > +static void
> > > > > +complex_unroll(nir_function *fn, nir_loop *loop, nir_builder
> > > *b,
> > > > > +               nir_cf_node *if_node, nir_cf_node *last_node,
> > > > > +               bool continue_from_then_branch, bool
> > > > > limiting_term_second)
> > > > > +{
> > > > > +   nir_cf_node *limiting_trm = &loop->info-
> > > >limiting_terminator-
> > > > > >nif->cf_node;
> > > > > +   nir_cf_node *lp_header_cf_node =
> > > nir_loop_first_cf_node(loop);
> > > > > +   nir_block *loop_header_blk =
> > > > > nir_cf_node_as_block(lp_header_cf_node);
> > > > > +
> > > > > +   struct hash_table *remap_table;
> > > > > +   struct hash_table *phi_remap;
> > > > > +   struct hash_table *src_before_loop;
> > > > > +   struct hash_table *src_after_loop;
> > > > > +   create_remap_tables(loop, loop_header_blk, &remap_table,
> > > > > &phi_remap,
> > > > > +                       &src_before_loop, &src_after_loop);
> > > > > +
> > > > > +   struct hash_table *loop_phis;
> > > > > +   struct hash_table *loop_term_phis;
> > > > > +   get_table_of_lcssa_and_loop_term_phis(&loop->cf_node,
> > > > > &loop_phis,
> > > > > +                                         &loop_term_phis,
> > > > > +                                         loop->info-
> > > > > >limiting_terminator->nif);
> > > > > +
> > > > > +   if (limiting_term_second) {
> > > > > +      /* We need some special handling when its the second
> > > > > terminator causing
> > > > > +       * us to exit the loop for example:
> > > > > +       *
> > > > > +       *   for (int i = 0; i < uniform_lp_count; i++) {
> > > > > +       *      colour = vec4(0.0, 1.0, 0.0, 1.0);
> > > > > +       *
> > > > > +       *      if (i == 1)
> > > > > +       *         break;
> > > > > +       *      }
> > > > > +       *      ... any further code is unreachable after i ==
> > > 1 ...
> > > > > +       *   }
> > > > > +       *
> > > > > +       * Bump the trip count by one so we actually clone
> > > > > something. Also
> > > > > +       * extract everything after the limiting terminator
> > > and
> > > > > insert it into
> > > > > +       * the branch we will continue from.
> > > > > +       */
> > > > > +      loop->info->trip_count++;
> > > > > +
> > > > > +      nir_cf_list after_lt;
> > > > > +      extract_loop_body(&after_lt,
> > > > > nir_cf_node_next(limiting_trm));
> > > > > +
> > > > > +      nir_if *if_stmt = loop->info->limiting_terminator-
> > > >nif;
> > > > > +      nir_cf_node *last_then =
> > > nir_if_last_then_node(if_stmt);
> > > > > +      if (last_then->type == nir_cf_node_block &&
> > > > > +          ends_in_break(nir_cf_node_as_block(last_then))) {
> > > > > +         move_cf_list_into_if(&after_lt, limiting_trm,
> > > last_then,
> > > > > false);
> > > > > +      } else {
> > > > > +         nir_cf_node *last_else =
> > > nir_if_last_else_node(if_stmt);
> > > > > +         if (last_else->type == nir_cf_node_block &&
> > > > > +             ends_in_break(nir_cf_node_as_block(last_else)))
> > > {
> > > > > +            move_cf_list_into_if(&after_lt, limiting_trm,
> > > > > last_else, true);
> > > > > +         }
> > > > > +      }
> > > > > +   } else {
> > > > > +      /* Remove the limiting terminator.  Loop analysis will
> > > only
> > > > > find a
> > > > > +       * terminator for trival if statments (then only
> > > contains
> > > > > break, else
> > > > > +       * is empty) so its safe to remove the whole thing.
> > > > > +       */
> > > > > +      nir_cf_node_remove(limiting_trm);
> > > > > +   }
> > > > > +
> > > > > +   nir_shader *ns = fn->shader;
> > > > > +   struct hash_table *lcssa_phis =
> > > > > +      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
> > > > > +                              _mesa_key_pointer_equal);
> > > > > +
> > > > > +   /* Create phis to be used post-if (replacements for the
> > > post-
> > > > > loop phis) */
> > > > > +   struct hash_entry *phi_hte;
> > > > > +   hash_table_foreach(loop_phis, phi_hte) {
> > > > > +      nir_phi_instr *phi_instr = (nir_phi_instr *) phi_hte-
> > > >key;
> > > > > +      nir_phi_instr *new_phi = create_complex_unroll_phi(ns,
> > > > > phi_instr);
> > > > > +
> > > > > +      nir_ssa_def *ssa_def = (nir_ssa_def *) phi_hte->data;
> > > > > +      _mesa_hash_table_insert(lcssa_phis, new_phi, ssa_def);
> > > > > +
> > > > > +      /* Update loop_phis to point to the replacement phi */
> > > > > +      phi_hte->data = &new_phi->dest.ssa;
> > > > > +
> > > > > +      struct hash_entry *loop_term_hte =
> > > > > +         _mesa_hash_table_search(loop_term_phis, phi_hte-
> > > >key);
> > > > > +      if (loop_term_hte) {
> > > > > +         _mesa_hash_table_insert(loop_term_phis, new_phi,
> > > > > loop_term_hte->data);
> > > > > +         _mesa_hash_table_remove(loop_term_phis,
> > > loop_term_hte);
> > > > > +      }
> > > > > +   }
> > > > > +
> > > > > +   /* Move everything after the terminator we don't have a
> > > trip
> > > > > count for
> > > > > +    * inside the if.
> > > > > +    */
> > > > > +   nir_cf_list loop_end;
> > > > > +   extract_loop_body(&loop_end, nir_cf_node_next(if_node));
> > > > > +   nir_if *if_stmt = nir_cf_node_as_if(if_node);
> > > > > +   move_cf_list_into_if(&loop_end, if_node, last_node,
> > > > > +                        continue_from_then_branch);
> > > > > +
> > > > > +   /* Pluck out the loop body. Unlike the simple unroll pass
> > > there
> > > > > are no
> > > > > +    * breaks remaining in the loop so we do not have the
> > > concept
> > > > > of a loop
> > > > > +    * header and a loop body, instead we just extract
> > > everything.
> > > > > +    */
> > > > > +   nir_cf_list loop_body;
> > > > > +   extract_loop_body(&loop_body, lp_header_cf_node);
> > > > > +
> > > > > +   /* Create temp block to store the cloned loop body as we
> > > unroll
> > > > > */
> > > > > +   nir_cf_list unrolled_lp_body;
> > > > > +   exec_list_make_empty(&unrolled_lp_body.list);
> > > > > +   unrolled_lp_body.impl = loop_body.impl;
> > > > > +
> > > > > +   /* Set the cursor to before the loop */
> > > > > +   b->cursor = nir_before_cf_node(&loop->cf_node);
> > > > > +
> > > > > +   nir_cf_node *continue_from_node = NULL;
> > > > > +   for (unsigned i = 0; i < loop->info->trip_count; i++) {
> > > > > +      /* Clone loop body */
> > > > > +      clone_list(ns, loop, &loop_body, &unrolled_lp_body,
> > > > > remap_table);
> > > > > +
> > > > > +      nir_cf_node *last_node =
> > > > > +         exec_node_data(nir_cf_node,
> > > > > +                       
> > > > > exec_list_get_tail(&unrolled_lp_body.list), node);
> > > > > +      assert(last_node->type == nir_cf_node_block &&
> > > > > +           
> > >  exec_list_is_empty(&nir_cf_node_as_block(last_node)-
> > > > > >instr_list));
> > > > > +
> > > > > +      /* Insert unrolled loop body */
> > > > > +      nir_cf_reinsert(&unrolled_lp_body, b->cursor);
> > > > > +
> > > > > +      nir_cf_node *if_node = nir_cf_node_prev(last_node);
> > > > > +      assert(if_node->type == nir_cf_node_if);
> > > > > +      if_stmt = nir_cf_node_as_if(if_node);
> > > > > +
> > > > > +      nir_cf_node *exit_from_node;
> > > > > +      if (continue_from_then_branch) {
> > > > > +         continue_from_node =
> > > nir_if_last_then_node(if_stmt);
> > > > > +         exit_from_node = nir_if_last_else_node(if_stmt);
> > > > > +      } else {
> > > > > +         exit_from_node = nir_if_last_then_node(if_stmt);
> > > > > +         continue_from_node =
> > > nir_if_last_else_node(if_stmt);
> > > > > +      }
> > > > > +
> > > > > +      b->cursor = nir_after_cf_node(if_node);
> > > > > +      if (i < loop->info->trip_count - 1) {
> > > > > +         struct hash_table *tmp =
> > > > > +            _mesa_hash_table_create(NULL,
> > > _mesa_hash_pointer,
> > > > > +                                   
> > > _mesa_key_pointer_equal);
> > > > > +
> > > > > +         struct hash_entry *phi_hte;
> > > > > +         hash_table_foreach(lcssa_phis, phi_hte) {
> > > > > +            /* Insert phi created in previous iteration */
> > > > > +            nir_phi_instr *phi_instr = (nir_phi_instr *)
> > > phi_hte-
> > > > > >key;
> > > > > +            insert_phi_and_set_block_on_uses(b, phi_instr);
> > > > > +
> > > > > +            nir_ssa_def *ssa_def = (nir_ssa_def *) phi_hte-
> > > >data;
> > > > > +            add_complex_unroll_phi_src(ssa_def, phi_instr,
> > > > > remap_table,
> > > > > +                                     
> > > > >  nir_cf_node_as_block(exit_from_node));
> > > > > +
> > > > > +            /* Create phi to be fixed up by next iteration
> > > */
> > > > > +            nir_phi_instr *new_phi =
> > > create_complex_unroll_phi(ns,
> > > > > phi_instr);
> > > > > +            _mesa_hash_table_insert(tmp, new_phi, ssa_def);
> > > > > +
> > > > > +            struct hash_entry *loop_term_hte =
> > > > > +               _mesa_hash_table_search(loop_term_phis,
> > > phi_hte-
> > > > > >key);
> > > > > +            if (loop_term_hte) {
> > > > > +               _mesa_hash_table_insert(loop_term_phis,
> > > new_phi,
> > > > > +                                       loop_term_hte->data);
> > > > > +               _mesa_hash_table_remove(loop_term_phis,
> > > > > loop_term_hte);
> > > > > +            }
> > > > > +         }
> > > > > +
> > > > > +         /* Now that the phis have been processed replace
> > > the
> > > > > table with the
> > > > > +          * phis to be fixed up in the next iteration.
> > > > > +          */
> > > > > +         _mesa_hash_table_destroy(lcssa_phis, NULL);
> > > > > +         lcssa_phis = tmp;
> > > > > +      } else {
> > > > > +         struct hash_entry *phi_hte;
> > > > > +         hash_table_foreach(lcssa_phis, phi_hte) {
> > > > > +            /* Insert phi created in previous iteration */
> > > > > +            nir_phi_instr *phi_instr = (nir_phi_instr *)
> > > phi_hte-
> > > > > >key;
> > > > > +            insert_phi_and_set_block_on_uses(b, phi_instr);
> > > > > +
> > > > > +            nir_ssa_def *ssa_def = (nir_ssa_def *) phi_hte-
> > > >data;
> > > > > +            add_complex_unroll_phi_src(ssa_def, phi_instr,
> > > > > remap_table,
> > > > > +                                     
> > > > >  nir_cf_node_as_block(exit_from_node));
> > > > > +         }
> > > > > +      }
> > > > > +
> > > > > +      /* Ready the remap tables for the next iteration */
> > > > > +      update_remap_tables(i == 0, remap_table, phi_remap,
> > > > > src_before_loop,
> > > > > +                          src_after_loop);
> > > > > +
> > > > > +      /* Set the cursor to the last if in the loop body we
> > > just
> > > > > unrolled ready
> > > > > +       * for the next iteration.
> > > > > +       */
> > > > > +      b->cursor = nir_after_cf_node(continue_from_node);
> > > > > +   }
> > > > > +
> > > > > +   /* Now that the remap table is updated add the second src
> > > to
> > > > > the innermost
> > > > > +    * phis.
> > > > > +    */
> > > > > +   hash_table_foreach(lcssa_phis, phi_hte) {
> > > > > +      nir_phi_instr *phi_instr = (nir_phi_instr *) phi_hte-
> > > >key;
> > > > > +      nir_ssa_def *phi_src = (nir_ssa_def *) phi_hte->data;
> > > > > +
> > > > > +      assert(exec_list_length(&phi_instr->srcs) == 1);
> > > > > +
> > > > > +      /* Get the src for when exiting by the loop terminator
> > > */
> > > > > +      struct hash_entry *loop_term_hte =
> > > > > +         _mesa_hash_table_search(loop_term_phis, phi_instr);
> > > > > +      if (loop_term_hte)
> > > > > +         phi_src = (nir_ssa_def *) loop_term_hte->data;
> > > > > +
> > > > > +      add_complex_unroll_phi_src(phi_src, phi_instr,
> > > remap_table,
> > > > > +                               
> > > > >  nir_cf_node_as_block(continue_from_node));
> > > > > +   }
> > > > > +
> > > > > +   /* Rewrite the uses of the old loop phis */
> > > > > +   hash_table_foreach(loop_phis, phi_hte) {
> > > > > +      nir_phi_instr *phi_instr = (nir_phi_instr *) phi_hte-
> > > >key;
> > > > > +
> > > > > +      nir_foreach_use_safe(use_src, &phi_instr->dest.ssa) {
> > > > > +         nir_src new_src = nir_src_for_ssa((nir_ssa_def *)
> > > > > phi_hte->data);
> > > > > +         nir_instr_rewrite_src(use_src->parent_instr,
> > > use_src,
> > > > > new_src);
> > > > > +      }
> > > > > +
> > > > > +      nir_foreach_if_use_safe(use_src, &phi_instr->dest.ssa) 
> > > {
> > > > > +         nir_src new_src = nir_src_for_ssa((nir_ssa_def *)
> > > > > phi_hte->data);
> > > > > +         nir_if_rewrite_condition(use_src->parent_if,
> > > new_src);
> > > > > +      }
> > > > > +   }
> > > > > +
> > > > > +   /* The loop has been unrolled so remove it. */
> > > > > +   nir_cf_node_remove(&loop->cf_node);
> > > > > +
> > > > > +   /* Delete the original loop body */
> > > > > +   nir_cf_delete(&loop_body);
> > > > > +
> > > > > +   _mesa_hash_table_destroy(loop_phis, NULL);
> > > > > +   _mesa_hash_table_destroy(loop_term_phis, NULL);
> > > > > +   _mesa_hash_table_destroy(lcssa_phis, NULL);
> > > > > +   _mesa_hash_table_destroy(remap_table, NULL);
> > > > > +   _mesa_hash_table_destroy(phi_remap, NULL);
> > > > > +   _mesa_hash_table_destroy(src_before_loop, NULL);
> > > > > +   _mesa_hash_table_destroy(src_after_loop, NULL);
> > > >
> > > > ralloc context?
> > > >  
> > > > > +}
> > > > > +
> > > > > +static bool
> > > > > +process_loops(nir_cf_node *cf_node, nir_builder *b, bool
> > > > > *innermost_loop)
> > > > > +{
> > > > > +   bool progress = false;
> > > > > +   nir_loop *loop;
> > > > > +
> > > > > +   switch (cf_node->type) {
> > > > > +   case nir_cf_node_block:
> > > > > +      return progress;
> > > > > +   case nir_cf_node_if: {
> > > > > +      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
> > > > > +      foreach_list_typed_safe(nir_cf_node, nested_node,
> > > node,
> > > > > &if_stmt->then_list)
> > > > > +         progress |= process_loops(nested_node, b,
> > > > > innermost_loop);
> > > > > +      foreach_list_typed_safe(nir_cf_node, nested_node,
> > > node,
> > > > > &if_stmt->else_list)
> > > > > +         progress |= process_loops(nested_node, b,
> > > > > innermost_loop);
> > > > > +      return progress;
> > > > > +   }
> > > > > +   case nir_cf_node_loop: {
> > > > > +      loop = nir_cf_node_as_loop(cf_node);
> > > > > +      foreach_list_typed_safe(nir_cf_node, nested_node,
> > > node,
> > > > > &loop->body)
> > > > > +         progress |= process_loops(nested_node, b,
> > > > > innermost_loop);
> > > > > +      break;
> > > > > +   }
> > > > > +   default:
> > > > > +      unreachable("unknown cf node type");
> > > > > +   }
> > > > > +
> > > > > +   if (*innermost_loop) {
> > > > > +      nir_function *fn = nir_cf_node_get_function(&loop-
> > > >cf_node)-
> > > > > >function;
> > > > > +
> > > > > +      /* Don't attempt to unroll outer loops or a second
> > > inner
> > > > > loop in
> > > > > +       * this pass wait until the next pass as we have
> > > altered the
> > > > > cf.
> > > > > +       */
> > > > > +      *innermost_loop = false;
> > > > > +
> > > > > +      if (loop->info->limiting_terminator == NULL) {
> > > > > +         return progress;
> > > > > +      }
> > > > > +
> > > > > +      if (is_simple_loop(fn->shader, loop->info)) {
> > > > > +         simple_unroll(fn, loop, b);
> > > > > +         progress = true;
> > > > > +      } else {
> > > > > +         /* Attempt to unroll loops with two terminators. */
> > > > > +         if (is_complex_loop(fn->shader, loop->info)) {
> > > > > +            bool first_terminator = true;
> > > > > +            list_for_each_entry(nir_loop_terminator,
> > > terminator,
> > > > > +                                &loop->info-
> > > >loop_terminator_list,
> > > > > +                                loop_terminator_link) {
> > > > > +
> > > > > +               nir_cf_node *if_node = &terminator->nif-
> > > >cf_node;
> > > > > +
> > > > > +               if (if_node == &loop->info-
> > > >limiting_terminator-
> > > > > >nif->cf_node) {
> > > > > +                  first_terminator = false;
> > > > > +                  continue;
> > > > > +               }
> > > > > +
> > > > > +               /* If the first terminator has a trip count
> > > of zero
> > > > > just do a
> > > > > +                * simple unroll as the second terminator can
> > > never
> > > > > be reached.
> > > > > +                */
> > > > > +               if (loop->info->trip_count == 0 &&
> > > > > first_terminator) {
> > > > > +                  simple_unroll(fn, loop, b);
> > > > > +                  progress = true;
> > > > > +                  break;
> > > > > +               }
> > > > > +
> > > > > +               nir_if *if_stmt = nir_cf_node_as_if(if_node);
> > > > > +
> > > > > +               /* Determine which if-statement branch, if
> > > any,
> > > > > ends with a
> > > > > +                * break. Note that since
> > > predicted_num_loop_jumps
> > > > > == 1, it is
> > > > > +                * impossible for both branches to end with a
> > > > > break.
> > > > > +                */
> > > > > +               nir_cf_node *last_then =
> > > > > nir_if_last_then_node(if_stmt);
> > > > > +               if (last_then->type == nir_cf_node_block &&
> > > > > +                 
> > >  ends_in_break(nir_cf_node_as_block(last_then)))
> > > > > {
> > > > > +
> > > > > +                  complex_unroll(fn, loop, b, if_node,
> > > last_then,
> > > > > false,
> > > > > +                                 !first_terminator);
> > > > > +
> > > > > +                  progress = true;
> > > > > +                  break;
> > > > > +               } else {
> > > > > +                  nir_cf_node *last_else =
> > > > > nir_if_last_else_node(if_stmt);
> > > > > +                  if (last_else->type == nir_cf_node_block
> > > &&
> > > > > +                     
> > > > > ends_in_break(nir_cf_node_as_block(last_else))) {
> > > > > +
> > > > > +                     complex_unroll(fn, loop, b, if_node,
> > > > > last_else, true,
> > > > > +                                    !first_terminator);
> > > > > +
> > > > > +                     progress = true;
> > > > > +                     break;
> > > > > +                  }
> > > > > +               }
> > > > > +            }
> > > > > +         }
> > > > > +      }
> > > > > +   }
> > > > > +
> > > > > +   return progress;
> > > > > +}
> > > > > +
> > > > > +static bool
> > > > > +nir_opt_loop_unroll_impl(nir_function_impl *impl,
> > > > > +                         nir_variable_mode indirect_mask)
> > > > > +{
> > > > > +   bool progress = false;
> > > > > +   nir_metadata_require(impl, nir_metadata_loop_analysis,
> > > > > indirect_mask);
> > > > > +
> > > > > +   nir_builder b;
> > > > > +   nir_builder_init(&b, impl);
> > > > > +
> > > > > +   foreach_list_typed_safe(nir_cf_node, node, node, &impl-
> > > >body) {
> > > > > +      bool innermost_loop = true;
> > > > > +      progress |= process_loops(node, &b, &innermost_loop);
> > > > > +   }
> > > > > +
> > > > > +   return progress;
> > > > > +}
> > > > > +
> > > > > +bool
> > > > > +nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode
> > > > > indirect_mask)
> > > > > +{
> > > > > +   bool progress = false;
> > > > > +
> > > > > +   nir_foreach_function(function, shader) {
> > > > > +      if (function->impl) {
> > > > > +         progress |= nir_opt_loop_unroll_impl(function-
> > > >impl,
> > > > > indirect_mask);
> > > > > +      }
> > > > > +   }
> > > > > +   return false;
> > > > > +}
> > > > > --
> > > > > 2.7.4
> > > > >
> > > > > _______________________________________________
> > > > > mesa-dev mailing list
> > > > > mesa-dev at lists.freedesktop.org
> > > > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > > > >
> > > >
> > > > _______________________________________________
> > > > mesa-dev mailing list
> > > > mesa-dev at lists.freedesktop.org
> > > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > > 
> > 
> > 
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list