[Mesa-dev] [PATCH 05/11] nir: Add a LCSAA-pass

Timothy Arceri timothy.arceri at collabora.com
Wed Oct 5 23:09:55 UTC 2016


On Wed, 2016-10-05 at 09:59 -0700, Jason Ekstrand wrote:
> On Tue, Oct 4, 2016 at 6:46 PM, Timothy Arceri <timothy.arceri at collab
> ora.com> wrote:
> > On Tue, 2016-10-04 at 16:47 -0700, Jason Ekstrand wrote:
> > > On Fri, Sep 16, 2016 at 6:24 AM, Timothy Arceri <timothy.arceri at c
> > olla
> > > bora.com> wrote:
> > > > From: Thomas Helland <thomashelland90 at gmail.com>
> > > >
> > > > V2: Do a "depth first search" to convert to LCSSA
> > > >
> > > > V3: Small comment fixup
> > > >
> > > > V4: Rebase, adapt to removal of function overloads
> > > >
> > > > V5: Rebase, adapt to relocation of nir to compiler/nir
> > > >     Still need to adapt to potential if-uses
> > > >     Work around nir_validate issue
> > > >
> > > > V6 (Timothy):
> > > >  - tidy lcssa and stop leaking memory
> > > >  - dont rewrite the src for the lcssa phi node
> > > >  - validate lcssa phi srcs to avoid postvalidate assert
> > > >  - don't add new phi if one already exists
> > > >  - more lcssa phi validation fixes
> > > >  - Rather than marking ssa defs inside a loop just mark blocks
> > > > inside
> > > >    a loop. This is simpler and fixes lcssa for intrinsics which
> > do
> > > >    not have a destination.
> > > >  - don't create LCSSA phis for loops we won't unroll
> > > >  - require loop metadata for lcssa pass
> > > >  - handle case were the ssa defs use outside the loop is
> > already a
> > > > phi
> > > >
> > > > V7: (Timothy)
> > > > - pass indirect mask to metadata call
> > > > ---
> > > >  src/compiler/Makefile.sources   |   1 +
> > > >  src/compiler/nir/nir.h          |   6 ++
> > > >  src/compiler/nir/nir_to_lcssa.c | 227
> > > > ++++++++++++++++++++++++++++++++++++++++
> > > >  src/compiler/nir/nir_validate.c |  11 +-
> > > >  4 files changed, 242 insertions(+), 3 deletions(-)
> > > >  create mode 100644 src/compiler/nir/nir_to_lcssa.c
> > > >
> > > > diff --git a/src/compiler/Makefile.sources
> > > > b/src/compiler/Makefile.sources
> > > > index 7ed26a9..8ef6080 100644
> > > > --- a/src/compiler/Makefile.sources
> > > > +++ b/src/compiler/Makefile.sources
> > > > @@ -247,6 +247,7 @@ NIR_FILES = \
> > > >         nir/nir_search_helpers.h \
> > > >         nir/nir_split_var_copies.c \
> > > >         nir/nir_sweep.c \
> > > > +       nir/nir_to_lcssa.c \
> > > >         nir/nir_to_ssa.c \
> > > >         nir/nir_validate.c \
> > > >         nir/nir_vla.h \
> > > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > > > index cc8f4b6..29a6f45 100644
> > > > --- a/src/compiler/nir/nir.h
> > > > +++ b/src/compiler/nir/nir.h
> > > > @@ -1387,6 +1387,8 @@ typedef struct {
> > > >     struct exec_list srcs; /** < list of nir_phi_src */
> > > >
> > > >     nir_dest dest;
> > > > +
> > > > +   bool is_lcssa_phi;
> > > >  } nir_phi_instr;
> > > >
> > > >  typedef struct {
> > > > @@ -2643,6 +2645,10 @@ void nir_convert_to_ssa(nir_shader
> > *shader);
> > > >  bool nir_repair_ssa_impl(nir_function_impl *impl);
> > > >  bool nir_repair_ssa(nir_shader *shader);
> > > >
> > > > +void nir_to_lcssa_impl(nir_function_impl *impl,
> > > > +                       nir_variable_mode indirect_mask);
> > > > +void nir_to_lcssa(nir_shader *shader, nir_variable_mode
> > > > indirect_mask);
> > > > +
> > > >  /* If phi_webs_only is true, only convert SSA values involved
> > in
> > > > phi nodes to
> > > >   * registers.  If false, convert all values (even those not
> > > > involved in a phi
> > > >   * node) to registers.
> > > > diff --git a/src/compiler/nir/nir_to_lcssa.c
> > > > b/src/compiler/nir/nir_to_lcssa.c
> > > > new file mode 100644
> > > > index 0000000..25d0bdb
> > > > --- /dev/null
> > > > +++ b/src/compiler/nir/nir_to_lcssa.c
> > > > @@ -0,0 +1,227 @@
> > > > +/*
> > > > + * Copyright © 2015 Thomas Helland
> > > > + *
> > > > + * Permission is hereby granted, free of charge, to any person
> > > > obtaining a
> > > > + * copy of this software and associated documentation files
> > (the
> > > > "Software"),
> > > > + * to deal in the Software without restriction, including
> > without
> > > > limitation
> > > > + * the rights to use, copy, modify, merge, publish,
> > distribute,
> > > > sublicense,
> > > > + * and/or sell copies of the Software, and to permit persons
> > to
> > > > whom the
> > > > + * Software is furnished to do so, subject to the following
> > > > conditions:
> > > > + *
> > > > + * The above copyright notice and this permission notice
> > > > (including the next
> > > > + * paragraph) shall be included in all copies or substantial
> > > > portions of the
> > > > + * Software.
> > > > + *
> > > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
> > KIND,
> > > > EXPRESS OR
> > > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> > > > MERCHANTABILITY,
> > > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN
> > NO
> > > > EVENT SHALL
> > > > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
> > > > DAMAGES OR OTHER
> > > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> > OTHERWISE,
> > > > ARISING
> > > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
> > OR
> > > > OTHER DEALINGS
> > > > + * IN THE SOFTWARE.
> > > > + */
> > > > +
> > > > +/*
> > > > + * This pass converts the ssa-graph into "Loop Closed SSA
> > form".
> > > > This is
> > > > + * done by placing phi nodes at the exits of the loop for all
> > > > values
> > > > + * that are used outside the loop. The result is it
> > transforms:
> > > > + *
> > > > + * loop {                    ->      loop {
> > > > + *    ssa2 = ....            ->          ssa2 = ...
> > > > + *    if (cond)              ->          if (cond) {
> > > > + *       break;              ->             break;
> > > > + *    ssa3 = ssa2 * ssa4     ->          }
> > > > + * }                         ->          ssa3 = ssa2 * ssa4
> > > > + * ssa6 = ssa2 + 4           ->       }
> > > > + *                                    ssa5 = lcssa_phi(ssa2)
> > > > + *                                    ssa6 = ssa5 + 4
> > > > + */
> > >
> > > Let me make sure I understand this correctly.  The point here
> > seems
> > > to be to ensure that SSA defs can never escape a loop without
> > going
> > > through a phi.  Correct?  This can happen if the ssa def, for
> > > whatever reason, dominates all of the breaks in the loop.  In
> > that
> > > case, it will dominate the block after the loop and can go
> > through
> > > without a phi.  This raises a question: Is there any real
> > difference
> > > between an LCSSA phi and a regular phi?
> > 
> > There shouldn't be any real difference besides only having a single
> > src
> > which validation doesn't like. I guess I'll try the suggestion you
> > make
> > below, but I'm concerned I'll hit more validation problems.
> > 
> 
> Honestly, I'm surprised what you have now passes validation. :/  We
> really should be validating that a phi has a set of predecessors
> exactly equal to the predecessors of the block in which it lives.
>  
> > >  
> > > > +
> > > > +#include "nir.h"
> > > > +
> > > > +typedef struct {
> > > > +   /* The nir_shader we are transforming */
> > > > +   nir_shader *shader;
> > > > +
> > > > +   /* The loop we store information for */
> > > > +   nir_loop *loop;
> > > > +
> > > > +   /* Keep track of which defs are in the loop */
> > > > +   BITSET_WORD *is_in_loop;
> > > > +
> > > > +   /* General purpose bool */
> > > > +   bool flag;
> > > > +} lcssa_state;
> > > > +
> > > > +static void
> > > > +mark_block_as_in_loop(nir_block *blk, void *state)
> > > > +{
> > > > +   lcssa_state *state_cast = (lcssa_state *) state;
> > > > +   BITSET_SET(state_cast->is_in_loop, blk->index);
> > > > +}
> > > > +
> > > > +static void
> > > > +is_block_outside_loop(nir_block *blk, void *void_state)
> > > > +{
> > > > +   lcssa_state *state = void_state;
> > > > +   if (!BITSET_TEST(state->is_in_loop, blk->index)) {
> > > > +      state->flag = true;
> > > > +   }
> > > > +}
> > >
> > > These helpers aren't really needed anymore (they probably were
> > prior
> > > to better block iteration) and I think they're just confusing.
> > 
> > Hmm. Your probably right. Thomas was originally checking inividual
> > definitions rather than just using the blocks.
> 
> What I meant is that the is_block_outside_loop helper made sense
> because it could be passed to nir_foreach_block.  Now that we have
> better block iteration, the helpers that set a flag in a struct
> passed in as a void * are just piles of wrapping that's just
> confusing.

Oh ok, that makes sense I thought you were suggesting we didn't need
the coed at all :P


>  
> > Are you thinking
> > something like:
> > 
> > if (block_after_loop->index <= src->parent_instr->block->index)
> >   ... we are outside the loop ..
> > 
> 
> Actually, that would probably work and would save us some iteration. 
> I hadn't thought of that.

Thinking about it more to handle nested loops we might need to handle
things before the loop also, but it still might work.

    if (block_after_loop->index <= src->parent_instr->block->index ||
        first_block_in_loop->index > src->parent_instr->block->index)  
     ... we are outside the loop ..


>  
> > >  
> > > > +
> > > > +static bool
> > > > +convert_loop_exit_for_ssa(nir_ssa_def *def, void *void_state)
> > > > +{
> > > > +   lcssa_state *state = void_state;
> > > > +
> > > > +   state->flag = false;
> > > > +
> > > > +   nir_block *block_after_loop =
> > > > +      nir_cf_node_as_block(nir_cf_node_next(&state->loop-
> > > > >cf_node));
> > > > +
> > > > +   nir_foreach_use_safe(src, def) {
> > > > +      if (src->parent_instr->type == nir_instr_type_phi &&
> > > > +          (block_after_loop->index == src->parent_instr-
> > >block-
> > > > >index ||
> > >
> > > You should be able to just compare the block pointers.
> > >  
> > > > +           nir_instr_as_phi(src->parent_instr)->is_lcssa_phi))
> > >
> > > I don't think you need this check.expand
> > 
> > As I say below I don't recall how this could already be an
> > lcssa_phi
> > but I recall removing it once before and it caused issues. Maybe
> > nested
> > loops although we shouldn't be touching those. I need to run some
> > tests
> > (and add comments this time around).
> > 
> > >  
> > > > +         continue;
> > > > +
> > > > +      is_block_outside_loop(src->parent_instr->block,
> > void_state);
> > > > +   }
> > > > +
> > > > +   /* There where no sources that had defs outside the loop */
> > > > +   if (!state->flag)
> > > > +      return true;
> > > > +
> > > > +   /* Initialize a phi-instruction */
> > > > +   nir_phi_instr *phi = nir_phi_instr_create(state->shader);
> > > > +   phi->instr.block = block_after_loop;
> > > > +   nir_ssa_dest_init(&phi->instr, &phi->dest,
> > > > +                     def->num_components, def->bit_size,
> > "LCSSA-
> > > > phi");
> > > > +   phi->is_lcssa_phi = true;
> > > > +
> > > > +   /* Connect the ssa-def and the phi instruction */
> > > > +   nir_phi_src *phi_src = ralloc(phi, nir_phi_src);
> > > > +   phi_src->pred = def->parent_instr->block;
> > >
> > > Uh... I don't think this is what you want.  I think you want to
> > place
> > > the phi in block_after_loop.
> > 
> > ??
> > 
> > It is in the code below, this is setting the src pred to the block
> > from
> > the loop.
> > 
> >    nir_instr_insert_before_block(phi->instr.block, &phi->instr);
> > 
> > Here phi->instr.block == block_after_loop.
> > 
> 
> Drp... I can't read.  Your code is fine. :-)
>  
> > >  
> > > > +   phi_src->src = nir_src_for_ssa(def);
> > >
> > > Is there any particular reason why we have this is_lcssa_phi
> > flag? 
> > > Why don't we just create a regular phi node with as many sources
> > as
> > > the block has predecessors and make them all point to the same
> > ssa
> > > def?
> > 
> > I guess that could work I'll give it a try. Although will that not
> > cause some kind of validation problems? For example the ssa_def
> > belongs
> > to only one of the predecessors.
> > 
> 
> The only time you can get into a case where you're adding this phi is
> if you have the same ssa_def for all predecessors.  If that's not the
> case, then the ssa_def does not dominate the use and we'll have a phi
> in between anyway.  I don't think we have an issue there.
>  
> > >  
> > > > +
> > > > +   exec_list_push_tail(&phi->srcs, &phi_src->node);
> > > > +
> > > > +   nir_instr_insert_before_block(phi->instr.block, &phi-
> > >instr);
> > > > +
> > > > +   /* Run through all uses and rewrite those outside the loop
> > to
> > > > point to
> > > > +    * the phi instead of pointing to the ssa-def.
> > > > +    */
> > > > +   nir_foreach_use_safe(src, def) {
> > > > +      state->flag = false;
> > > > +
> > > > +      if (src->parent_instr->type == nir_instr_type_phi &&
> > > > +          (block_after_loop->index == src->parent_instr-
> > >block-
> > > > >index ||
> > > > +           nir_instr_as_phi(src->parent_instr)->is_lcssa_phi))
> > > > +         continue;
> > >
> > > How is this different from "src->parent_instr == &phi->instr"?
> > 
> > That would check that the we are not trying to rewrite the use of
> > the
> > phi we just added as opposed to avoiding rewriting a use if it is
> > an
> > existing phi.
> 
> Right.  However, I don't think you want the is_lcssa_phi.  You may
> have some other phi in block_after_loop that uses the ssa_def and you
> don't want to overwrite that.
>  
> > Although I'm starting to wonder how this could already be
> > an lcssa_phi
> > at this point. I'll need double check a few things, my mind
> > has purged
> > this from memory.
> 
> I'm not sure if it could either...
>  
> > >  
> > > > +
> > > > +      is_block_outside_loop(src->parent_instr->block, state);
> > > > +
> > > > +      if (!state->flag)
> > > > +         continue;
> > >
> > > Yeah, those helpers need to go.
> > >  
> > > > +
> > > > +      nir_instr_rewrite_src(src->parent_instr, src,
> > > > +                            nir_src_for_ssa(&phi->dest.ssa));
> > > > +   }
> > > > +
> > > > +   return true;
> > > > +}
> > > > +
> > > > +static void
> > > > +convert_to_lcssa(nir_cf_node *cf_node, lcssa_state *state)
> > > > +{
> > > > +   switch (cf_node->type) {
> > > > +   case nir_cf_node_block:
> > > > +      nir_foreach_instr(instr, nir_cf_node_as_block(cf_node))
> > > > +         nir_foreach_ssa_def(instr, convert_loop_exit_for_ssa,
> > > > state);
> > > > +      return;
> > > > +   case nir_cf_node_if: {
> > > > +      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
> > > > +      foreach_list_typed(nir_cf_node, nested_node, node,
> > &if_stmt-
> > > > >then_list)
> > > > +         convert_to_lcssa(nested_node, state);
> > > > +      foreach_list_typed(nir_cf_node, nested_node, node,
> > &if_stmt-
> > > > >else_list)
> > > > +         convert_to_lcssa(nested_node, state);
> > > > +      return;
> > > > +   }
> > > > +   case nir_cf_node_loop:
> > > > +      /* Since we are going depth first the innermost loop
> > will
> > > > already have
> > > > +       * been rewritten, and so there should be no defs inside
> > the
> > > > inner loop
> > > > +       * that are not already rewritten with LCSSA-phis in our
> > > > current loop.
> > > > +       */
> > >
> > > I have a feeling your recursion here could be a bit simpler.  I'm
> > not
> > > 100% sure how off-hand.  I'll think about it a bit.
> > 
> > Yeah I've had the same thoughts but this is what the original code
> > did
> > and I couldn't think of anything obvious so I just left it.
> > 
> > >  
> > > > +      return;
> > > > +   default:
> > > > +      unreachable("unknown cf node type");
> > > > +   }
> > > > +}
> > > > +
> > > > +static void
> > > > +compute_lcssa(nir_cf_node *cf_node, nir_function_impl *impl)
> > > > +{
> > > > +   nir_loop *loop;
> > > > +
> > > > +   switch (cf_node->type) {
> > > > +   case nir_cf_node_block:
> > > > +      return;
> > > > +   case nir_cf_node_if: {
> > > > +      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
> > > > +      foreach_list_typed(nir_cf_node, nested_node, node,
> > &if_stmt-
> > > > >then_list)
> > > > +         compute_lcssa(nested_node, impl);
> > > > +      foreach_list_typed(nir_cf_node, nested_node, node,
> > &if_stmt-
> > > > >else_list)
> > > > +         compute_lcssa(nested_node, impl);
> > > > +      return;
> > > > +   }
> > > > +   case nir_cf_node_loop:
> > > > +      loop = nir_cf_node_as_loop(cf_node);
> > > > +      foreach_list_typed(nir_cf_node, nested_node, node,
> > &loop-
> > > > >body)
> > > > +         compute_lcssa(nested_node, impl);
> > > > +      break;
> > > > +   default:
> > > > +      unreachable("unknown cf node type");
> > > > +   }
> > > > +
> > > > +   if (loop->info->limiting_terminator == NULL)
> > > > +      return;
> > > > +
> > > > +   nir_function *fn = nir_cf_node_get_function(&loop-
> > >cf_node)-
> > > > >function;
> > > > +   if (!is_simple_loop(fn->shader, loop->info) &&
> > > > +       !is_complex_loop(fn->shader, loop->info)) {
> > > > +      return;
> > > > +   }
> > >
> > > Why are the above two checks needed?  I don't really see why we
> > need
> > > loop analysis for lcssa.
> > 
> > Well we need lcssa and loop analysis to do loop unrolling so why
> > not
> > skip lcssa is we can't unroll the loop?
> 
> As I've been reviewing your series, I've been thinking about how we
> can decouple things.  Ideally, I'd like to have:
> 
>  1) A loop analysis pass that's pure analysis and doesn't make any
> real decisions
>  2) A dumb LCSSA pass that just converts to LCSSA
>  3) A "smart" loop unrolling pass that makes all of the decisions
> about what gets unrolled and does the unrolling

Makes sense.

> 
> I'm not really happy with having all three be as tightly coupled as
> they are.  Maybe they need to be?  But I don't think so.

It *should* be possible, at the very lease we should be decouple it
more than it currently is.

> 
> Another thought I had this morning was that maybe we would just want
> to convert to LCSSA on-demand one loop at a time as we unroll. 
> Looking at the algorithm, I don't think that would be terribly
> difficult and it would certainly solve both the progress problem and
> the "don't do unnecessary work" problem.  I'm not convinced it's a
> good idea, but it's a thought.

Yeah I had thought about that, but lcssa could be useful for other loop
optimisation passes we might want to have a go at for remaining loops
that are too lange to unroll.


>  
> > Also if we remove the is_lcssa phi flag we would end up in
> > an infinite
> > loop of adding lcssa phis and removing them (see patch 6 which come
> > to
> > think of it might not be needed any longer since we already do
> > this).
> > 
> 
> Right.  I'm ok with keeping the is_lcssa_phi flag for the sole
> purpose of proper progress reporting.  However, I'd like us to avoid
> making "special" phi nodes with different rules.

Sure.

>  
> > >  
> > > > +
> > > > +   lcssa_state *state = rzalloc(NULL, lcssa_state);
> > > > +   state->is_in_loop = rzalloc_array(state, BITSET_WORD,
> > > > +                                     BITSET_WORDS(impl-
> > > > >ssa_alloc));
> > > > +   state->loop = loop;
> > > > +
> > > > +   nir_foreach_block_in_cf_node(block, cf_node) {
> > > > +      mark_block_as_in_loop(block, state);
> > > > +   }
> > > > +
> > > > +   foreach_list_typed(nir_cf_node, node, node, &loop->body)
> > > > +      convert_to_lcssa(node, state);
> > > > +
> > > > +   ralloc_free(state);
> > > > +}
> > > > +
> > > > +void
> > > > +nir_to_lcssa_impl(nir_function_impl *impl, nir_variable_mode
> > > > indirect_mask)
> > > > +{
> > > > +   nir_metadata_require(impl, nir_metadata_loop_analysis,
> > > > indirect_mask);
> > >
> > > You need to also require block indices.
> > 
> > Will add.
> > 
> > >  
> > > > +
> > > > +   foreach_list_typed(nir_cf_node, node, node, &impl->body)
> > > > +      compute_lcssa(node, impl);
> > > > +}
> > > > +
> > > > +void
> > > > +nir_to_lcssa(nir_shader *shader, nir_variable_mode
> > indirect_mask)
> > > > +{
> > > > +   nir_foreach_function(func, shader) {
> > > > +      if (func->impl) {
> > > > +         nir_to_lcssa_impl(func->impl, indirect_mask);
> > > > +      }
> > > > +   }
> > > > +}
> > > > diff --git a/src/compiler/nir/nir_validate.c
> > > > b/src/compiler/nir/nir_validate.c
> > > > index 60af715..9d1566c 100644
> > > > --- a/src/compiler/nir/nir_validate.c
> > > > +++ b/src/compiler/nir/nir_validate.c
> > > > @@ -564,8 +564,13 @@ validate_phi_instr(nir_phi_instr *instr,
> > > > validate_state *state)
> > > >     validate_dest(&instr->dest, state);
> > > >
> > > >     exec_list_validate(&instr->srcs);
> > > > -   validate_assert(state, exec_list_length(&instr->srcs) ==
> > > > -          state->block->predecessors->entries);
> > > > +
> > > > +   if (!instr->is_lcssa_phi) {
> > > > +      validate_assert(state, exec_list_length(&instr->srcs) ==
> > > > +             state->block->predecessors->entries);
> > > > +   } else {
> > > > +      validate_assert(state, exec_list_length(&instr->srcs) ==
> > 1);
> > > > +   }
> > > >  }
> > > >
> > > >  static void
> > > > @@ -624,7 +629,7 @@ validate_phi_src(nir_phi_instr *instr,
> > > > nir_block *pred, validate_state *state)
> > > >
> > > >     exec_list_validate(&instr->srcs);
> > > >     nir_foreach_phi_src(src, instr) {
> > > > -      if (src->pred == pred) {
> > > > +      if (src->pred == pred || instr->is_lcssa_phi) {
> > > >           validate_assert(state, src->src.is_ssa);
> > > >           validate_assert(state, src->src.ssa->num_components
> > ==
> > > >                  instr->dest.ssa.num_components);
> > > > --
> > > > 2.7.4
> > > >
> > > > _______________________________________________
> > > > mesa-dev mailing list
> > > > mesa-dev at lists.freedesktop.org
> > > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > > >
> > >
> > > _______________________________________________
> > > mesa-dev mailing list
> > > mesa-dev at lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > 
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list