[Mesa-dev] [PATCH 05/11] nir: Add a LCSAA-pass

Thomas Helland thomashelland90 at gmail.com
Fri Oct 7 06:55:10 UTC 2016


2016-10-05 18:59 GMT+02:00 Jason Ekstrand <jason at jlekstrand.net>:
> On Tue, Oct 4, 2016 at 6:46 PM, Timothy Arceri
> <timothy.arceri at collabora.com> wrote:
>>
>> On Tue, 2016-10-04 at 16:47 -0700, Jason Ekstrand wrote:
>> > On Fri, Sep 16, 2016 at 6:24 AM, Timothy Arceri <timothy.arceri at colla
>> > bora.com> wrote:
>> > > From: Thomas Helland <thomashelland90 at gmail.com>
>> > >
>> > > V2: Do a "depth first search" to convert to LCSSA
>> > >
>> > > V3: Small comment fixup
>> > >
>> > > V4: Rebase, adapt to removal of function overloads
>> > >
>> > > V5: Rebase, adapt to relocation of nir to compiler/nir
>> > >     Still need to adapt to potential if-uses
>> > >     Work around nir_validate issue
>> > >
>> > > V6 (Timothy):
>> > >  - tidy lcssa and stop leaking memory
>> > >  - dont rewrite the src for the lcssa phi node
>> > >  - validate lcssa phi srcs to avoid postvalidate assert
>> > >  - don't add new phi if one already exists
>> > >  - more lcssa phi validation fixes
>> > >  - Rather than marking ssa defs inside a loop just mark blocks
>> > > inside
>> > >    a loop. This is simpler and fixes lcssa for intrinsics which do
>> > >    not have a destination.
>> > >  - don't create LCSSA phis for loops we won't unroll
>> > >  - require loop metadata for lcssa pass
>> > >  - handle case were the ssa defs use outside the loop is already a
>> > > phi
>> > >
>> > > V7: (Timothy)
>> > > - pass indirect mask to metadata call
>> > > ---
>> > >  src/compiler/Makefile.sources   |   1 +
>> > >  src/compiler/nir/nir.h          |   6 ++
>> > >  src/compiler/nir/nir_to_lcssa.c | 227
>> > > ++++++++++++++++++++++++++++++++++++++++
>> > >  src/compiler/nir/nir_validate.c |  11 +-
>> > >  4 files changed, 242 insertions(+), 3 deletions(-)
>> > >  create mode 100644 src/compiler/nir/nir_to_lcssa.c
>> > >
>> > > diff --git a/src/compiler/Makefile.sources
>> > > b/src/compiler/Makefile.sources
>> > > index 7ed26a9..8ef6080 100644
>> > > --- a/src/compiler/Makefile.sources
>> > > +++ b/src/compiler/Makefile.sources
>> > > @@ -247,6 +247,7 @@ NIR_FILES = \
>> > >         nir/nir_search_helpers.h \
>> > >         nir/nir_split_var_copies.c \
>> > >         nir/nir_sweep.c \
>> > > +       nir/nir_to_lcssa.c \
>> > >         nir/nir_to_ssa.c \
>> > >         nir/nir_validate.c \
>> > >         nir/nir_vla.h \
>> > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> > > index cc8f4b6..29a6f45 100644
>> > > --- a/src/compiler/nir/nir.h
>> > > +++ b/src/compiler/nir/nir.h
>> > > @@ -1387,6 +1387,8 @@ typedef struct {
>> > >     struct exec_list srcs; /** < list of nir_phi_src */
>> > >
>> > >     nir_dest dest;
>> > > +
>> > > +   bool is_lcssa_phi;
>> > >  } nir_phi_instr;
>> > >
>> > >  typedef struct {
>> > > @@ -2643,6 +2645,10 @@ void nir_convert_to_ssa(nir_shader *shader);
>> > >  bool nir_repair_ssa_impl(nir_function_impl *impl);
>> > >  bool nir_repair_ssa(nir_shader *shader);
>> > >
>> > > +void nir_to_lcssa_impl(nir_function_impl *impl,
>> > > +                       nir_variable_mode indirect_mask);
>> > > +void nir_to_lcssa(nir_shader *shader, nir_variable_mode
>> > > indirect_mask);
>> > > +
>> > >  /* If phi_webs_only is true, only convert SSA values involved in
>> > > phi nodes to
>> > >   * registers.  If false, convert all values (even those not
>> > > involved in a phi
>> > >   * node) to registers.
>> > > diff --git a/src/compiler/nir/nir_to_lcssa.c
>> > > b/src/compiler/nir/nir_to_lcssa.c
>> > > new file mode 100644
>> > > index 0000000..25d0bdb
>> > > --- /dev/null
>> > > +++ b/src/compiler/nir/nir_to_lcssa.c
>> > > @@ -0,0 +1,227 @@
>> > > +/*
>> > > + * Copyright © 2015 Thomas Helland
>> > > + *
>> > > + * Permission is hereby granted, free of charge, to any person
>> > > obtaining a
>> > > + * copy of this software and associated documentation files (the
>> > > "Software"),
>> > > + * to deal in the Software without restriction, including without
>> > > limitation
>> > > + * the rights to use, copy, modify, merge, publish, distribute,
>> > > sublicense,
>> > > + * and/or sell copies of the Software, and to permit persons to
>> > > whom the
>> > > + * Software is furnished to do so, subject to the following
>> > > conditions:
>> > > + *
>> > > + * The above copyright notice and this permission notice
>> > > (including the next
>> > > + * paragraph) shall be included in all copies or substantial
>> > > portions of the
>> > > + * Software.
>> > > + *
>> > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> > > EXPRESS OR
>> > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> > > MERCHANTABILITY,
>> > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
>> > > EVENT SHALL
>> > > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
>> > > DAMAGES OR OTHER
>> > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> > > ARISING
>> > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> > > OTHER DEALINGS
>> > > + * IN THE SOFTWARE.
>> > > + */
>> > > +
>> > > +/*
>> > > + * This pass converts the ssa-graph into "Loop Closed SSA form".
>> > > This is
>> > > + * done by placing phi nodes at the exits of the loop for all
>> > > values
>> > > + * that are used outside the loop. The result is it transforms:
>> > > + *
>> > > + * loop {                    ->      loop {
>> > > + *    ssa2 = ....            ->          ssa2 = ...
>> > > + *    if (cond)              ->          if (cond) {
>> > > + *       break;              ->             break;
>> > > + *    ssa3 = ssa2 * ssa4     ->          }
>> > > + * }                         ->          ssa3 = ssa2 * ssa4
>> > > + * ssa6 = ssa2 + 4           ->       }
>> > > + *                                    ssa5 = lcssa_phi(ssa2)
>> > > + *                                    ssa6 = ssa5 + 4
>> > > + */
>> >
>> > Let me make sure I understand this correctly.  The point here seems
>> > to be to ensure that SSA defs can never escape a loop without going
>> > through a phi.  Correct?  This can happen if the ssa def, for
>> > whatever reason, dominates all of the breaks in the loop.  In that
>> > case, it will dominate the block after the loop and can go through
>> > without a phi.  This raises a question: Is there any real difference
>> > between an LCSSA phi and a regular phi?
>>
>> There shouldn't be any real difference besides only having a single src
>> which validation doesn't like. I guess I'll try the suggestion you make
>> below, but I'm concerned I'll hit more validation problems.
>
>
> Honestly, I'm surprised what you have now passes validation. :/  We really
> should be validating that a phi has a set of predecessors exactly equal to
> the predecessors of the block in which it lives.
>
>>
>> >
>> > > +
>> > > +#include "nir.h"
>> > > +
>> > > +typedef struct {
>> > > +   /* The nir_shader we are transforming */
>> > > +   nir_shader *shader;
>> > > +
>> > > +   /* The loop we store information for */
>> > > +   nir_loop *loop;
>> > > +
>> > > +   /* Keep track of which defs are in the loop */
>> > > +   BITSET_WORD *is_in_loop;
>> > > +
>> > > +   /* General purpose bool */
>> > > +   bool flag;
>> > > +} lcssa_state;
>> > > +
>> > > +static void
>> > > +mark_block_as_in_loop(nir_block *blk, void *state)
>> > > +{
>> > > +   lcssa_state *state_cast = (lcssa_state *) state;
>> > > +   BITSET_SET(state_cast->is_in_loop, blk->index);
>> > > +}
>> > > +
>> > > +static void
>> > > +is_block_outside_loop(nir_block *blk, void *void_state)
>> > > +{
>> > > +   lcssa_state *state = void_state;
>> > > +   if (!BITSET_TEST(state->is_in_loop, blk->index)) {
>> > > +      state->flag = true;
>> > > +   }
>> > > +}
>> >
>> > These helpers aren't really needed anymore (they probably were prior
>> > to better block iteration) and I think they're just confusing.
>>
>> Hmm. Your probably right. Thomas was originally checking inividual
>> definitions rather than just using the blocks.
>
>
> What I meant is that the is_block_outside_loop helper made sense because it
> could be passed to nir_foreach_block.  Now that we have better block
> iteration, the helpers that set a flag in a struct passed in as a void * are
> just piles of wrapping that's just confusing.
>

Yeah, I believe the patches I wrote may contain quite a lot of this.
The block iteration helpers had not yet landed when I was working on this,
and I forgot their existence when rebasing.

>>
>> Are you thinking
>> something like:
>>
>> if (block_after_loop->index <= src->parent_instr->block->index)
>>   ... we are outside the loop ..
>
>
> Actually, that would probably work and would save us some iteration.  I
> hadn't thought of that.
>
>>
>> >
>> > > +
>> > > +static bool
>> > > +convert_loop_exit_for_ssa(nir_ssa_def *def, void *void_state)
>> > > +{
>> > > +   lcssa_state *state = void_state;
>> > > +
>> > > +   state->flag = false;
>> > > +
>> > > +   nir_block *block_after_loop =
>> > > +      nir_cf_node_as_block(nir_cf_node_next(&state->loop-
>> > > >cf_node));
>> > > +
>> > > +   nir_foreach_use_safe(src, def) {
>> > > +      if (src->parent_instr->type == nir_instr_type_phi &&
>> > > +          (block_after_loop->index == src->parent_instr->block-
>> > > >index ||
>> >
>> > You should be able to just compare the block pointers.
>> >
>> > > +           nir_instr_as_phi(src->parent_instr)->is_lcssa_phi))
>> >
>> > I don't think you need this check.expand
>>
>> As I say below I don't recall how this could already be an lcssa_phi
>> but I recall removing it once before and it caused issues. Maybe nested
>> loops although we shouldn't be touching those. I need to run some tests
>> (and add comments this time around).
>>
>> >
>> > > +         continue;
>> > > +
>> > > +      is_block_outside_loop(src->parent_instr->block, void_state);
>> > > +   }
>> > > +
>> > > +   /* There where no sources that had defs outside the loop */
>> > > +   if (!state->flag)
>> > > +      return true;
>> > > +
>> > > +   /* Initialize a phi-instruction */
>> > > +   nir_phi_instr *phi = nir_phi_instr_create(state->shader);
>> > > +   phi->instr.block = block_after_loop;
>> > > +   nir_ssa_dest_init(&phi->instr, &phi->dest,
>> > > +                     def->num_components, def->bit_size, "LCSSA-
>> > > phi");
>> > > +   phi->is_lcssa_phi = true;
>> > > +
>> > > +   /* Connect the ssa-def and the phi instruction */
>> > > +   nir_phi_src *phi_src = ralloc(phi, nir_phi_src);
>> > > +   phi_src->pred = def->parent_instr->block;
>> >
>> > Uh... I don't think this is what you want.  I think you want to place
>> > the phi in block_after_loop.
>>
>> ??
>>
>> It is in the code below, this is setting the src pred to the block from
>> the loop.
>>
>>    nir_instr_insert_before_block(phi->instr.block, &phi->instr);
>>
>> Here phi->instr.block == block_after_loop.
>
>
> Drp... I can't read.  Your code is fine. :-)
>
>>
>> >
>> > > +   phi_src->src = nir_src_for_ssa(def);
>> >
>> > Is there any particular reason why we have this is_lcssa_phi flag?
>> > Why don't we just create a regular phi node with as many sources as
>> > the block has predecessors and make them all point to the same ssa
>> > def?
>>
>> I guess that could work I'll give it a try. Although will that not
>> cause some kind of validation problems? For example the ssa_def belongs
>> to only one of the predecessors.
>
>
> The only time you can get into a case where you're adding this phi is if you
> have the same ssa_def for all predecessors.  If that's not the case, then
> the ssa_def does not dominate the use and we'll have a phi in between
> anyway.  I don't think we have an issue there.
>
>>
>> >
>> > > +
>> > > +   exec_list_push_tail(&phi->srcs, &phi_src->node);
>> > > +
>> > > +   nir_instr_insert_before_block(phi->instr.block, &phi->instr);
>> > > +
>> > > +   /* Run through all uses and rewrite those outside the loop to
>> > > point to
>> > > +    * the phi instead of pointing to the ssa-def.
>> > > +    */
>> > > +   nir_foreach_use_safe(src, def) {
>> > > +      state->flag = false;
>> > > +
>> > > +      if (src->parent_instr->type == nir_instr_type_phi &&
>> > > +          (block_after_loop->index == src->parent_instr->block-
>> > > >index ||
>> > > +           nir_instr_as_phi(src->parent_instr)->is_lcssa_phi))
>> > > +         continue;
>> >
>> > How is this different from "src->parent_instr == &phi->instr"?
>>
>> That would check that the we are not trying to rewrite the use of the
>> phi we just added as opposed to avoiding rewriting a use if it is an
>> existing phi.
>
>
> Right.  However, I don't think you want the is_lcssa_phi.  You may have some
> other phi in block_after_loop that uses the ssa_def and you don't want to
> overwrite that.
>
>>
>> Although I'm starting to wonder how this could already be an lcssa_phi
>> at this point. I'll need double check a few things, my mind has purged
>> this from memory.
>
>
> I'm not sure if it could either...
>
>>
>> >
>> > > +
>> > > +      is_block_outside_loop(src->parent_instr->block, state);
>> > > +
>> > > +      if (!state->flag)
>> > > +         continue;
>> >
>> > Yeah, those helpers need to go.
>> >
>> > > +
>> > > +      nir_instr_rewrite_src(src->parent_instr, src,
>> > > +                            nir_src_for_ssa(&phi->dest.ssa));
>> > > +   }
>> > > +
>> > > +   return true;
>> > > +}
>> > > +
>> > > +static void
>> > > +convert_to_lcssa(nir_cf_node *cf_node, lcssa_state *state)
>> > > +{
>> > > +   switch (cf_node->type) {
>> > > +   case nir_cf_node_block:
>> > > +      nir_foreach_instr(instr, nir_cf_node_as_block(cf_node))
>> > > +         nir_foreach_ssa_def(instr, convert_loop_exit_for_ssa,
>> > > state);
>> > > +      return;
>> > > +   case nir_cf_node_if: {
>> > > +      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
>> > > +      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt-
>> > > >then_list)
>> > > +         convert_to_lcssa(nested_node, state);
>> > > +      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt-
>> > > >else_list)
>> > > +         convert_to_lcssa(nested_node, state);
>> > > +      return;
>> > > +   }
>> > > +   case nir_cf_node_loop:
>> > > +      /* Since we are going depth first the innermost loop will
>> > > already have
>> > > +       * been rewritten, and so there should be no defs inside the
>> > > inner loop
>> > > +       * that are not already rewritten with LCSSA-phis in our
>> > > current loop.
>> > > +       */
>> >
>> > I have a feeling your recursion here could be a bit simpler.  I'm not
>> > 100% sure how off-hand.  I'll think about it a bit.
>>
>> Yeah I've had the same thoughts but this is what the original code did
>> and I couldn't think of anything obvious so I just left it.
>>

Yeah, I didn't particularly like this either.
But it was the only solution I found at the time of writing this.
I believe this was discussed extensively on irc at the time,
and this solution was found to be the simplest one.

>> >
>> > > +      return;
>> > > +   default:
>> > > +      unreachable("unknown cf node type");
>> > > +   }
>> > > +}
>> > > +
>> > > +static void
>> > > +compute_lcssa(nir_cf_node *cf_node, nir_function_impl *impl)
>> > > +{
>> > > +   nir_loop *loop;
>> > > +
>> > > +   switch (cf_node->type) {
>> > > +   case nir_cf_node_block:
>> > > +      return;
>> > > +   case nir_cf_node_if: {
>> > > +      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
>> > > +      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt-
>> > > >then_list)
>> > > +         compute_lcssa(nested_node, impl);
>> > > +      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt-
>> > > >else_list)
>> > > +         compute_lcssa(nested_node, impl);
>> > > +      return;
>> > > +   }
>> > > +   case nir_cf_node_loop:
>> > > +      loop = nir_cf_node_as_loop(cf_node);
>> > > +      foreach_list_typed(nir_cf_node, nested_node, node, &loop-
>> > > >body)
>> > > +         compute_lcssa(nested_node, impl);
>> > > +      break;
>> > > +   default:
>> > > +      unreachable("unknown cf node type");
>> > > +   }
>> > > +
>> > > +   if (loop->info->limiting_terminator == NULL)
>> > > +      return;
>> > > +
>> > > +   nir_function *fn = nir_cf_node_get_function(&loop->cf_node)-
>> > > >function;
>> > > +   if (!is_simple_loop(fn->shader, loop->info) &&
>> > > +       !is_complex_loop(fn->shader, loop->info)) {
>> > > +      return;
>> > > +   }
>> >
>> > Why are the above two checks needed?  I don't really see why we need
>> > loop analysis for lcssa.
>>
>> Well we need lcssa and loop analysis to do loop unrolling so why not
>> skip lcssa is we can't unroll the loop?
>
>
> As I've been reviewing your series, I've been thinking about how we can
> decouple things.  Ideally, I'd like to have:
>
>  1) A loop analysis pass that's pure analysis and doesn't make any real
> decisions
>  2) A dumb LCSSA pass that just converts to LCSSA
>  3) A "smart" loop unrolling pass that makes all of the decisions about what
> gets unrolled and does the unrolling
>

This was indeed the intention when I was doing this work.
It might have gotten more tightly coupled than intended though.

> I'm not really happy with having all three be as tightly coupled as they
> are.  Maybe they need to be?  But I don't think so.
>
> Another thought I had this morning was that maybe we would just want to
> convert to LCSSA on-demand one loop at a time as we unroll.  Looking at the
> algorithm, I don't think that would be terribly difficult and it would
> certainly solve both the progress problem and the "don't do unnecessary
> work" problem.  I'm not convinced it's a good idea, but it's a thought.
>

I don't think this is a good idea. LCSSA can be advantegous for other passes.
Loop unsiwthcing is one thing that comes to mind. (Splitting a loop with an
invariant if/else inside it into two loops, one for the if and one for
the else).
Another usecase is LICM, but I'm not sure how this interacts with GVN/GCM.
Last time I checked the results of our GCM pass it caused a lot of invariants
to be pulled into the loop, doing the complete opposite work of a LICM pass.

>>
>> Also if we remove the is_lcssa phi flag we would end up in an infinite
>> loop of adding lcssa phis and removing them (see patch 6 which come to
>> think of it might not be needed any longer since we already do this).
>
>
> Right.  I'm ok with keeping the is_lcssa_phi flag for the sole purpose of
> proper progress reporting.  However, I'd like us to avoid making "special"
> phi nodes with different rules.

Avoiding the infinite optimization loop was the reason I added the flag.
It was not intended to have any effect apart from this, and possibly
short circuit some unnecessary compiler work as an optimization.

It's nice to see this work being picked up again =)

>
>>
>> >
>> > > +
>> > > +   lcssa_state *state = rzalloc(NULL, lcssa_state);
>> > > +   state->is_in_loop = rzalloc_array(state, BITSET_WORD,
>> > > +                                     BITSET_WORDS(impl-
>> > > >ssa_alloc));
>> > > +   state->loop = loop;
>> > > +
>> > > +   nir_foreach_block_in_cf_node(block, cf_node) {
>> > > +      mark_block_as_in_loop(block, state);
>> > > +   }
>> > > +
>> > > +   foreach_list_typed(nir_cf_node, node, node, &loop->body)
>> > > +      convert_to_lcssa(node, state);
>> > > +
>> > > +   ralloc_free(state);
>> > > +}
>> > > +
>> > > +void
>> > > +nir_to_lcssa_impl(nir_function_impl *impl, nir_variable_mode
>> > > indirect_mask)
>> > > +{
>> > > +   nir_metadata_require(impl, nir_metadata_loop_analysis,
>> > > indirect_mask);
>> >
>> > You need to also require block indices.
>>
>> Will add.
>>
>> >
>> > > +
>> > > +   foreach_list_typed(nir_cf_node, node, node, &impl->body)
>> > > +      compute_lcssa(node, impl);
>> > > +}
>> > > +
>> > > +void
>> > > +nir_to_lcssa(nir_shader *shader, nir_variable_mode indirect_mask)
>> > > +{
>> > > +   nir_foreach_function(func, shader) {
>> > > +      if (func->impl) {
>> > > +         nir_to_lcssa_impl(func->impl, indirect_mask);
>> > > +      }
>> > > +   }
>> > > +}
>> > > diff --git a/src/compiler/nir/nir_validate.c
>> > > b/src/compiler/nir/nir_validate.c
>> > > index 60af715..9d1566c 100644
>> > > --- a/src/compiler/nir/nir_validate.c
>> > > +++ b/src/compiler/nir/nir_validate.c
>> > > @@ -564,8 +564,13 @@ validate_phi_instr(nir_phi_instr *instr,
>> > > validate_state *state)
>> > >     validate_dest(&instr->dest, state);
>> > >
>> > >     exec_list_validate(&instr->srcs);
>> > > -   validate_assert(state, exec_list_length(&instr->srcs) ==
>> > > -          state->block->predecessors->entries);
>> > > +
>> > > +   if (!instr->is_lcssa_phi) {
>> > > +      validate_assert(state, exec_list_length(&instr->srcs) ==
>> > > +             state->block->predecessors->entries);
>> > > +   } else {
>> > > +      validate_assert(state, exec_list_length(&instr->srcs) == 1);
>> > > +   }
>> > >  }
>> > >
>> > >  static void
>> > > @@ -624,7 +629,7 @@ validate_phi_src(nir_phi_instr *instr,
>> > > nir_block *pred, validate_state *state)
>> > >
>> > >     exec_list_validate(&instr->srcs);
>> > >     nir_foreach_phi_src(src, instr) {
>> > > -      if (src->pred == pred) {
>> > > +      if (src->pred == pred || instr->is_lcssa_phi) {
>> > >           validate_assert(state, src->src.is_ssa);
>> > >           validate_assert(state, src->src.ssa->num_components ==
>> > >                  instr->dest.ssa.num_components);
>> > > --
>> > > 2.7.4
>> > >
>> > > _______________________________________________
>> > > mesa-dev mailing list
>> > > mesa-dev at lists.freedesktop.org
>> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>> > >
>> >
>> > _______________________________________________
>> > mesa-dev mailing list
>> > mesa-dev at lists.freedesktop.org
>> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>


More information about the mesa-dev mailing list