[Mesa-dev] [PATCH 051/133] nir: Add an SSA-based liveness analysis pass.

Connor Abbott cwabbott0 at gmail.com
Wed Dec 17 14:23:10 PST 2014


On Wed, Dec 17, 2014 at 5:11 PM, Jason Ekstrand <jason at jlekstrand.net> wrote:
> On Wed, Dec 17, 2014 at 1:52 PM, Connor Abbott <cwabbott0 at gmail.com> wrote:
>>
>> I'm sure you're already aware, but there are two things we could do to
>> speed this up:
>>
>> 1. Pre-compute def/kill sets for each block similar to what i965 does.
>
>
> Sure, but we walk the instructions at most deepest block depth + 1 and the
> depest we've ever seen in the wild is 2.

Well, there's always orbital explorer ;)

>
>> 2. Use a worklist + an array of flags for "this block is in the
>> worklist" rather than walking all the basic blocks in reverse to find
>> the few we need to update.
>
>
> Sure, we could, but I don't see how pushing the blocks onto a stack and then
> popping them back off really gains us anything over just walking them.  If
> there's something I'm missing, please let me know because it's not jumping
> out at me.  I'll freely admit that I'm not terribly familiar with worklists.

I didn't mean a stack, I meant a FIFO queue. The idea is that rather
than walking through every instruction, even when there are only
changes to be made, we pop a pointer to a block off the worklist, see
if it's live-in set changed, and if it did, we push every predecessor
not already in the worklist onto the worklist and then mark the block
as not being in the worklist. We start by pushing all the blocks in
the worklist in reverse order, since that's the most efficient
ordering. Then, we're only processing blocks that we know might change
rather than every possible one, and for control flow without loops we
only walk the instructions once instead of twice. I think this is
going to be a pretty common thing for optimization passes to do, so it
would be nice to have a common implementation of the worklist.

>
>>
>>
>> Wrt #2, we already use a worklist in the DCE pass, but it's kinda lame
>> because it's using a linked list when we could just allocate an array
>> of pointers up-front based on the maximum size (the number of blocks
>> in this case, the number of SSA definitions in that case) and use it
>> as a ringbuffer. It would be a nice cleanup to implement such a
>> bounded worklist and share it between these two passes, since we'll
>> probably want to use it for lots of other passes too.
>>
>> I don't either thing should block merging this, though.
>>
>> A few other comments below.
>>
>> On Tue, Dec 16, 2014 at 1:05 AM, Jason Ekstrand <jason at jlekstrand.net>
>> wrote:
>> > ---
>> >  src/glsl/Makefile.sources         |   1 +
>> >  src/glsl/nir/nir.h                |  13 ++
>> >  src/glsl/nir/nir_live_variables.c | 282
>> > ++++++++++++++++++++++++++++++++++++++
>> >  src/glsl/nir/nir_metadata.c       |   2 +
>> >  src/mesa/main/bitset.h            |   1 +
>> >  5 files changed, 299 insertions(+)
>> >  create mode 100644 src/glsl/nir/nir_live_variables.c
>> >
>> > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
>> > index 4eb6320..433224e 100644
>> > --- a/src/glsl/Makefile.sources
>> > +++ b/src/glsl/Makefile.sources
>> > @@ -20,6 +20,7 @@ NIR_FILES = \
>> >         $(GLSL_SRCDIR)/nir/nir_from_ssa.c \
>> >         $(GLSL_SRCDIR)/nir/nir_intrinsics.c \
>> >         $(GLSL_SRCDIR)/nir/nir_intrinsics.h \
>> > +       $(GLSL_SRCDIR)/nir/nir_live_variables.c \
>> >         $(GLSL_SRCDIR)/nir/nir_lower_atomics.c \
>> >         $(GLSL_SRCDIR)/nir/nir_lower_samplers.cpp \
>> >         $(GLSL_SRCDIR)/nir/nir_lower_system_values.c \
>> > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
>> > index f744736..2f2edb6 100644
>> > --- a/src/glsl/nir/nir.h
>> > +++ b/src/glsl/nir/nir.h
>> > @@ -420,6 +420,9 @@ typedef struct {
>> >     /** generic SSA definition index. */
>> >     unsigned index;
>> >
>> > +   /** Index into the live_in and live_out bitfields */
>> > +   unsigned live_index;
>> > +
>> >     nir_instr *parent_instr;
>> >
>> >     struct set *uses;
>> > @@ -999,6 +1002,10 @@ typedef struct nir_block {
>> >     struct nir_block **dom_children;
>> >
>> >     struct set *dom_frontier;
>> > +
>> > +   /* live in and out for this block; used for liveness analysis */
>> > +   BITSET_WORD *live_in;
>> > +   BITSET_WORD *live_out;
>> >  } nir_block;
>> >
>> >  #define nir_block_first_instr(block) \
>> > @@ -1047,6 +1054,7 @@ typedef enum {
>> >     nir_metadata_none = 0x0,
>> >     nir_metadata_block_index = 0x1,
>> >     nir_metadata_dominance = 0x2,
>> > +   nir_metadata_live_variables = 0x4,
>> >  } nir_metadata;
>> >
>> >  typedef struct {
>> > @@ -1274,6 +1282,8 @@ bool nir_foreach_src(nir_instr *instr,
>> > nir_foreach_src_cb cb, void *state);
>> >  typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state);
>> >  bool nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb
>> > cb,
>> >                         void *state);
>> > +bool nir_foreach_block_reverse(nir_function_impl *impl,
>> > nir_foreach_block_cb cb,
>> > +                               void *state);
>>
>> This hunk should go in the previous patch that defined this function.
>>
>> >
>> >  /* If the following CF node is an if, this function returns that if.
>> >   * Otherwise, it returns NULL.
>> > @@ -1318,6 +1328,9 @@ void nir_lower_system_values(nir_shader *shader);
>> >
>> >  void nir_lower_atomics(nir_shader *shader);
>> >
>> > +void nir_live_variables_impl(nir_function_impl *impl);
>> > +bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
>> > +
>> >  void nir_convert_to_ssa_impl(nir_function_impl *impl);
>> >  void nir_convert_to_ssa(nir_shader *shader);
>> >  void nir_convert_from_ssa(nir_shader *shader);
>> > diff --git a/src/glsl/nir/nir_live_variables.c
>> > b/src/glsl/nir/nir_live_variables.c
>> > new file mode 100644
>> > index 0000000..7d99a06
>> > --- /dev/null
>> > +++ b/src/glsl/nir/nir_live_variables.c
>> > @@ -0,0 +1,282 @@
>> > +/*
>> > + * Copyright © 2014 Intel Corporation
>> > + *
>> > + * Permission is hereby granted, free of charge, to any person
>> > obtaining a
>> > + * copy of this software and associated documentation files (the
>> > "Software"),
>> > + * to deal in the Software without restriction, including without
>> > limitation
>> > + * the rights to use, copy, modify, merge, publish, distribute,
>> > sublicense,
>> > + * and/or sell copies of the Software, and to permit persons to whom
>> > the
>> > + * Software is furnished to do so, subject to the following conditions:
>> > + *
>> > + * The above copyright notice and this permission notice (including the
>> > next
>> > + * paragraph) shall be included in all copies or substantial portions
>> > of the
>> > + * Software.
>> > + *
>> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> > EXPRESS OR
>> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> > MERCHANTABILITY,
>> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
>> > SHALL
>> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> > OTHER
>> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> > ARISING
>> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>> > DEALINGS
>> > + * IN THE SOFTWARE.
>> > + *
>> > + * Authors:
>> > + *    Jason Ekstrand (jason at jlekstrand.net)
>> > + */
>> > +
>> > +#include "nir.h"
>> > +
>> > +/*
>> > + * Basic liveness analysis.  This works only in SSA form.
>> > + *
>> > + * This liveness pass treats phi nodes as being melded to the space
>> > between
>> > + * blocks so that the destinations of a phi are in the livein of the
>> > block
>> > + * in which it resides and the sources are in the liveout of the
>> > + * corresponding block.  By formulating the liveness information in
>> > this
>> > + * way, we ensure that the definition of any variable dominates its
>> > entire
>> > + * live range.  This is true because the only way that the definition
>> > of an
>> > + * SSA value may not dominate a use is if the use is in a phi node and
>> > the
>> > + * uses in phi no are in the live-out of the corresponding predecessor
>> > + * block but not in the live-in of the block containing the phi node.
>> > + */
>> > +
>> > +struct live_variables_state {
>> > +   unsigned num_ssa_defs;
>> > +   unsigned bitset_words;
>> > +   BITSET_WORD progress;
>>
>> I'd rather have this be
>>
>> bool progress;
>>
>> since it's effectively being used as a boolean, no need to confuse
>> people with this and it's just unnecessary micro-optimization.
>
>
> Sure.  That can be done.
> --Jason
>
>>
>>
>> > +};
>> > +
>> > +static bool
>> > +index_dest(nir_dest *dest, void *void_state)
>> > +{
>> > +   struct live_variables_state *state = void_state;
>> > +
>> > +   if (dest->is_ssa)
>> > +      dest->ssa.live_index = state->num_ssa_defs++;
>> > +
>> > +   return true;
>> > +}
>> > +
>> > +static bool
>> > +index_ssa_definitions_block(nir_block *block, void *void_state)
>> > +{
>> > +   struct live_variables_state *state = void_state;
>> > +
>> > +   nir_foreach_instr(block, instr) {
>> > +      if (instr->type == nir_instr_type_ssa_undef) {
>> > +         nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(instr);
>> > +         undef->def.live_index = 0;
>> > +      } else {
>> > +         nir_foreach_dest(instr, index_dest, state);
>> > +      }
>> > +   }
>> > +
>> > +   return true;
>> > +}
>> > +
>> > +static bool
>> > +init_liveness_block(nir_block *block, void *void_state)
>> > +{
>> > +   struct live_variables_state *state = void_state;
>> > +
>> > +   block->live_in = reralloc(block, block->live_in, BITSET_WORD,
>> > +                             state->bitset_words);
>> > +   memset(block->live_in, 0, state->bitset_words *
>> > sizeof(BITSET_WORD));
>> > +
>> > +   block->live_out = reralloc(block, block->live_out, BITSET_WORD,
>> > +                              state->bitset_words);
>> > +   memset(block->live_out, 0, state->bitset_words *
>> > sizeof(BITSET_WORD));
>> > +
>> > +   return true;
>> > +}
>> > +
>> > +static bool
>> > +set_src_live(nir_src *src, void *void_live)
>> > +{
>> > +   BITSET_WORD *live = void_live;
>> > +
>> > +   if (!src->is_ssa)
>> > +      return true;
>> > +
>> > +   if (src->ssa->live_index == 0)
>> > +      return true;   /* undefined variables are never live */
>> > +
>> > +   BITSET_SET(live, src->ssa->live_index);
>> > +
>> > +   return true;
>> > +}
>> > +
>> > +static bool
>> > +set_dest_dead(nir_dest *dest, void *void_live)
>> > +{
>> > +   BITSET_WORD *live = void_live;
>> > +
>> > +   if (dest->is_ssa)
>> > +      BITSET_CLEAR(live, dest->ssa.live_index);
>> > +
>> > +   return true;
>> > +}
>> > +
>> > +/* Phi nodes exist "between" blocks and all the phi nodes at the start
>> > of a
>> > + * block act "in parallel".  When we propagate from the live_in of one
>> > + * block to the live out of the other, we have to kill any writes from
>> > phis
>> > + * and make live any sources.
>> > + */
>> > +static void
>> > +propagate_across_edge(nir_block *pred, nir_block *succ,
>> > +                      struct live_variables_state *state)
>> > +{
>> > +   BITSET_WORD live[state->bitset_words];
>> > +   memcpy(live, succ->live_in, sizeof live);
>> > +
>> > +   nir_foreach_instr(succ, instr) {
>> > +      if (instr->type != nir_instr_type_phi)
>> > +         break;
>> > +      nir_phi_instr *phi = nir_instr_as_phi(instr);
>> > +
>> > +      set_dest_dead(&phi->dest, live);
>> > +   }
>> > +
>> > +   nir_foreach_instr(succ, instr) {
>> > +      if (instr->type != nir_instr_type_phi)
>> > +         break;
>> > +      nir_phi_instr *phi = nir_instr_as_phi(instr);
>> > +
>> > +      foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
>> > +         if (src->pred == pred) {
>> > +            set_src_live(&src->src, live);
>> > +            break;
>> > +         }
>> > +      }
>> > +   }
>> > +
>> > +   for (unsigned i = 0; i < state->bitset_words; ++i) {
>> > +      state->progress |= live[i] & ~pred->live_out[i];
>>
>> state->progress = state->progress || (live[i] & ~pred->live_out[i]) != 0;
>>
>> > +      pred->live_out[i] |= live[i];
>> > +   }
>> > +}
>> > +
>> > +static bool
>> > +walk_instructions_block(nir_block *block, void *void_state)
>> > +{
>> > +   struct live_variables_state *state = void_state;
>> > +
>> > +   /* The live out is the union (modulo phi nodes) of the live ins of
>> > its
>> > +    * successors */
>> > +   if (block->successors[0])
>> > +      propagate_across_edge(block, block->successors[0], state);
>> > +   if (block->successors[1])
>> > +      propagate_across_edge(block, block->successors[1], state);
>> > +
>> > +   memcpy(block->live_in, block->live_out,
>> > +          state->bitset_words * sizeof(BITSET_WORD));
>> > +
>> > +   nir_if *following_if = nir_block_following_if(block);
>> > +   if (following_if)
>> > +      set_src_live(&following_if->condition, block->live_in);
>> > +
>> > +   nir_foreach_instr_reverse(block, instr) {
>> > +      /* Phi nodes are handled seperately so we want to skip them.
>> > Since
>> > +       * we are going backwards and they are at the beginning, we can
>> > just
>> > +       * break as soon as we see one.
>> > +       */
>> > +      if (instr->type == nir_instr_type_phi)
>> > +         break;
>> > +
>> > +      nir_foreach_dest(instr, set_dest_dead, block->live_in);
>> > +      nir_foreach_src(instr, set_src_live, block->live_in);
>> > +   }
>> > +
>> > +   return true;
>> > +}
>> > +
>> > +static bool
>> > +src_does_not_use_def(nir_src *src, void *def)
>> > +{
>> > +   return !src->is_ssa || src->ssa != (nir_ssa_def *)def;
>> > +}
>> > +
>> > +static bool
>> > +search_for_use_after_instr(nir_instr *start, nir_ssa_def *def)
>> > +{
>> > +   /* Only look for a use strictly after the given instruction */
>> > +   struct exec_node *node = start->node.next;
>> > +   while (!exec_node_is_tail_sentinel(node)) {
>> > +      nir_instr *instr = exec_node_data(nir_instr, node, node);
>> > +      if (!nir_foreach_src(instr, src_does_not_use_def, def))
>> > +         return true;
>> > +      node = node->next;
>> > +   }
>> > +   return false;
>> > +}
>> > +
>> > +/* Returns true if def is live at instr assuming that def comes before
>> > + * instr in a pre DFS search of the dominance tree.
>> > + */
>> > +static bool
>> > +nir_ssa_def_is_live_at(nir_ssa_def *def, nir_instr *instr)
>> > +{
>> > +   if (BITSET_TEST(instr->block->live_out, def->live_index)) {
>> > +      /* Since def dominates instr, if def is in the liveout of the
>> > block,
>> > +       * it's live at instr
>> > +       */
>> > +      return true;
>> > +   } else {
>> > +      if (BITSET_TEST(instr->block->live_in, def->live_index) ||
>> > +          def->parent_instr->block == instr->block) {
>> > +         /* In this case it is either live coming into instr's block or
>> > it
>> > +          * is defined in the same block.  In this case, we simply need
>> > to
>> > +          * see if it is used after instr.
>> > +          */
>> > +         return search_for_use_after_instr(instr, def);
>> > +      } else {
>> > +         return false;
>> > +      }
>> > +   }
>> > +}
>> > +
>> > +bool
>> > +nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b)
>> > +{
>> > +   if (a->parent_instr == b->parent_instr) {
>> > +      /* Two variables defined at the same time interfere assuming at
>> > +       * least one isn't dead.
>> > +       */
>> > +      return true;
>> > +   } else if (a->live_index == 0 || b->live_index == 0) {
>> > +      /* If either variable is an ssa_undef, then there's no
>> > interference */
>> > +      return false;
>> > +   } else if (a->live_index < b->live_index) {
>> > +      return nir_ssa_def_is_live_at(a, b->parent_instr);
>> > +   } else {
>> > +      return nir_ssa_def_is_live_at(b, a->parent_instr);
>> > +   }
>> > +}
>> > +
>> > +void
>> > +nir_live_variables_impl(nir_function_impl *impl)
>> > +{
>> > +   struct live_variables_state state;
>> > +
>> > +   /* We start at 1 because we reserve the index value of 0 for
>> > ssa_undef
>> > +    * instructions.  Those are never live, so their liveness
>> > information
>> > +    * can be compacted into a single bit.
>> > +    */
>> > +   state.num_ssa_defs = 1;
>> > +   nir_foreach_block(impl, index_ssa_definitions_block, &state);
>> > +
>> > +   /* We now know how many unique ssa definitions we have and we can go
>> > +    * ahead and allocate live_in and live_out sets
>> > +    */
>> > +   state.bitset_words = BITSET_WORDS(state.num_ssa_defs);
>> > +   nir_foreach_block(impl, init_liveness_block, &state);
>> > +
>> > +   /* We need to propagate the liveness back through the CFG.  Thanks
>> > to
>> > +    * the wonders of SSA, this will run no more times than the depth of
>> > the
>> > +    * deepest loop + 1.
>> > +    */
>> > +   do {
>> > +      state.progress = 0;
>>
>> state.progress = false;
>>
>> > +      nir_foreach_block_reverse(impl, walk_instructions_block, &state);
>> > +   } while (state.progress);
>> > +}
>> > diff --git a/src/glsl/nir/nir_metadata.c b/src/glsl/nir/nir_metadata.c
>> > index 070cb74..a4d618c 100644
>> > --- a/src/glsl/nir/nir_metadata.c
>> > +++ b/src/glsl/nir/nir_metadata.c
>> > @@ -39,6 +39,8 @@ nir_metadata_require(nir_function_impl *impl,
>> > nir_metadata required)
>> >        nir_index_blocks(impl);
>> >     if (NEEDS_UPDATE(nir_metadata_dominance))
>> >        nir_calc_dominance_impl(impl);
>> > +   if (NEEDS_UPDATE(nir_metadata_live_variables))
>> > +      nir_live_variables_impl(impl);
>> >
>> >  #undef NEEDS_UPDATE
>> >
>> > diff --git a/src/mesa/main/bitset.h b/src/mesa/main/bitset.h
>> > index 601fd0e..2558da4 100644
>> > --- a/src/mesa/main/bitset.h
>> > +++ b/src/mesa/main/bitset.h
>> > @@ -32,6 +32,7 @@
>> >  #define BITSET_H
>> >
>> >  #include "imports.h"
>> > +#include "macros.h"
>> >
>> >
>> > /****************************************************************************
>> >   * generic bitset implementation
>> > --
>> > 2.2.0
>> >
>> > _______________________________________________
>> > mesa-dev mailing list
>> > mesa-dev at lists.freedesktop.org
>> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list