[Mesa-dev] [PATCH 02/13] nir: limit copy propagation inside loops

Mon Aug 29 06:30:40 UTC 2016

On Mon, 2016-08-29 at 01:30 -0400, Connor Abbott wrote:
> On Mon, Aug 29, 2016 at 12:54 AM, Timothy Arceri
> <timothy.arceri at collabora.com> wrote:
> > 
> > Don't do copy propagation inside loops until after we try
> > unrolling them.
> > 
> > This helps avoid propagating everything to the phis which
> > makes loop unrolling more difficult.
> > 
> > For example without this:
> > 
> >    loop {
> >       block block_1:
> >       /* preds: block_0 block_4 */
> >       vec1 32 ssa_10 = phi block_0: ssa_5, block_4: ssa_15
> >       vec1 32 ssa_11 = phi block_0: ssa_6, block_4: ssa_17
> >       vec1 32 ssa_12 = phi block_0: ssa_7, block_4: ssa_18
> >       vec1 32 ssa_13 = phi block_0: ssa_8, block_4: ssa_19
> >       vec1 32 ssa_14 = phi block_0: ssa_9, block_4: ssa_20
> >       vec1 32 ssa_15 = iadd ssa_10, ssa_2
> >       vec1 32 ssa_16 = ige ssa_15, ssa_1
> >       /* succs: block_2 block_3 */
> >       if ssa_16 {
> >          block block_2:
> >          /* preds: block_1 */
> >          break
> >          /* succs: block_5 */
> >       } else {
> >          block block_3:
> >          /* preds: block_1 */
> >          /* succs: block_4 */
> >       }
> >       block block_4:
> >       /* preds: block_3 */
> >       vec1 32 ssa_17 = imov ssa_12
> >       vec1 32 ssa_18 = imov ssa_13
> >       vec1 32 ssa_19 = imov ssa_14
> >       vec1 32 ssa_20 = imov ssa_11
> >       /* succs: block_1 */
> >    }
> > 
> > Will end up as:
> > 
> >    loop {
> >       /* preds: block_0 block_4 */
> >       block block_1:
> >       vec1 32 ssa_10 = phi block_0: ssa_5, block_4: ssa_15
> >       vec1 32 ssa_11 = phi block_0: ssa_6, block_4: ssa_12
> >       vec1 32 ssa_12 = phi block_0: ssa_7, block_4: ssa_13
> >       vec1 32 ssa_13 = phi block_0: ssa_8, block_4: ssa_14
> >       vec1 32 ssa_14 = phi block_0: ssa_9, block_4: ssa_11
> >       vec1 32 ssa_15 = iadd ssa_10, ssa_2
> >       vec1 32 ssa_16 = ige ssa_15, ssa_1
> >       /* succs: block_2 block_3 */
> >       if ssa_16 {
> >          block block_2:
> >          /* preds: block_1 */
> >          break
> >          /* succs: block_5 */
> >       } else {
> >          block block_3:
> >          /* preds: block_1 */
> >          /* succs: block_4 */
> >       }
> >       block block_4:
> >       /* preds: block_3 */
> >       /* succs: block_1 */
> >    }
> 
> This change seems really fishy to me, since moves like those in your
> example are just a trivial renaming of values.

If you look closely this is not just a trivial renaming of values. This
example was from a loop that looked something like this:

   int i = 0;
   while (i < 2) {
      a = a.yzwx;
   }

>  Just turning off copy
> propagation isn't a very robust solution (what if whatever's
> producing
> the NIR doesn't insert the moves in the first place?),

I haven't seen this happen when testing shader-db or any of the test
suites.

>  and it can hurt
> other optimization passes if they can't see through moves.

While true loops in glsl tend to be fairly simple. I haven't been able
to detect any negative results in my copy of shader-db. Also we should
be unrolling a high percentage of loops we come across which means copy
propagation will kick in once they are unrolled.

>  Why is the
> second loop harder to unroll?

Because manipulating the control flow in NIR is painful. When the movs
remain in place we can simply clone them as we unroll.

>  Why can't it be handled by inserting
> moves before unrolling the loop, or directly during the loop
> unrolling
> pass?

We could possibly do this and I did attempt it at first, but it would
mean an extra pass over all phi's in the loop header that would likely
be expensive and complicated. So instead I went with the simple
solution that had no measurable impact as far as I could tell.

> 
> > 
> > ---
> >  src/compiler/nir/nir.h                    |  2 +-
> >  src/compiler/nir/nir_opt_copy_propagate.c | 47
> > ++++++++++++++++++++-----------
> >  src/mesa/drivers/dri/i965/brw_nir.c       |  6 ++--
> >  3 files changed, 35 insertions(+), 20 deletions(-)
> > 
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index d0dfb0d..7ff5394 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -2573,7 +2573,7 @@ bool nir_opt_constant_folding(nir_shader
> > *shader);
> > 
> >  bool nir_opt_global_to_local(nir_shader *shader);
> > 
> > -bool nir_copy_prop(nir_shader *shader);
> > +bool nir_copy_prop(nir_shader *shader, bool prop_loops);
> > 
> >  bool nir_opt_cse(nir_shader *shader);
> > 
> > diff --git a/src/compiler/nir/nir_opt_copy_propagate.c
> > b/src/compiler/nir/nir_opt_copy_propagate.c
> > index c26e07f..12daeb6 100644
> > --- a/src/compiler/nir/nir_opt_copy_propagate.c
> > +++ b/src/compiler/nir/nir_opt_copy_propagate.c
> > @@ -99,11 +99,14 @@ is_swizzleless_move(nir_alu_instr *instr)
> >  }
> > 
> >  static bool
> > -copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if
> > *parent_if)
> > +copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if
> > *parent_if,
> > +              bool prop_loops)
> >  {
> >     if (!src->is_ssa) {
> > -      if (src->reg.indirect)
> > -         return copy_prop_src(src->reg.indirect, parent_instr,
> > parent_if);
> > +      if (src->reg.indirect) {
> > +         return copy_prop_src(src->reg.indirect, parent_instr,
> > parent_if,
> > +                              prop_loops);
> > +      }
> >        return false;
> >     }
> > 
> > @@ -125,6 +128,14 @@ copy_prop_src(nir_src *src, nir_instr
> > *parent_instr, nir_if *parent_if)
> >        if (phi->dest.ssa.num_components !=
> >            alu_instr->src[0].src.ssa->num_components)
> >           return false;
> > +
> > +      /* Avoid propagating moves inside a loop into phis which
> > makes
> > +       * unrolling difficult.
> > +       */
> > +      if (!prop_loops) {
> > +         if (phi->instr.block->cf_node.parent->type ==
> > nir_cf_node_loop)
> > +            return false;
> > +      }
> >     }
> > 
> >     if (parent_instr) {
> > @@ -140,13 +151,14 @@ copy_prop_src(nir_src *src, nir_instr
> > *parent_instr, nir_if *parent_if)
> >  }
> > 
> >  static bool
> > -copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index)
> > +copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index,
> > +                  bool prop_loops)
> >  {
> >     nir_alu_src *src = &parent_alu_instr->src[index];
> >     if (!src->src.is_ssa) {
> >        if (src->src.reg.indirect)
> >           return copy_prop_src(src->src.reg.indirect,
> > &parent_alu_instr->instr,
> > -                              NULL);
> > +                              NULL, prop_loops);
> >        return false;
> >     }
> > 
> > @@ -195,6 +207,7 @@ copy_prop_alu_src(nir_alu_instr
> > *parent_alu_instr, unsigned index)
> > 
> >  typedef struct {
> >     nir_instr *parent_instr;
> > +   bool prop_loops;
> >     bool progress;
> >  } copy_prop_state;
> > 
> > @@ -202,25 +215,26 @@ static bool
> >  copy_prop_src_cb(nir_src *src, void *_state)
> >  {
> >     copy_prop_state *state = (copy_prop_state *) _state;
> > -   while (copy_prop_src(src, state->parent_instr, NULL))
> > +   while (copy_prop_src(src, state->parent_instr, NULL, state-
> > >prop_loops))
> >        state->progress = true;
> > 
> >     return true;
> >  }
> > 
> >  static bool
> > -copy_prop_instr(nir_instr *instr)
> > +copy_prop_instr(nir_instr *instr, bool prop_loops)
> >  {
> >     if (instr->type == nir_instr_type_alu) {
> >        nir_alu_instr *alu_instr = nir_instr_as_alu(instr);
> >        bool progress = false;
> > 
> >        for (unsigned i = 0; i < nir_op_infos[alu_instr-
> > >op].num_inputs; i++)
> > -         while (copy_prop_alu_src(alu_instr, i))
> > +         while (copy_prop_alu_src(alu_instr, i, prop_loops))
> >              progress = true;
> > 
> >        if (!alu_instr->dest.dest.is_ssa && alu_instr-
> > >dest.dest.reg.indirect)
> > -         while (copy_prop_src(alu_instr->dest.dest.reg.indirect,
> > instr, NULL))
> > +         while (copy_prop_src(alu_instr->dest.dest.reg.indirect,
> > instr, NULL,
> > +                              prop_loops))
> >              progress = true;
> > 
> >        return progress;
> > @@ -229,30 +243,31 @@ copy_prop_instr(nir_instr *instr)
> >     copy_prop_state state;
> >     state.parent_instr = instr;
> >     state.progress = false;
> > +   state.prop_loops = prop_loops;
> >     nir_foreach_src(instr, copy_prop_src_cb, &state);
> > 
> >     return state.progress;
> >  }
> > 
> >  static bool
> > -copy_prop_if(nir_if *if_stmt)
> > +copy_prop_if(nir_if *if_stmt, bool prop_loops)
> >  {
> > -   return copy_prop_src(&if_stmt->condition, NULL, if_stmt);
> > +   return copy_prop_src(&if_stmt->condition, NULL, if_stmt,
> > prop_loops);
> >  }
> > 
> >  static bool
> > -nir_copy_prop_impl(nir_function_impl *impl)
> > +nir_copy_prop_impl(nir_function_impl *impl, bool prop_loops)
> >  {
> >     bool progress = false;
> > 
> >     nir_foreach_block(block, impl) {
> >        nir_foreach_instr(instr, block) {
> > -         if (copy_prop_instr(instr))
> > +         if (copy_prop_instr(instr, prop_loops))
> >              progress = true;
> >        }
> > 
> >        nir_if *if_stmt = nir_block_get_following_if(block);
> > -      if (if_stmt && copy_prop_if(if_stmt))
> > +      if (if_stmt && copy_prop_if(if_stmt, prop_loops))
> >           progress = true;
> >        }
> > 
> > @@ -265,12 +280,12 @@ nir_copy_prop_impl(nir_function_impl *impl)
> >  }
> > 
> >  bool
> > -nir_copy_prop(nir_shader *shader)
> > +nir_copy_prop(nir_shader *shader, bool prop_loops)
> >  {
> >     bool progress = false;
> > 
> >     nir_foreach_function(function, shader) {
> > -      if (function->impl && nir_copy_prop_impl(function->impl))
> > +      if (function->impl && nir_copy_prop_impl(function->impl,
> > prop_loops))
> >           progress = true;
> >     }
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c
> > b/src/mesa/drivers/dri/i965/brw_nir.c
> > index 29a3e3e..b8457d2 100644
> > --- a/src/mesa/drivers/dri/i965/brw_nir.c
> > +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> > @@ -378,13 +378,13 @@ nir_optimize(nir_shader *nir, bool is_scalar)
> >           OPT_V(nir_lower_alu_to_scalar);
> >        }
> > 
> > -      OPT(nir_copy_prop);
> > +      OPT(nir_copy_prop, false);
> > 
> >        if (is_scalar) {
> >           OPT_V(nir_lower_phis_to_scalar);
> >        }
> > 
> > -      OPT(nir_copy_prop);
> > +      OPT(nir_copy_prop, false);
> >        OPT(nir_opt_dce);
> >        OPT(nir_opt_cse);
> >        OPT(nir_opt_peephole_select);
> > @@ -499,7 +499,7 @@ brw_postprocess_nir(nir_shader *nir,
> >     OPT(nir_lower_locals_to_regs);
> > 
> >     OPT_V(nir_lower_to_source_mods);
> > -   OPT(nir_copy_prop);
> > +   OPT(nir_copy_prop, true);
> >     OPT(nir_opt_dce);
> > 
> >     if (unlikely(debug_enabled)) {
> > --
> > 2.7.4
> > 
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev