[Mesa-dev] [PATCH 02/13] nir: limit copy propagation inside loops
Connor Abbott
cwabbott0 at gmail.com
Mon Aug 29 05:30:49 UTC 2016
On Mon, Aug 29, 2016 at 12:54 AM, Timothy Arceri
<timothy.arceri at collabora.com> wrote:
> Don't do copy propagation inside loops until after we try
> unrolling them.
>
> This helps avoid propagating everything to the phis which
> makes loop unrolling more difficult.
>
> For example without this:
>
> loop {
> block block_1:
> /* preds: block_0 block_4 */
> vec1 32 ssa_10 = phi block_0: ssa_5, block_4: ssa_15
> vec1 32 ssa_11 = phi block_0: ssa_6, block_4: ssa_17
> vec1 32 ssa_12 = phi block_0: ssa_7, block_4: ssa_18
> vec1 32 ssa_13 = phi block_0: ssa_8, block_4: ssa_19
> vec1 32 ssa_14 = phi block_0: ssa_9, block_4: ssa_20
> vec1 32 ssa_15 = iadd ssa_10, ssa_2
> vec1 32 ssa_16 = ige ssa_15, ssa_1
> /* succs: block_2 block_3 */
> if ssa_16 {
> block block_2:
> /* preds: block_1 */
> break
> /* succs: block_5 */
> } else {
> block block_3:
> /* preds: block_1 */
> /* succs: block_4 */
> }
> block block_4:
> /* preds: block_3 */
> vec1 32 ssa_17 = imov ssa_12
> vec1 32 ssa_18 = imov ssa_13
> vec1 32 ssa_19 = imov ssa_14
> vec1 32 ssa_20 = imov ssa_11
> /* succs: block_1 */
> }
>
> Will end up as:
>
> loop {
> /* preds: block_0 block_4 */
> block block_1:
> vec1 32 ssa_10 = phi block_0: ssa_5, block_4: ssa_15
> vec1 32 ssa_11 = phi block_0: ssa_6, block_4: ssa_12
> vec1 32 ssa_12 = phi block_0: ssa_7, block_4: ssa_13
> vec1 32 ssa_13 = phi block_0: ssa_8, block_4: ssa_14
> vec1 32 ssa_14 = phi block_0: ssa_9, block_4: ssa_11
> vec1 32 ssa_15 = iadd ssa_10, ssa_2
> vec1 32 ssa_16 = ige ssa_15, ssa_1
> /* succs: block_2 block_3 */
> if ssa_16 {
> block block_2:
> /* preds: block_1 */
> break
> /* succs: block_5 */
> } else {
> block block_3:
> /* preds: block_1 */
> /* succs: block_4 */
> }
> block block_4:
> /* preds: block_3 */
> /* succs: block_1 */
> }
This change seems really fishy to me, since moves like those in your
example are just a trivial renaming of values. Just turning off copy
propagation isn't a very robust solution (what if whatever's producing
the NIR doesn't insert the moves in the first place?), and it can hurt
other optimization passes if they can't see through moves. Why is the
second loop harder to unroll? Why can't it be handled by inserting
moves before unrolling the loop, or directly during the loop unrolling
pass?
> ---
> src/compiler/nir/nir.h | 2 +-
> src/compiler/nir/nir_opt_copy_propagate.c | 47 ++++++++++++++++++++-----------
> src/mesa/drivers/dri/i965/brw_nir.c | 6 ++--
> 3 files changed, 35 insertions(+), 20 deletions(-)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index d0dfb0d..7ff5394 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2573,7 +2573,7 @@ bool nir_opt_constant_folding(nir_shader *shader);
>
> bool nir_opt_global_to_local(nir_shader *shader);
>
> -bool nir_copy_prop(nir_shader *shader);
> +bool nir_copy_prop(nir_shader *shader, bool prop_loops);
>
> bool nir_opt_cse(nir_shader *shader);
>
> diff --git a/src/compiler/nir/nir_opt_copy_propagate.c b/src/compiler/nir/nir_opt_copy_propagate.c
> index c26e07f..12daeb6 100644
> --- a/src/compiler/nir/nir_opt_copy_propagate.c
> +++ b/src/compiler/nir/nir_opt_copy_propagate.c
> @@ -99,11 +99,14 @@ is_swizzleless_move(nir_alu_instr *instr)
> }
>
> static bool
> -copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
> +copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if,
> + bool prop_loops)
> {
> if (!src->is_ssa) {
> - if (src->reg.indirect)
> - return copy_prop_src(src->reg.indirect, parent_instr, parent_if);
> + if (src->reg.indirect) {
> + return copy_prop_src(src->reg.indirect, parent_instr, parent_if,
> + prop_loops);
> + }
> return false;
> }
>
> @@ -125,6 +128,14 @@ copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
> if (phi->dest.ssa.num_components !=
> alu_instr->src[0].src.ssa->num_components)
> return false;
> +
> + /* Avoid propagating moves inside a loop into phis which makes
> + * unrolling difficult.
> + */
> + if (!prop_loops) {
> + if (phi->instr.block->cf_node.parent->type == nir_cf_node_loop)
> + return false;
> + }
> }
>
> if (parent_instr) {
> @@ -140,13 +151,14 @@ copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
> }
>
> static bool
> -copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index)
> +copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index,
> + bool prop_loops)
> {
> nir_alu_src *src = &parent_alu_instr->src[index];
> if (!src->src.is_ssa) {
> if (src->src.reg.indirect)
> return copy_prop_src(src->src.reg.indirect, &parent_alu_instr->instr,
> - NULL);
> + NULL, prop_loops);
> return false;
> }
>
> @@ -195,6 +207,7 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index)
>
> typedef struct {
> nir_instr *parent_instr;
> + bool prop_loops;
> bool progress;
> } copy_prop_state;
>
> @@ -202,25 +215,26 @@ static bool
> copy_prop_src_cb(nir_src *src, void *_state)
> {
> copy_prop_state *state = (copy_prop_state *) _state;
> - while (copy_prop_src(src, state->parent_instr, NULL))
> + while (copy_prop_src(src, state->parent_instr, NULL, state->prop_loops))
> state->progress = true;
>
> return true;
> }
>
> static bool
> -copy_prop_instr(nir_instr *instr)
> +copy_prop_instr(nir_instr *instr, bool prop_loops)
> {
> if (instr->type == nir_instr_type_alu) {
> nir_alu_instr *alu_instr = nir_instr_as_alu(instr);
> bool progress = false;
>
> for (unsigned i = 0; i < nir_op_infos[alu_instr->op].num_inputs; i++)
> - while (copy_prop_alu_src(alu_instr, i))
> + while (copy_prop_alu_src(alu_instr, i, prop_loops))
> progress = true;
>
> if (!alu_instr->dest.dest.is_ssa && alu_instr->dest.dest.reg.indirect)
> - while (copy_prop_src(alu_instr->dest.dest.reg.indirect, instr, NULL))
> + while (copy_prop_src(alu_instr->dest.dest.reg.indirect, instr, NULL,
> + prop_loops))
> progress = true;
>
> return progress;
> @@ -229,30 +243,31 @@ copy_prop_instr(nir_instr *instr)
> copy_prop_state state;
> state.parent_instr = instr;
> state.progress = false;
> + state.prop_loops = prop_loops;
> nir_foreach_src(instr, copy_prop_src_cb, &state);
>
> return state.progress;
> }
>
> static bool
> -copy_prop_if(nir_if *if_stmt)
> +copy_prop_if(nir_if *if_stmt, bool prop_loops)
> {
> - return copy_prop_src(&if_stmt->condition, NULL, if_stmt);
> + return copy_prop_src(&if_stmt->condition, NULL, if_stmt, prop_loops);
> }
>
> static bool
> -nir_copy_prop_impl(nir_function_impl *impl)
> +nir_copy_prop_impl(nir_function_impl *impl, bool prop_loops)
> {
> bool progress = false;
>
> nir_foreach_block(block, impl) {
> nir_foreach_instr(instr, block) {
> - if (copy_prop_instr(instr))
> + if (copy_prop_instr(instr, prop_loops))
> progress = true;
> }
>
> nir_if *if_stmt = nir_block_get_following_if(block);
> - if (if_stmt && copy_prop_if(if_stmt))
> + if (if_stmt && copy_prop_if(if_stmt, prop_loops))
> progress = true;
> }
>
> @@ -265,12 +280,12 @@ nir_copy_prop_impl(nir_function_impl *impl)
> }
>
> bool
> -nir_copy_prop(nir_shader *shader)
> +nir_copy_prop(nir_shader *shader, bool prop_loops)
> {
> bool progress = false;
>
> nir_foreach_function(function, shader) {
> - if (function->impl && nir_copy_prop_impl(function->impl))
> + if (function->impl && nir_copy_prop_impl(function->impl, prop_loops))
> progress = true;
> }
>
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
> index 29a3e3e..b8457d2 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir.c
> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> @@ -378,13 +378,13 @@ nir_optimize(nir_shader *nir, bool is_scalar)
> OPT_V(nir_lower_alu_to_scalar);
> }
>
> - OPT(nir_copy_prop);
> + OPT(nir_copy_prop, false);
>
> if (is_scalar) {
> OPT_V(nir_lower_phis_to_scalar);
> }
>
> - OPT(nir_copy_prop);
> + OPT(nir_copy_prop, false);
> OPT(nir_opt_dce);
> OPT(nir_opt_cse);
> OPT(nir_opt_peephole_select);
> @@ -499,7 +499,7 @@ brw_postprocess_nir(nir_shader *nir,
> OPT(nir_lower_locals_to_regs);
>
> OPT_V(nir_lower_to_source_mods);
> - OPT(nir_copy_prop);
> + OPT(nir_copy_prop, true);
> OPT(nir_opt_dce);
>
> if (unlikely(debug_enabled)) {
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list