[Mesa-dev] [PATCH 5/6] nir: Add a pass for moving SPIR-V continue blocks to the ends of loops

Thu Dec 22 03:11:49 UTC 2016

On Mon, 2016-12-19 at 20:11 -0800, Jason Ekstrand wrote:
> When shaders come in from SPIR-V, we handle continue blocks by
> placing
> the contents of the continue inside of a "if (!first_iteration)".  We
> do
> this so that we can properly handle the fact that continues in SPIR-V
> jump to the continue block at the end of the loop rather than jumping
> directly to the top of the loop like they do in NIR.  In particular,
> the
> increment step of a simple for loop ends up in the continue
> block.  This
> pass looks for this case in loops that don't actually have any
> continues
> and moves the continue contents to the end of the loop instead.  We
> need
> this because loop unrolling doesn't work if the increment is inside
> of a
> condition.
> ---
>  src/compiler/Makefile.sources |   1 +
>  src/compiler/nir/nir.h        |   2 +
>  src/compiler/nir/nir_opt_if.c | 253
> ++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 256 insertions(+)
>  create mode 100644 src/compiler/nir/nir_opt_if.c
> 
> diff --git a/src/compiler/Makefile.sources
> b/src/compiler/Makefile.sources
> index a0abede..6f701af 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -239,6 +239,7 @@ NIR_FILES = \
>  	nir/nir_opt_dead_cf.c \
>  	nir/nir_opt_gcm.c \
>  	nir/nir_opt_global_to_local.c \
> +	nir/nir_opt_if.c \
>  	nir/nir_opt_loop_unroll.c \
>  	nir/nir_opt_peephole_select.c \
>  	nir/nir_opt_remove_phis.c \
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index a6c8956..7b582a6 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2557,6 +2557,8 @@ bool nir_opt_dead_cf(nir_shader *shader);
>  
>  bool nir_opt_gcm(nir_shader *shader, bool value_number);
>  
> +bool nir_opt_if(nir_shader *shader);
> +
>  bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode
> indirect_mask);
>  
>  bool nir_opt_peephole_select(nir_shader *shader, unsigned limit);
> diff --git a/src/compiler/nir/nir_opt_if.c
> b/src/compiler/nir/nir_opt_if.c
> new file mode 100644
> index 0000000..5590684
> --- /dev/null
> +++ b/src/compiler/nir/nir_opt_if.c
> @@ -0,0 +1,253 @@
> +/*
> + * Copyright © 2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person
> obtaining a
> + * copy of this software and associated documentation files (the
> "Software"),
> + * to deal in the Software without restriction, including without
> limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom
> the
> + * Software is furnished to do so, subject to the following
> conditions:
> + *
> + * The above copyright notice and this permission notice (including
> the next
> + * paragraph) shall be included in all copies or substantial
> portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
> OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "nir.h"
> +#include "nir_control_flow.h"
> +
> +/**
> + * This optimization detects if statements at the tops of loops
> where the
> + * condition is a phi node of two constants and moves half of the if
> to above
> + * the loop and the other half of the if to the end of the loop.  A
> simple for
> + * loop "for (int i = 0; i < 4; i++)", when run through the SPIR-V
> front-end,
> + * ends up looking something like this:
> + *
> + * vec1 32 ssa_0 = load_const (0x00000000)
> + * vec1 32 ssa_1 = load_const (0xffffffff)
> + * loop {
> + *    block block_1:
> + *    vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5
> + *    vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
> + *    if ssa_2 {
> + *       block block_2:
> + *       vec1 32 ssa_4 = load_const (0x00000001)
> + *       vec1 32 ssa_5 = iadd ssa_2, ssa_4
> + *    } else {
> + *       block block_3:
> + *    }
> + *    block block_4:
> + *    vec1 32 ssa_6 = load_const (0x00000004)
> + *    vec1 32 ssa_7 = ilt ssa_5, ssa_6
> + *    if ssa_7 {
> + *       block block_5:
> + *    } else {
> + *       block block_6:
> + *       break
> + *    }
> + *    block block_7:
> + * }
> + *
> + * This turns it into something like this:
> + *
> + * // Stuff from block 1
> + * // Stuff from block 3
> + * loop {
> + *    block block_1:
> + *    vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
> + *    vec1 32 ssa_6 = load_const (0x00000004)
> + *    vec1 32 ssa_7 = ilt ssa_5, ssa_6
> + *    if ssa_7 {
> + *       block block_5:
> + *    } else {
> + *       block block_6:
> + *       break
> + *    }
> + *    block block_7:
> + *    // Stuff from block 1
> + *    // Stuff from block 2
> + *    vec1 32 ssa_4 = load_const (0x00000001)
> + *    vec1 32 ssa_5 = iadd ssa_2, ssa_4
> + * }
> + */
> +static bool
> +opt_peel_loop_initial_if(nir_loop *loop)
> +{
> +   nir_block *header_block = nir_loop_first_block(loop);
> +   nir_block *prev_block =
> +      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
> +
> +   /* It would be insane if this were not true */
> +   assert(_mesa_set_search(header_block->predecessors, prev_block));
> +   /* It must have exactly one "continue" */

After the trivial opt continue pass. Shouldn't this say something like:

     /* It must contain no continues */

> +   if (header_block->predecessors->entries != 2)
> +      return false;
> +
> +   nir_block *cont_block = NULL;
> +   struct set_entry *pred_entry;
> +   set_foreach(header_block->predecessors, pred_entry) {
> +      if (pred_entry->key != prev_block)
> +         cont_block = (void *)pred_entry->key;
> +   }
> +
> +   nir_cf_node *if_node = nir_cf_node_next(&header_block->cf_node);
> +   if (!if_node || if_node->type != nir_cf_node_if)
> +      return false;
> +
> +   nir_if *nif = nir_cf_node_as_if(if_node);
> +   assert(nif->condition.is_ssa);
> +
> +   nir_ssa_def *cond = nif->condition.ssa;
> +   if (cond->parent_instr->type != nir_instr_type_phi)
> +      return false;
> +
> +   nir_phi_instr *cond_phi = nir_instr_as_phi(cond->parent_instr);
> +   if (cond->parent_instr->block != header_block)
> +      return false;
> +
> +   /* We already know we have exactly one continue */

This comment feels a little out of place. It could be reworded/expanded
but probably best to just remove it.

Or if you like restructure this block slightly:

   /* We already know we have exactly one continue */
   assert(exec_list_length(&cond_phi->srcs) == 2);

   uint32_t entry_val, cont_val;
   ...

Although again we have no continues so a slight rewording here would
still be needed.

> +   uint32_t entry_val, cont_val;
> +   assert(exec_list_length(&cond_phi->srcs) == 2);
> +   nir_foreach_phi_src(src, cond_phi) {
> +      assert(src->src.is_ssa);
> +      nir_const_value *const_src = nir_src_as_const_value(src->src);
> +      if (!const_src)
> +         return false;
> +
> +      if (src->pred == cont_block) {

I stumbled over this at first. Is it possible to just rename cont_block
-> continue_block having it next to const_src is confusing for those of
use who can't read properly :)

Looking over the rest of this function I don't think this will result
in any line wrapping.

> +         cont_val = const_src->u32[0];
> +      } else {
> +         assert(src->pred == prev_block);
> +         entry_val = const_src->u32[0];
> +      }
> +   }
> +
> +   /* If they both execute or both don't execute, this is a job for
> +    * nir_dead_cf, not this pass.
> +    */
> +   if ((entry_val && cont_val) || (!entry_val && !cont_val))
> +      return false;
> +
> +   struct exec_list *cont_list, *entry_list;

I guess to be consistent you might want to rename cont_list ->
continue_list also.

> +   if (cont_val) {
> +      cont_list = &nif->then_list;
> +      entry_list = &nif->else_list;
> +   } else {
> +      cont_list = &nif->else_list;
> +      entry_list = &nif->then_list;
> +   }
> +
> +   /* We want to be moving the contents of entry_list to above the
> loop so it
> +    * can't contain any break or continue instructions.
> +    */
> +   foreach_list_typed(nir_cf_node, cf_node, node, entry_list) {
> +      nir_foreach_block_in_cf_node(block, cf_node) {

I think in my loop analysis pass I added a loop over all instructions
and assert if I find a jump in case dead cf hasn't run or failed for
some reason. 

> +         nir_instr *last_instr = nir_block_last_instr(block);
> +         if (last_instr && last_instr->type == nir_instr_type_jump)
> +            return false;
> +      }
> +   }
> +
> +   /* Before we do anything, convert the loop to LCSSA.  We're about
> to
> +    * replace a bunch of SSA defs with registers and this will
> prevent any of
> +    * it from leaking outside the loop.
> +    */
> +   nir_convert_loop_to_lcssa(loop);
> +
> +   nir_block *after_if_block =
> +      nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
> +
> +   /* Get rid of phis in the header block since we will be
> duplicating it */
> +   nir_lower_phis_to_regs_block(header_block);
> +   /* Get rid of phis after the if since dominance will change */
> +   nir_lower_phis_to_regs_block(after_if_block);
> +
> +   /* Get rid of SSA defs in the pieces we're about to move around
> */
> +   nir_lower_ssa_defs_to_regs_block(header_block);
> +   nir_foreach_block_in_cf_node(block, &nif->cf_node)
> +      nir_lower_ssa_defs_to_regs_block(block);
> +
> +   nir_cf_list header, tmp;
> +   nir_cf_extract(&header, nir_before_block(header_block),
> +                           nir_after_block(header_block));
> +
> +   nir_cf_list_clone(&tmp, &header, &loop->cf_node, NULL);
> +   nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));
> +   nir_cf_extract(&tmp, nir_before_cf_list(entry_list),
> +                        nir_after_cf_list(entry_list));
> +   nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));
> +
> +   nir_cf_list_clone(&tmp, &header, &loop->cf_node, NULL);

Do we really need to clone it the second time? Is there any reason we
can't just use the original header here?

> +   nir_cf_reinsert(&tmp, nir_after_block_before_jump(cont_block));
> +   nir_cf_extract(&tmp, nir_before_cf_list(cont_list),
> +                        nir_after_cf_list(cont_list));
> +   nir_cf_reinsert(&tmp, nir_after_block_before_jump(cont_block));
> +
> +   nir_cf_delete(&header);
> +   nir_cf_node_remove(&nif->cf_node);
> +
> +   return true;
> +}
> +
> +static bool
> +opt_if_cf_list(struct exec_list *cf_list)
> +{
> +   bool progress = false;
> +   foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
> +      switch (cf_node->type) {
> +      case nir_cf_node_block:
> +         break;
> +
> +      case nir_cf_node_if: {
> +         nir_if *nif = nir_cf_node_as_if(cf_node);
> +         progress |= opt_if_cf_list(&nif->then_list);
> +         progress |= opt_if_cf_list(&nif->else_list);
> +         break;
> +      }
> +
> +      case nir_cf_node_loop: {
> +         nir_loop *loop = nir_cf_node_as_loop(cf_node);
> +         progress |= opt_if_cf_list(&loop->body);
> +         progress |= opt_peel_loop_initial_if(loop);
> +         break;
> +      }
> +
> +      case nir_cf_node_function:
> +         unreachable("Invalid cf type");
> +      }
> +   }
> +
> +   return progress;
> +}
> +
> +bool
> +nir_opt_if(nir_shader *shader)

nir_opt_if is a fairly generic name and this pass is pretty specific.
Is there a better name we could use for this pass?

> +{
> +   bool progress = false;
> +
> +   nir_foreach_function(function, shader) {
> +      if (function->impl == NULL)
> +         continue;
> +
> +      if (opt_if_cf_list(&function->impl->body)) {
> +         nir_metadata_preserve(function->impl, nir_metadata_none);
> +
> +         /* If that made progress, we're no longer really in SSA
> form.  We
> +          * need to convert registers back into SSA defs and clean
> up SSA defs
> +          * that don't dominate their uses.
> +          */
> +         nir_convert_to_ssa_impl(function->impl);
> +         progress = true;
> +      }
> +   }
> +
> +   return progress;
> +}