[Mesa-dev] [PATCH v4 7/7] nir: add loop unroll support for complex wrapper loops

Timothy Arceri tarceri at itsqueeze.com
Tue Aug 28 22:25:09 UTC 2018


On 29/08/18 03:59, Jason Ekstrand wrote:
> On Mon, Aug 27, 2018 at 4:10 AM Timothy Arceri <tarceri at itsqueeze.com 
> <mailto:tarceri at itsqueeze.com>> wrote:
> 
>     In GLSL IR we cheat with switch statements and simply convert them
>     into loops with a single iteration. This allowed us to make use of
>     the existing jump instruction handling provided by the loop handing
>     code, it also allows dead code to be cleaned up once we have
>     wrapped the code in a loop.
> 
>     However using loops in this way created previously unrollable loops
>     which limits further optimisations. Here we provide a way to unroll
>     loops that end in a break and have multiple other exits.
> 
>     All shader-db changes are from the dolphin uber shaders. There is a
>     small amount of HURT shaders but in general the improvements far
>     exceed the HURT.
> 
>     shader-db results IVB:
> 
>     total instructions in shared programs: 10018187 -> 10016468 (-0.02%)
>     instructions in affected programs: 104080 -> 102361 (-1.65%)
>     helped: 36
>     HURT: 15
> 
>     total cycles in shared programs: 220065064 -> 154529655 (-29.78%)
>     cycles in affected programs: 126063017 -> 60527608 (-51.99%)
>     helped: 51
>     HURT: 0
> 
>     total loops in shared programs: 2515 -> 2308 (-8.23%)
>     loops in affected programs: 903 -> 696 (-22.92%)
>     helped: 51
>     HURT: 0
> 
>     total spills in shared programs: 4370 -> 4124 (-5.63%)
>     spills in affected programs: 1397 -> 1151 (-17.61%)
>     helped: 9
>     HURT: 12
> 
>     total fills in shared programs: 4581 -> 4419 (-3.54%)
>     fills in affected programs: 2201 -> 2039 (-7.36%)
>     helped: 9
>     HURT: 15
>     ---
>       src/compiler/nir/nir_opt_loop_unroll.c | 113 +++++++++++++++++--------
>       1 file changed, 76 insertions(+), 37 deletions(-)
> 
>     diff --git a/src/compiler/nir/nir_opt_loop_unroll.c
>     b/src/compiler/nir/nir_opt_loop_unroll.c
>     index 9c33267cb72..fa60523aac7 100644
>     --- a/src/compiler/nir/nir_opt_loop_unroll.c
>     +++ b/src/compiler/nir/nir_opt_loop_unroll.c
>     @@ -67,7 +67,6 @@ loop_prepare_for_unroll(nir_loop *loop)
>          /* Remove continue if its the last instruction in the loop */
>          nir_instr *last_instr =
>     nir_block_last_instr(nir_loop_last_block(loop));
>          if (last_instr && last_instr->type == nir_instr_type_jump) {
>     -      assert(nir_instr_as_jump(last_instr)->type == nir_jump_continue);
>             nir_instr_remove(last_instr);
>          }
>       }
>     @@ -474,54 +473,91 @@ complex_unroll(nir_loop *loop,
>     nir_loop_terminator *unlimit_term,
>       static bool
>       wrapper_unroll(nir_loop *loop)
>       {
>     -   bool progress = false;
>     -
>     -   nir_block *blk_after_loop =
>     -      nir_cursor_current_block(nir_after_cf_node(&loop->cf_node));
>     -
>     -   /* There may still be some single src phis following the loop that
>     -    * have not yet been cleaned up by another pass. Tidy those up
>     before
>     -    * unrolling the loop.
>     -    */
>     -   nir_foreach_instr_safe(instr, blk_after_loop) {
>     -      if (instr->type != nir_instr_type_phi)
>     -         break;
>     +   if (!list_empty(&loop->info->loop_terminator_list)) {
> 
>     -      nir_phi_instr *phi = nir_instr_as_phi(instr);
>     -      assert(exec_list_length(&phi->srcs) == 1);
>     +      /* Unrolling a loop with a large number of exits can result in a
>     +       * large inrease in register pressure. For now we just skip
>     +       * unrolling if we have more than 3 exits (not including the
>     break
>     +       * at the end of the loop).
>     +       *
>     +       * TODO: Most loops that fit this pattern are simply switch
>     +       * statements that are converted to a loop to take advantage of
>     +       * exiting jump instruction handling. In this case we could make
>     +       * use of a binary seach pattern like we do in
>     +       * nir_lower_indirect_derefs(), this should allow us to
>     unroll the
>     +       * loops in an optimal way and should also avoid some of the
>     +       * register pressure that comes from simply nesting the
>     +       * terminators one after the other.
>     +       */
>     +      if (list_length(&loop->info->loop_terminator_list) > 3)
>     +         return false;
>     +
>     +      loop_prepare_for_unroll(loop);
>     +
>     +      nir_cursor cursor = nir_after_block(nir_loop_last_block(loop));
> 
> 
> Maybe call this loop_end; that's less generic than "cursor".

ok

> 
>     +      list_for_each_entry(nir_loop_terminator, terminator,
>     +                          &loop->info->loop_terminator_list,
>     +                          loop_terminator_link) {
> 
> 
> I presume the terminator list is guaranteed to be sorted top to bottom?

yes

> 
> Reviewed-by: Jason Ekstrand <jason at jlekstrand.net 
> <mailto:jason at jlekstrand.net>>

Thanks!

> 
>     +
>     +         /* Remove break from the terminator */
>     +         nir_instr *break_instr =
>     +            nir_block_last_instr(terminator->break_block);
>     +         nir_instr_remove(break_instr);
>     +
>     +         /* Pluck out the loop body. */
>     +         nir_cf_list loop_body;
>     +         nir_cf_extract(&loop_body,
>     +                        nir_after_cf_node(&terminator->nif->cf_node),
>     +                        cursor);
>     +
>     +         /* Reinsert loop body into continue from block */
>     +         nir_cf_reinsert(&loop_body,
>     +                       
>       nir_after_block(terminator->continue_from_block));
>     +
>     +         cursor = terminator->continue_from_then ?
>     +           nir_after_block(nir_if_last_then_block(terminator->nif)) :
>     +           nir_after_block(nir_if_last_else_block(terminator->nif));
>     +      }
>     +   } else {
>     +      nir_block *blk_after_loop =
>     +         nir_cursor_current_block(nir_after_cf_node(&loop->cf_node));
> 
>     -      nir_phi_src *phi_src = exec_node_data(nir_phi_src,
>     -                                           
>     exec_list_get_head(&phi->srcs),
>     -                                            node);
>     +      /* There may still be some single src phis following the loop
>     that
>     +       * have not yet been cleaned up by another pass. Tidy those up
>     +       * before unrolling the loop.
>     +       */
>     +      nir_foreach_instr_safe(instr, blk_after_loop) {
>     +         if (instr->type != nir_instr_type_phi)
>     +            break;
> 
>     -      nir_ssa_def_rewrite_uses(&phi->dest.ssa, phi_src->src);
>     -      nir_instr_remove(instr);
>     +         nir_phi_instr *phi = nir_instr_as_phi(instr);
>     +         assert(exec_list_length(&phi->srcs) == 1);
> 
>     -      progress = true;
>     -   }
>     +         nir_phi_src *phi_src =
>     +            exec_node_data(nir_phi_src,
>     exec_list_get_head(&phi->srcs), node);
> 
>     -   nir_block *last_loop_blk = nir_loop_last_block(loop);
>     -   if (nir_block_ends_in_break(last_loop_blk)) {
>     +         nir_ssa_def_rewrite_uses(&phi->dest.ssa, phi_src->src);
>     +         nir_instr_remove(instr);
>     +      }
> 
>             /* Remove break at end of the loop */
>     +      nir_block *last_loop_blk = nir_loop_last_block(loop);
>             nir_instr *break_instr = nir_block_last_instr(last_loop_blk);
>             nir_instr_remove(break_instr);
>     +   }
> 
>     -      /* Pluck out the loop body. */
>     -      nir_cf_list loop_body;
>     -      nir_cf_extract(&loop_body,
>     nir_before_block(nir_loop_first_block(loop)),
>     -                     nir_after_block(nir_loop_last_block(loop)));
>     -
>     -      /* Reinsert loop body after the loop */
>     -      nir_cf_reinsert(&loop_body, nir_after_cf_node(&loop->cf_node));
>     +   /* Pluck out the loop body. */
>     +   nir_cf_list loop_body;
>     +   nir_cf_extract(&loop_body,
>     nir_before_block(nir_loop_first_block(loop)),
>     +                  nir_after_block(nir_loop_last_block(loop)));
> 
>     -      /* The loop has been unrolled so remove it. */
>     -      nir_cf_node_remove(&loop->cf_node);
>     +   /* Reinsert loop body after the loop */
>     +   nir_cf_reinsert(&loop_body, nir_after_cf_node(&loop->cf_node));
> 
>     -      progress = true;
>     -   }
>     +   /* The loop has been unrolled so remove it. */
>     +   nir_cf_node_remove(&loop->cf_node);
> 
>     -   return progress;
>     +   return true;
>       }
> 
>       static bool
>     @@ -585,9 +621,12 @@ process_loops(nir_shader *sh, nir_cf_node
>     *cf_node, bool *has_nested_loop_out)
>              * statements in a loop like this.
>              */
>             if (loop->info->limiting_terminator == NULL &&
>     -          list_empty(&loop->info->loop_terminator_list) &&
>                 !loop->info->complex_loop) {
> 
>     +         nir_block *last_loop_blk = nir_loop_last_block(loop);
>     +         if (!nir_block_ends_in_break(last_loop_blk))
>     +            goto exit;
>     +
>                progress = wrapper_unroll(loop);
> 
>                goto exit;
>     -- 
>     2.17.1
> 
>     _______________________________________________
>     mesa-dev mailing list
>     mesa-dev at lists.freedesktop.org <mailto:mesa-dev at lists.freedesktop.org>
>     https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 


More information about the mesa-dev mailing list