<div dir="ltr"><div class="gmail_quote"><div dir="ltr">On Mon, Aug 27, 2018 at 4:10 AM Timothy Arceri <<a href="mailto:tarceri@itsqueeze.com">tarceri@itsqueeze.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">In GLSL IR we cheat with switch statements and simply convert them<br>
into loops with a single iteration. This allowed us to make use of<br>
the existing jump instruction handling provided by the loop handing<br>
code, it also allows dead code to be cleaned up once we have<br>
wrapped the code in a loop.<br>
<br>
However using loops in this way created previously unrollable loops<br>
which limits further optimisations. Here we provide a way to unroll<br>
loops that end in a break and have multiple other exits.<br>
<br>
All shader-db changes are from the dolphin uber shaders. There is a<br>
small amount of HURT shaders but in general the improvements far<br>
exceed the HURT.<br>
<br>
shader-db results IVB:<br>
<br>
total instructions in shared programs: 10018187 -> 10016468 (-0.02%)<br>
instructions in affected programs: 104080 -> 102361 (-1.65%)<br>
helped: 36<br>
HURT: 15<br>
<br>
total cycles in shared programs: 220065064 -> 154529655 (-29.78%)<br>
cycles in affected programs: 126063017 -> 60527608 (-51.99%)<br>
helped: 51<br>
HURT: 0<br>
<br>
total loops in shared programs: 2515 -> 2308 (-8.23%)<br>
loops in affected programs: 903 -> 696 (-22.92%)<br>
helped: 51<br>
HURT: 0<br>
<br>
total spills in shared programs: 4370 -> 4124 (-5.63%)<br>
spills in affected programs: 1397 -> 1151 (-17.61%)<br>
helped: 9<br>
HURT: 12<br>
<br>
total fills in shared programs: 4581 -> 4419 (-3.54%)<br>
fills in affected programs: 2201 -> 2039 (-7.36%)<br>
helped: 9<br>
HURT: 15<br>
---<br>
 src/compiler/nir/nir_opt_loop_unroll.c | 113 +++++++++++++++++--------<br>
 1 file changed, 76 insertions(+), 37 deletions(-)<br>
<br>
diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c<br>
index 9c33267cb72..fa60523aac7 100644<br>
--- a/src/compiler/nir/nir_opt_loop_unroll.c<br>
+++ b/src/compiler/nir/nir_opt_loop_unroll.c<br>
@@ -67,7 +67,6 @@ loop_prepare_for_unroll(nir_loop *loop)<br>
    /* Remove continue if its the last instruction in the loop */<br>
    nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop));<br>
    if (last_instr && last_instr->type == nir_instr_type_jump) {<br>
-      assert(nir_instr_as_jump(last_instr)->type == nir_jump_continue);<br>
       nir_instr_remove(last_instr);<br>
    }<br>
 }<br>
@@ -474,54 +473,91 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,<br>
 static bool<br>
 wrapper_unroll(nir_loop *loop)<br>
 {<br>
-   bool progress = false;<br>
-<br>
-   nir_block *blk_after_loop =<br>
-      nir_cursor_current_block(nir_after_cf_node(&loop->cf_node));<br>
-<br>
-   /* There may still be some single src phis following the loop that<br>
-    * have not yet been cleaned up by another pass. Tidy those up before<br>
-    * unrolling the loop.<br>
-    */<br>
-   nir_foreach_instr_safe(instr, blk_after_loop) {<br>
-      if (instr->type != nir_instr_type_phi)<br>
-         break;<br>
+   if (!list_empty(&loop->info->loop_terminator_list)) {<br>
<br>
-      nir_phi_instr *phi = nir_instr_as_phi(instr);<br>
-      assert(exec_list_length(&phi->srcs) == 1);<br>
+      /* Unrolling a loop with a large number of exits can result in a<br>
+       * large inrease in register pressure. For now we just skip<br>
+       * unrolling if we have more than 3 exits (not including the break<br>
+       * at the end of the loop).<br>
+       *<br>
+       * TODO: Most loops that fit this pattern are simply switch<br>
+       * statements that are converted to a loop to take advantage of<br>
+       * exiting jump instruction handling. In this case we could make<br>
+       * use of a binary seach pattern like we do in<br>
+       * nir_lower_indirect_derefs(), this should allow us to unroll the<br>
+       * loops in an optimal way and should also avoid some of the<br>
+       * register pressure that comes from simply nesting the<br>
+       * terminators one after the other.<br>
+       */<br>
+      if (list_length(&loop->info->loop_terminator_list) > 3)<br>
+         return false;<br>
+<br>
+      loop_prepare_for_unroll(loop);<br>
+<br>
+      nir_cursor cursor = nir_after_block(nir_loop_last_block(loop));<br></blockquote><div><br></div><div>Maybe call this loop_end; that's less generic than "cursor".<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+      list_for_each_entry(nir_loop_terminator, terminator,<br>
+                          &loop->info->loop_terminator_list,<br>
+                          loop_terminator_link) {<br></blockquote><div><br></div><div>I presume the terminator list is guaranteed to be sorted top to bottom?</div><div><br></div><div>Reviewed-by: Jason Ekstrand <<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>><br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+         /* Remove break from the terminator */<br>
+         nir_instr *break_instr =<br>
+            nir_block_last_instr(terminator->break_block);<br>
+         nir_instr_remove(break_instr);<br>
+<br>
+         /* Pluck out the loop body. */<br>
+         nir_cf_list loop_body;<br>
+         nir_cf_extract(&loop_body,<br>
+                        nir_after_cf_node(&terminator->nif->cf_node),<br>
+                        cursor);<br>
+<br>
+         /* Reinsert loop body into continue from block */<br>
+         nir_cf_reinsert(&loop_body,<br>
+                         nir_after_block(terminator->continue_from_block));<br>
+<br>
+         cursor = terminator->continue_from_then ?<br>
+           nir_after_block(nir_if_last_then_block(terminator->nif)) :<br>
+           nir_after_block(nir_if_last_else_block(terminator->nif));<br>
+      }<br>
+   } else {<br>
+      nir_block *blk_after_loop =<br>
+         nir_cursor_current_block(nir_after_cf_node(&loop->cf_node));<br>
<br>
-      nir_phi_src *phi_src = exec_node_data(nir_phi_src,<br>
-                                            exec_list_get_head(&phi->srcs),<br>
-                                            node);<br>
+      /* There may still be some single src phis following the loop that<br>
+       * have not yet been cleaned up by another pass. Tidy those up<br>
+       * before unrolling the loop.<br>
+       */<br>
+      nir_foreach_instr_safe(instr, blk_after_loop) {<br>
+         if (instr->type != nir_instr_type_phi)<br>
+            break;<br>
<br>
-      nir_ssa_def_rewrite_uses(&phi->dest.ssa, phi_src->src);<br>
-      nir_instr_remove(instr);<br>
+         nir_phi_instr *phi = nir_instr_as_phi(instr);<br>
+         assert(exec_list_length(&phi->srcs) == 1);<br>
<br>
-      progress = true;<br>
-   }<br>
+         nir_phi_src *phi_src =<br>
+            exec_node_data(nir_phi_src, exec_list_get_head(&phi->srcs), node);<br>
<br>
-   nir_block *last_loop_blk = nir_loop_last_block(loop);<br>
-   if (nir_block_ends_in_break(last_loop_blk)) {<br>
+         nir_ssa_def_rewrite_uses(&phi->dest.ssa, phi_src->src);<br>
+         nir_instr_remove(instr);<br>
+      }<br>
<br>
       /* Remove break at end of the loop */<br>
+      nir_block *last_loop_blk = nir_loop_last_block(loop);<br>
       nir_instr *break_instr = nir_block_last_instr(last_loop_blk);<br>
       nir_instr_remove(break_instr);<br>
+   }<br>
<br>
-      /* Pluck out the loop body. */<br>
-      nir_cf_list loop_body;<br>
-      nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)),<br>
-                     nir_after_block(nir_loop_last_block(loop)));<br>
-<br>
-      /* Reinsert loop body after the loop */<br>
-      nir_cf_reinsert(&loop_body, nir_after_cf_node(&loop->cf_node));<br>
+   /* Pluck out the loop body. */<br>
+   nir_cf_list loop_body;<br>
+   nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)),<br>
+                  nir_after_block(nir_loop_last_block(loop)));<br>
<br>
-      /* The loop has been unrolled so remove it. */<br>
-      nir_cf_node_remove(&loop->cf_node);<br>
+   /* Reinsert loop body after the loop */<br>
+   nir_cf_reinsert(&loop_body, nir_after_cf_node(&loop->cf_node));<br>
<br>
-      progress = true;<br>
-   }<br>
+   /* The loop has been unrolled so remove it. */<br>
+   nir_cf_node_remove(&loop->cf_node);<br>
<br>
-   return progress;<br>
+   return true;<br>
 }<br>
<br>
 static bool<br>
@@ -585,9 +621,12 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out)<br>
        * statements in a loop like this.<br>
        */<br>
       if (loop->info->limiting_terminator == NULL &&<br>
-          list_empty(&loop->info->loop_terminator_list) &&<br>
           !loop->info->complex_loop) {<br>
<br>
+         nir_block *last_loop_blk = nir_loop_last_block(loop);<br>
+         if (!nir_block_ends_in_break(last_loop_blk))<br>
+            goto exit;<br>
+<br>
          progress = wrapper_unroll(loop);<br>
<br>
          goto exit;<br>
-- <br>
2.17.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div></div>