[Mesa-dev] [PATCH] nir/radv: remove restrictions on opt_if_loop_last_continue()

Samuel Pitoiset samuel.pitoiset at gmail.com
Mon Apr 8 20:43:04 UTC 2019


Acked-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>

On 4/8/19 12:13 PM, Timothy Arceri wrote:
> When I implemented opt_if_loop_last_continue() I had restricted
> this pass from moving other if-statements inside the branch opposite
> the continue. At the time it was causing a bunch of spilling in
> shader-db for i965.
>
> However Samuel Pitoiset noticed that making this pass more aggressive
> significantly improved the performance of Doom on RADV. Below are
> the statistics he gathered.
>
> 28717 shaders in 14931 tests
> Totals:
> SGPRS: 1267317 -> 1267549 (0.02 %)
> VGPRS: 896876 -> 895920 (-0.11 %)
> Spilled SGPRs: 24701 -> 26367 (6.74 %)
> Code Size: 48379452 -> 48507880 (0.27 %) bytes
> Max Waves: 241159 -> 241190 (0.01 %)
>
> Totals from affected shaders:
> SGPRS: 23584 -> 23816 (0.98 %)
> VGPRS: 25908 -> 24952 (-3.69 %)
> Spilled SGPRs: 503 -> 2169 (331.21 %)
> Code Size: 2471392 -> 2599820 (5.20 %) bytes
> Max Waves: 586 -> 617 (5.29 %)
>
> The codesize increases is related to Wolfenstein II it seems largely
> due to an increase in phis rather than the existing jumps.
>
> This gives +10% FPS with Doom on my Vega56.
>
> Rhys Perry also benchmarked Doom on his VEGA64:
>
> Before: 72.53 FPS
> After:  80.77 FPS
>
> v2: disable pass on non-AMD drivers
>
> Reviewed-by: Ian Romanick <ian.d.romanick at intel.com> (v1)
> ---
>   src/amd/vulkan/radv_shader.c                 |  2 +-
>   src/compiler/nir/nir.h                       |  2 +-
>   src/compiler/nir/nir_opt_if.c                | 87 ++++++++++++--------
>   src/freedreno/ir3/ir3_nir.c                  |  2 +-
>   src/gallium/auxiliary/nir/tgsi_to_nir.c      |  2 +-
>   src/gallium/drivers/freedreno/a2xx/ir2_nir.c |  2 +-
>   src/gallium/drivers/radeonsi/si_shader_nir.c |  2 +-
>   src/intel/compiler/brw_nir.c                 |  2 +-
>   src/mesa/state_tracker/st_glsl_to_nir.cpp    |  2 +-
>   9 files changed, 61 insertions(+), 42 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index d3d073d1db8..7cde5e728e4 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -158,7 +158,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively,
>   			NIR_PASS(progress, shader, nir_opt_remove_phis);
>                           NIR_PASS(progress, shader, nir_opt_dce);
>                   }
> -                NIR_PASS(progress, shader, nir_opt_if);
> +                NIR_PASS(progress, shader, nir_opt_if, true);
>                   NIR_PASS(progress, shader, nir_opt_dead_cf);
>                   NIR_PASS(progress, shader, nir_opt_cse);
>                   NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true);
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index bc72d8f83f5..c1ecf5ad561 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -3434,7 +3434,7 @@ bool nir_opt_gcm(nir_shader *shader, bool value_number);
>   
>   bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size);
>   
> -bool nir_opt_if(nir_shader *shader);
> +bool nir_opt_if(nir_shader *shader, bool aggressive_last_continue);
>   
>   bool nir_opt_intrinsics(nir_shader *shader);
>   
> diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
> index 4d3183ed151..065fa83c51c 100644
> --- a/src/compiler/nir/nir_opt_if.c
> +++ b/src/compiler/nir/nir_opt_if.c
> @@ -824,47 +824,60 @@ nir_block_ends_in_continue(nir_block *block)
>    * The continue should then be removed by nir_opt_trivial_continues() and the
>    * loop can potentially be unrolled.
>    *
> - * Note: do_work_2() is only ever blocks and nested loops. We could also nest
> - * other if-statments in the branch which would allow further continues to
> - * be removed. However in practice this can result in increased register
> - * pressure.
> + * Note: Unless the function param aggressive_last_continue==true do_work_2()
> + * is only ever blocks and nested loops. We avoid nesting other if-statments
> + * in the branch as this can result in increased register pressure, and in
> + * the i965 driver it causes a large amount of spilling in shader-db.
> + * For RADV however nesting these if-statements allows further continues to be
> + * remove and provides a significant FPS boost in Doom, which is why we have
> + * opted for this special bool to enable for more aggresive optimisation.
> + * TODO: The GCM pass solves most of the spilling regressions in i965, if it
> + * is ever enabled we should consider removing the aggressive_last_continue
> + * param.
>    */
>   static bool
> -opt_if_loop_last_continue(nir_loop *loop)
> +opt_if_loop_last_continue(nir_loop *loop, bool aggressive_last_continue)
>   {
> -   /* Get the last if-stament in the loop */
> +   nir_if *nif;
> +   bool then_ends_in_continue;
> +   bool else_ends_in_continue;
> +
> +   /* Scan the control flow of the loop from the last to the first node
> +    * looking for an if-statement we can optimise.
> +    */
>      nir_block *last_block = nir_loop_last_block(loop);
>      nir_cf_node *if_node = nir_cf_node_prev(&last_block->cf_node);
>      while (if_node) {
> -      if (if_node->type == nir_cf_node_if)
> -         break;
> +      if (if_node->type == nir_cf_node_if) {
> +         nif = nir_cf_node_as_if(if_node);
> +         nir_block *then_block = nir_if_last_then_block(nif);
> +         nir_block *else_block = nir_if_last_else_block(nif);
>   
> -      if_node = nir_cf_node_prev(if_node);
> -   }
> +         then_ends_in_continue = nir_block_ends_in_continue(then_block);
> +         else_ends_in_continue = nir_block_ends_in_continue(else_block);
>   
> -   if (!if_node || if_node->type != nir_cf_node_if)
> -      return false;
> -
> -   nir_if *nif = nir_cf_node_as_if(if_node);
> -   nir_block *then_block = nir_if_last_then_block(nif);
> -   nir_block *else_block = nir_if_last_else_block(nif);
> +         /* If both branches end in a jump do nothing, this should be handled
> +          * by nir_opt_dead_cf().
> +          */
> +         if ((then_ends_in_continue || nir_block_ends_in_break(then_block)) &&
> +             (else_ends_in_continue || nir_block_ends_in_break(else_block)))
> +            return false;
>   
> -   bool then_ends_in_continue = nir_block_ends_in_continue(then_block);
> -   bool else_ends_in_continue = nir_block_ends_in_continue(else_block);
> +         /* If continue found stop scanning and attempt optimisation, or
> +          */
> +         if (then_ends_in_continue || else_ends_in_continue ||
> +             !aggressive_last_continue)
> +            break;
> +      }
>   
> -   /* If both branches end in a continue do nothing, this should be handled
> -    * by nir_opt_dead_cf().
> -    */
> -   if ((then_ends_in_continue || nir_block_ends_in_break(then_block)) &&
> -       (else_ends_in_continue || nir_block_ends_in_break(else_block)))
> -      return false;
> +      if_node = nir_cf_node_prev(if_node);
> +   }
>   
> +   /* If we didn't find an if to optimise return */
>      if (!then_ends_in_continue && !else_ends_in_continue)
>         return false;
>   
> -   /* if the block after the if/else is empty we bail, otherwise we might end
> -    * up looping forever
> -    */
> +   /* If there is nothing after the if-statement we bail */
>      if (&nif->cf_node == nir_cf_node_prev(&last_block->cf_node) &&
>          exec_list_is_empty(&last_block->instr_list))
>         return false;
> @@ -1327,7 +1340,8 @@ opt_if_merge(nir_if *nif)
>   }
>   
>   static bool
> -opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
> +opt_if_cf_list(nir_builder *b, struct exec_list *cf_list,
> +               bool aggressive_last_continue)
>   {
>      bool progress = false;
>      foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
> @@ -1337,8 +1351,10 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
>   
>         case nir_cf_node_if: {
>            nir_if *nif = nir_cf_node_as_if(cf_node);
> -         progress |= opt_if_cf_list(b, &nif->then_list);
> -         progress |= opt_if_cf_list(b, &nif->else_list);
> +         progress |= opt_if_cf_list(b, &nif->then_list,
> +                                    aggressive_last_continue);
> +         progress |= opt_if_cf_list(b, &nif->else_list,
> +                                    aggressive_last_continue);
>            progress |= opt_if_loop_terminator(nif);
>            progress |= opt_if_merge(nif);
>            progress |= opt_if_simplification(b, nif);
> @@ -1347,10 +1363,12 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
>   
>         case nir_cf_node_loop: {
>            nir_loop *loop = nir_cf_node_as_loop(cf_node);
> -         progress |= opt_if_cf_list(b, &loop->body);
> +         progress |= opt_if_cf_list(b, &loop->body,
> +                                    aggressive_last_continue);
>            progress |= opt_simplify_bcsel_of_phi(b, loop);
>            progress |= opt_peel_loop_initial_if(loop);
> -         progress |= opt_if_loop_last_continue(loop);
> +         progress |= opt_if_loop_last_continue(loop,
> +                                               aggressive_last_continue);
>            break;
>         }
>   
> @@ -1399,7 +1417,7 @@ opt_if_safe_cf_list(nir_builder *b, struct exec_list *cf_list)
>   }
>   
>   bool
> -nir_opt_if(nir_shader *shader)
> +nir_opt_if(nir_shader *shader, bool aggressive_last_continue)
>   {
>      bool progress = false;
>   
> @@ -1416,7 +1434,8 @@ nir_opt_if(nir_shader *shader)
>         nir_metadata_preserve(function->impl, nir_metadata_block_index |
>                               nir_metadata_dominance);
>   
> -      if (opt_if_cf_list(&b, &function->impl->body)) {
> +      if (opt_if_cf_list(&b, &function->impl->body,
> +                         aggressive_last_continue)) {
>            nir_metadata_preserve(function->impl, nir_metadata_none);
>   
>            /* If that made progress, we're no longer really in SSA form.  We
> diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
> index 8b66615a6e0..7a8b9753643 100644
> --- a/src/freedreno/ir3/ir3_nir.c
> +++ b/src/freedreno/ir3/ir3_nir.c
> @@ -147,7 +147,7 @@ ir3_optimize_loop(nir_shader *s)
>   			OPT(s, nir_copy_prop);
>   			OPT(s, nir_opt_dce);
>   		}
> -		progress |= OPT(s, nir_opt_if);
> +		progress |= OPT(s, nir_opt_if, false);
>   		progress |= OPT(s, nir_opt_remove_phis);
>   		progress |= OPT(s, nir_opt_undef);
>   
> diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
> index 09e40977fd8..e3cc5560033 100644
> --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
> +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
> @@ -2066,7 +2066,7 @@ ttn_optimize_nir(nir_shader *nir, bool scalar)
>            NIR_PASS(progress, nir, nir_opt_dce);
>         }
>   
> -      NIR_PASS(progress, nir, nir_opt_if);
> +      NIR_PASS(progress, nir, nir_opt_if, false);
>         NIR_PASS(progress, nir, nir_opt_dead_cf);
>         NIR_PASS(progress, nir, nir_opt_cse);
>         NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
> diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
> index 6aaff393167..3d4145fccdc 100644
> --- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
> +++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
> @@ -94,7 +94,7 @@ ir2_optimize_loop(nir_shader *s)
>   			OPT(s, nir_opt_dce);
>   		}
>   		progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
> -		progress |= OPT(s, nir_opt_if);
> +		progress |= OPT(s, nir_opt_if, false);
>   		progress |= OPT(s, nir_opt_remove_phis);
>   		progress |= OPT(s, nir_opt_undef);
>   
> diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
> index 5ac18e2ebc8..938b0efcb76 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_nir.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
> @@ -880,7 +880,7 @@ si_lower_nir(struct si_shader_selector* sel)
>   			NIR_PASS(progress, sel->nir, nir_copy_prop);
>   			NIR_PASS(progress, sel->nir, nir_opt_dce);
>   		}
> -		NIR_PASS(progress, sel->nir, nir_opt_if);
> +		NIR_PASS(progress, sel->nir, nir_opt_if, true);
>   		NIR_PASS(progress, sel->nir, nir_opt_dead_cf);
>   		NIR_PASS(progress, sel->nir, nir_opt_cse);
>   		NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8, true, true);
> diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
> index 238db902b47..2e63efdc427 100644
> --- a/src/intel/compiler/brw_nir.c
> +++ b/src/intel/compiler/brw_nir.c
> @@ -609,7 +609,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
>            OPT(nir_copy_prop);
>            OPT(nir_opt_dce);
>         }
> -      OPT(nir_opt_if);
> +      OPT(nir_opt_if, false);
>         if (nir->options->max_unroll_iterations != 0) {
>            OPT(nir_opt_loop_unroll, indirect_mask);
>         }
> diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
> index 9a4e030413b..fb10869c9f9 100644
> --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
> @@ -324,7 +324,7 @@ st_nir_opts(nir_shader *nir, bool scalar)
>            NIR_PASS(progress, nir, nir_copy_prop);
>            NIR_PASS(progress, nir, nir_opt_dce);
>         }
> -      NIR_PASS(progress, nir, nir_opt_if);
> +      NIR_PASS(progress, nir, nir_opt_if, false);
>         NIR_PASS(progress, nir, nir_opt_dead_cf);
>         NIR_PASS(progress, nir, nir_opt_cse);
>         NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);


More information about the mesa-dev mailing list