[Mesa-dev] [PATCH 3/9] nir/opt_peephole_select: Don't peephole_select expensive math instructions

Thomas Helland thomashelland90 at gmail.com
Mon Oct 8 20:34:05 UTC 2018


Den tor. 30. aug. 2018 kl. 07:37 skrev Ian Romanick <idr at freedesktop.org>:
>
> From: Ian Romanick <ian.d.romanick at intel.com>
>
> On some GPUs, especially older Intel GPUs, some math instructions are
> very expensive.  On those architectures, don't reduce flow control to a
> csel if one of the branches contains one of these expensive math
> instructions.
>
> This prevents a bunch of cycle count regressions on pre-Gen6 platforms
> with a later patch (intel/compiler: More peephole select for pre-Gen6).
>
> Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
> ---
>  src/amd/vulkan/radv_shader.c                 |  2 +-
>  src/broadcom/compiler/nir_to_vir.c           |  2 +-
>  src/compiler/nir/nir.h                       |  2 +-
>  src/compiler/nir/nir_opt_peephole_select.c   | 46 +++++++++++++++++++++++-----
>  src/gallium/drivers/freedreno/ir3/ir3_nir.c  |  2 +-
>  src/gallium/drivers/radeonsi/si_shader_nir.c |  2 +-
>  src/gallium/drivers/vc4/vc4_program.c        |  2 +-
>  src/intel/compiler/brw_nir.c                 |  4 +--
>  src/mesa/state_tracker/st_glsl_to_nir.cpp    |  2 +-
>  9 files changed, 47 insertions(+), 17 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 632512db09b..c8d502a9e3a 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -143,7 +143,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively)
>                  NIR_PASS(progress, shader, nir_opt_if);
>                  NIR_PASS(progress, shader, nir_opt_dead_cf);
>                  NIR_PASS(progress, shader, nir_opt_cse);
> -                NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true);
> +                NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true);
>                  NIR_PASS(progress, shader, nir_opt_algebraic);
>                  NIR_PASS(progress, shader, nir_opt_constant_folding);
>                  NIR_PASS(progress, shader, nir_opt_undef);
> diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
> index 0d23cea4d5b..ec0ff4b907a 100644
> --- a/src/broadcom/compiler/nir_to_vir.c
> +++ b/src/broadcom/compiler/nir_to_vir.c
> @@ -1210,7 +1210,7 @@ v3d_optimize_nir(struct nir_shader *s)
>                  NIR_PASS(progress, s, nir_opt_dce);
>                  NIR_PASS(progress, s, nir_opt_dead_cf);
>                  NIR_PASS(progress, s, nir_opt_cse);
> -                NIR_PASS(progress, s, nir_opt_peephole_select, 8, true);
> +                NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
>                  NIR_PASS(progress, s, nir_opt_algebraic);
>                  NIR_PASS(progress, s, nir_opt_constant_folding);
>                  NIR_PASS(progress, s, nir_opt_undef);
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 67fa46d5557..feb69be6b59 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -3003,7 +3003,7 @@ bool nir_opt_move_comparisons(nir_shader *shader);
>  bool nir_opt_move_load_ubo(nir_shader *shader);
>
>  bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
> -                             bool indirect_load_ok);
> +                             bool indirect_load_ok, bool expensive_alu_ok);
>
>  bool nir_opt_remove_phis_impl(nir_function_impl *impl);
>  bool nir_opt_remove_phis(nir_shader *shader);
> diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.c
> index 6808d3eda6c..09b55f3739e 100644
> --- a/src/compiler/nir/nir_opt_peephole_select.c
> +++ b/src/compiler/nir/nir_opt_peephole_select.c
> @@ -59,7 +59,8 @@
>
>  static bool
>  block_check_for_allowed_instrs(nir_block *block, unsigned *count,
> -                               bool alu_ok, bool indirect_load_ok)
> +                               bool alu_ok, bool indirect_load_ok,
> +                               bool expensive_alu_ok)
>  {
>     nir_foreach_instr(instr, block) {
>        switch (instr->type) {
> @@ -117,6 +118,25 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count,
>           case nir_op_vec3:
>           case nir_op_vec4:
>              break;
> +
> +         case nir_op_fcos:
> +         case nir_op_fdiv:
> +         case nir_op_fexp2:
> +         case nir_op_flog2:
> +         case nir_op_fmod:
> +         case nir_op_fpow:
> +         case nir_op_frcp:
> +         case nir_op_frem:
> +         case nir_op_frsq:
> +         case nir_op_fsin:
> +         case nir_op_idiv:
> +         case nir_op_irem:
> +         case nir_op_udiv:
> +            if (!alu_ok || !expensive_alu_ok)
> +               return false;
> +
> +            break;
> +
>           default:
>              if (!alu_ok) {
>                 /* It must be a move-like operation. */
> @@ -160,7 +180,8 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count,
>
>  static bool
>  nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
> -                              unsigned limit, bool indirect_load_ok)
> +                              unsigned limit, bool indirect_load_ok,
> +                              bool expensive_alu_ok)
>  {
>     if (nir_cf_node_is_first(&block->cf_node))
>        return false;
> @@ -180,10 +201,17 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
>
>     /* ... and those blocks must only contain "allowed" instructions. */
>     unsigned count = 0;
> +#if 1
>     if (!block_check_for_allowed_instrs(then_block, &count, limit != 0,
> -                                       indirect_load_ok) ||
> +                                       indirect_load_ok, expensive_alu_ok) ||
>         !block_check_for_allowed_instrs(else_block, &count, limit != 0,
> -                                       indirect_load_ok))
> +                                       indirect_load_ok, expensive_alu_ok))
> +#else
> +   if (!block_check_for_allowed_instrs(then_block, &count, shader->info.stage,
> +                                       limit != 0, indirect_load_ok, expensive_alu_ok) ||
> +       !block_check_for_allowed_instrs(else_block, &count, shader->info.stage,
> +                                       limit != 0, indirect_load_ok, expensive_alu_ok))
> +#endif

Leftover testing stuff?

I like the idea of hiding expensive instructions in a branch.
However, I'm wondering if it might be an idea to let drivers
provide a callback with what instructions they want to allow?
If no other driver plan on doing something like this then I
guess this is OK. I would expect the "expensive" instructions
to be mostly the same subset on most architectures too.
Might want to get some input from others, but with the debug
stuff above removed this is.

Reviewed-by: Thomas Helland<thomashelland90 at gmail.com>

>        return false;
>
>     if (count > limit)
> @@ -250,14 +278,15 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
>
>  static bool
>  nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit,
> -                             bool indirect_load_ok)
> +                             bool indirect_load_ok, bool expensive_alu_ok)
>  {
>     nir_shader *shader = impl->function->shader;
>     bool progress = false;
>
>     nir_foreach_block_safe(block, impl) {
>        progress |= nir_opt_peephole_select_block(block, shader, limit,
> -                                                indirect_load_ok);
> +                                                indirect_load_ok,
> +                                                expensive_alu_ok);
>     }
>
>     if (progress)
> @@ -268,14 +297,15 @@ nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit,
>
>  bool
>  nir_opt_peephole_select(nir_shader *shader, unsigned limit,
> -                        bool indirect_load_ok)
> +                        bool indirect_load_ok, bool expensive_alu_ok)
>  {
>     bool progress = false;
>
>     nir_foreach_function(function, shader) {
>        if (function->impl)
>           progress |= nir_opt_peephole_select_impl(function->impl, limit,
> -                                                  indirect_load_ok);
> +                                                  indirect_load_ok,
> +                                                  expensive_alu_ok);
>     }
>
>     return progress;
> diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
> index 5f66ef5d170..bb3bb73644a 100644
> --- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
> +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
> @@ -107,7 +107,7 @@ ir3_optimize_loop(nir_shader *s)
>                         progress |= OPT(s, nir_opt_gcm, true);
>                 else if (gcm == 2)
>                         progress |= OPT(s, nir_opt_gcm, false);
> -               progress |= OPT(s, nir_opt_peephole_select, 16, true);
> +               progress |= OPT(s, nir_opt_peephole_select, 16, true, true);
>                 progress |= OPT(s, nir_opt_intrinsics);
>                 progress |= OPT(s, nir_opt_algebraic);
>                 progress |= OPT(s, nir_opt_constant_folding);
> diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
> index 9a7a8264283..cb1e208be8f 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_nir.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
> @@ -813,7 +813,7 @@ si_lower_nir(struct si_shader_selector* sel)
>                 NIR_PASS(progress, sel->nir, nir_opt_if);
>                 NIR_PASS(progress, sel->nir, nir_opt_dead_cf);
>                 NIR_PASS(progress, sel->nir, nir_opt_cse);
> -               NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8, true);
> +               NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8, true, true);
>
>                 /* Needed for algebraic lowering */
>                 NIR_PASS(progress, sel->nir, nir_opt_algebraic);
> diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
> index 39f7db9148c..fa2a32922f4 100644
> --- a/src/gallium/drivers/vc4/vc4_program.c
> +++ b/src/gallium/drivers/vc4/vc4_program.c
> @@ -1580,7 +1580,7 @@ vc4_optimize_nir(struct nir_shader *s)
>                  NIR_PASS(progress, s, nir_opt_dce);
>                  NIR_PASS(progress, s, nir_opt_dead_cf);
>                  NIR_PASS(progress, s, nir_opt_cse);
> -                NIR_PASS(progress, s, nir_opt_peephole_select, 8, true);
> +                NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
>                  NIR_PASS(progress, s, nir_opt_algebraic);
>                  NIR_PASS(progress, s, nir_opt_constant_folding);
>                  NIR_PASS(progress, s, nir_opt_undef);
> diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
> index 1d65107a93d..04d399fd0b4 100644
> --- a/src/intel/compiler/brw_nir.c
> +++ b/src/intel/compiler/brw_nir.c
> @@ -587,9 +587,9 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
>        const bool is_vec4_tessellation = !is_scalar &&
>           (nir->info.stage == MESA_SHADER_TESS_CTRL ||
>            nir->info.stage == MESA_SHADER_TESS_EVAL);
> -      OPT(nir_opt_peephole_select, 0, is_vec4_tessellation);
> +      OPT(nir_opt_peephole_select, 0, is_vec4_tessellation, false);
>        if (compiler->devinfo->gen >= 6)
> -         OPT(nir_opt_peephole_select, 1, is_vec4_tessellation);
> +         OPT(nir_opt_peephole_select, 1, is_vec4_tessellation, true);
>
>        OPT(nir_opt_intrinsics);
>        OPT(nir_opt_algebraic);
> diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
> index bfcef3a293f..098a1e98833 100644
> --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
> @@ -344,7 +344,7 @@ st_nir_opts(nir_shader *nir, bool scalar)
>        NIR_PASS(progress, nir, nir_opt_if);
>        NIR_PASS(progress, nir, nir_opt_dead_cf);
>        NIR_PASS(progress, nir, nir_opt_cse);
> -      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true);
> +      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
>
>        NIR_PASS(progress, nir, nir_opt_algebraic);
>        NIR_PASS(progress, nir, nir_opt_constant_folding);
> --
> 2.14.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list