[Mesa-dev] [PATCH] nir: optimize gl_SampleMaskIn to gl_HelperInvocation for radeonsi when possible

Marek Olšák maraeo at gmail.com
Mon Apr 15 21:03:00 UTC 2019


ping

On Tue, Apr 9, 2019 at 10:03 PM Marek Olšák <maraeo at gmail.com> wrote:

> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
>  src/compiler/nir/nir.h                    |  8 +++++
>  src/compiler/nir/nir_opt_intrinsics.c     | 40 +++++++++++++++++++++--
>  src/gallium/drivers/radeonsi/si_get.c     |  1 +
>  src/mesa/state_tracker/st_glsl_to_nir.cpp |  1 +
>  4 files changed, 48 insertions(+), 2 deletions(-)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 09950bf3398..d3874705235 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2283,20 +2283,28 @@ typedef struct nir_shader_compiler_options {
>      *
>      * This depends on some possibly hw implementation details, which may
>      * not be true for all hw.  In particular that the FS is only executed
>      * for covered samples or for helper invocations.  So, do not blindly
>      * enable this option.
>      *
>      * Note: See also issue #22 in ARB_shader_image_load_store
>      */
>     bool lower_helper_invocation;
>
> +   /**
> +    * Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
> +    *
> +    *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
> +    *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
> +    */
> +   bool optimize_sample_mask_in;
> +
>     bool lower_cs_local_index_from_id;
>     bool lower_cs_local_id_from_index;
>
>     bool lower_device_index_to_zero;
>
>     /* Set if nir_lower_wpos_ytransform() should also invert
> gl_PointCoord. */
>     bool lower_wpos_pntc;
>
>     bool lower_hadd;
>     bool lower_add_sat;
> diff --git a/src/compiler/nir/nir_opt_intrinsics.c
> b/src/compiler/nir/nir_opt_intrinsics.c
> index 7b054faa204..2f9e4e466c9 100644
> --- a/src/compiler/nir/nir_opt_intrinsics.c
> +++ b/src/compiler/nir/nir_opt_intrinsics.c
> @@ -22,21 +22,22 @@
>   */
>
>  #include "nir.h"
>  #include "nir_builder.h"
>
>  /**
>   * \file nir_opt_intrinsics.c
>   */
>
>  static bool
> -opt_intrinsics_impl(nir_function_impl *impl)
> +opt_intrinsics_impl(nir_function_impl *impl,
> +                    const struct nir_shader_compiler_options *options)
>  {
>     nir_builder b;
>     nir_builder_init(&b, impl);
>     bool progress = false;
>
>     nir_foreach_block(block, impl) {
>        nir_foreach_instr_safe(instr, block) {
>           if (instr->type != nir_instr_type_intrinsic)
>              continue;
>
> @@ -48,20 +49,55 @@ opt_intrinsics_impl(nir_function_impl *impl)
>           case nir_intrinsic_vote_any:
>           case nir_intrinsic_vote_all:
>              if (nir_src_is_const(intrin->src[0]))
>                 replacement = nir_ssa_for_src(&b, intrin->src[0], 1);
>              break;
>           case nir_intrinsic_vote_feq:
>           case nir_intrinsic_vote_ieq:
>              if (nir_src_is_const(intrin->src[0]))
>                 replacement = nir_imm_true(&b);
>              break;
> +         case nir_intrinsic_load_sample_mask_in:
> +            /* Transform:
> +             *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
> +             *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
> +             */
> +            if (!options->optimize_sample_mask_in)
> +               continue;
> +
> +            nir_foreach_use_safe(use_src, &intrin->dest.ssa) {
> +               if (use_src->parent_instr->type == nir_instr_type_alu) {
> +                  nir_alu_instr *alu =
> nir_instr_as_alu(use_src->parent_instr);
> +
> +                  if (alu->op == nir_op_ieq ||
> +                      alu->op == nir_op_ine) {
> +                     /* Check for 0 in either operand. */
> +                     nir_const_value *const_val =
> +                         nir_src_as_const_value(alu->src[0].src);
> +                     if (!const_val)
> +                        const_val =
> nir_src_as_const_value(alu->src[1].src);
> +                     if (!const_val || const_val->i32[0] != 0)
> +                        continue;
> +
> +                     nir_ssa_def *new_expr =
> nir_load_helper_invocation(&b, 1);
> +
> +                     if (alu->op == nir_op_ine32)
> +                        new_expr = nir_inot(&b, new_expr);
> +
> +                     nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
> +                                              nir_src_for_ssa(new_expr));
> +                     nir_instr_remove(&alu->instr);
> +                     continue;
> +                  }
> +               }
> +            }
> +            continue;
>           default:
>              break;
>           }
>
>           if (!replacement)
>              continue;
>
>           nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
>                                    nir_src_for_ssa(replacement));
>           nir_instr_remove(instr);
> @@ -74,19 +110,19 @@ opt_intrinsics_impl(nir_function_impl *impl)
>
>  bool
>  nir_opt_intrinsics(nir_shader *shader)
>  {
>     bool progress = false;
>
>     nir_foreach_function(function, shader) {
>        if (!function->impl)
>           continue;
>
> -      if (opt_intrinsics_impl(function->impl)) {
> +      if (opt_intrinsics_impl(function->impl, shader->options)) {
>           progress = true;
>           nir_metadata_preserve(function->impl, nir_metadata_block_index |
>                                                 nir_metadata_dominance);
>        }
>     }
>
>     return progress;
>  }
> diff --git a/src/gallium/drivers/radeonsi/si_get.c
> b/src/gallium/drivers/radeonsi/si_get.c
> index 58b56b34d13..2142d5a33f2 100644
> --- a/src/gallium/drivers/radeonsi/si_get.c
> +++ b/src/gallium/drivers/radeonsi/si_get.c
> @@ -494,20 +494,21 @@ static const struct nir_shader_compiler_options
> nir_options = {
>         .lower_pack_snorm_2x16 = true,
>         .lower_pack_snorm_4x8 = true,
>         .lower_pack_unorm_2x16 = true,
>         .lower_pack_unorm_4x8 = true,
>         .lower_unpack_snorm_2x16 = true,
>         .lower_unpack_snorm_4x8 = true,
>         .lower_unpack_unorm_2x16 = true,
>         .lower_unpack_unorm_4x8 = true,
>         .lower_extract_byte = true,
>         .lower_extract_word = true,
> +       .optimize_sample_mask_in = true,
>         .max_unroll_iterations = 32,
>         .native_integers = true,
>  };
>
>  static const void *
>  si_get_compiler_options(struct pipe_screen *screen,
>                         enum pipe_shader_ir ir,
>                         enum pipe_shader_type shader)
>  {
>         assert(ir == PIPE_SHADER_IR_NIR);
> diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp
> b/src/mesa/state_tracker/st_glsl_to_nir.cpp
> index fb10869c9f9..8028ccf7110 100644
> --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
> @@ -497,20 +497,21 @@ st_glsl_to_nir_post_opts(struct st_context *st,
> struct gl_program *prog,
>     /* This has to be done last.  Any operation the can cause
>      * prog->ParameterValues to get reallocated (e.g., anything that adds a
>      * program constant) has to happen before creating this linkage.
>      */
>     _mesa_associate_uniform_storage(st->ctx, shader_program, prog, true);
>
>     st_set_prog_affected_state_flags(prog);
>
>     NIR_PASS_V(nir, st_nir_lower_builtin);
>     NIR_PASS_V(nir, gl_nir_lower_atomics, shader_program, true);
> +   NIR_PASS_V(nir, nir_opt_intrinsics);
>
>     nir_variable_mode mask = nir_var_function_temp;
>     nir_remove_dead_variables(nir, mask);
>
>     if (st->ctx->_Shader->Flags & GLSL_DUMP) {
>        _mesa_log("\n");
>        _mesa_log("NIR IR for linked %s program %d:\n",
>               _mesa_shader_stage_to_string(prog->info.stage),
>               shader_program->Name);
>        nir_print_shader(nir, _mesa_get_log_file());
> --
> 2.17.1
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190415/a94c1f65/attachment-0001.html>


More information about the mesa-dev mailing list