[Mesa-dev] [PATCH v2 43/52] nir: Add a new subgroups lowering pass

Lionel Landwerlin lionel.g.landwerlin at intel.com
Fri Oct 13 10:52:47 UTC 2017


On 13/10/17 06:48, Jason Ekstrand wrote:
> This commit pulls nir_lower_read_invocations_to_scalar along with most
> of the guts of nir_opt_intrinsics (which mostly does subgroup lowering)
> into a new nir_lower_subgroups pass.  There are various other bits of
> subgroup lowering that we're going to want to do so it makes a bit more
> sense to keep it all together in one pass.  We also move it in i965 to
> happen after nir_lower_system_values to ensure that because we want to
> handle the subgroup mask system value intrinsics here.
> ---
>   src/compiler/Makefile.sources                      |   2 +-
>   src/compiler/nir/nir.h                             |  12 +-
>   .../nir/nir_lower_read_invocation_to_scalar.c      | 112 --------------
>   src/compiler/nir/nir_lower_subgroups.c             | 161 +++++++++++++++++++++
>   src/compiler/nir/nir_opt_intrinsics.c              |  51 +------
>   src/intel/compiler/brw_compiler.c                  |   3 -
>   src/intel/compiler/brw_nir.c                       |   8 +-
>   7 files changed, 184 insertions(+), 165 deletions(-)
>   delete mode 100644 src/compiler/nir/nir_lower_read_invocation_to_scalar.c
>   create mode 100644 src/compiler/nir/nir_lower_subgroups.c
>
> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> index 2724a41..912c003 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -232,11 +232,11 @@ NIR_FILES = \
>   	nir/nir_lower_passthrough_edgeflags.c \
>   	nir/nir_lower_patch_vertices.c \
>   	nir/nir_lower_phis_to_scalar.c \
> -	nir/nir_lower_read_invocation_to_scalar.c \
>   	nir/nir_lower_regs_to_ssa.c \
>   	nir/nir_lower_returns.c \
>   	nir/nir_lower_samplers.c \
>   	nir/nir_lower_samplers_as_deref.c \
> +	nir/nir_lower_subgroups.c \
>   	nir/nir_lower_system_values.c \
>   	nir/nir_lower_tex.c \
>   	nir/nir_lower_to_source_mods.c \
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 5af1503..1154c42 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -1831,9 +1831,6 @@ typedef struct nir_shader_compiler_options {
>      bool lower_extract_byte;
>      bool lower_extract_word;
>   
> -   bool lower_vote_trivial;
> -   bool lower_subgroup_masks;
> -
>      /**
>       * Does the driver support real 32-bit integers?  (Otherwise, integers
>       * are simulated by floats.)
> @@ -2460,6 +2457,15 @@ bool nir_lower_samplers(nir_shader *shader,
>   bool nir_lower_samplers_as_deref(nir_shader *shader,
>                                    const struct gl_shader_program *shader_program);
>   
> +typedef struct nir_lower_subgroups_options {
> +   bool lower_to_scalar:1;
> +   bool lower_vote_trivial:1;
> +   bool lower_subgroup_masks:1;
> +} nir_lower_subgroups_options;
> +
> +bool nir_lower_subgroups(nir_shader *shader,
> +                         const nir_lower_subgroups_options *options);
> +
>   bool nir_lower_system_values(nir_shader *shader);
>   
>   typedef struct nir_lower_tex_options {
> diff --git a/src/compiler/nir/nir_lower_read_invocation_to_scalar.c b/src/compiler/nir/nir_lower_read_invocation_to_scalar.c
> deleted file mode 100644
> index 69e7c0a..0000000
> --- a/src/compiler/nir/nir_lower_read_invocation_to_scalar.c
> +++ /dev/null
> @@ -1,112 +0,0 @@
> -/*
> - * Copyright © 2017 Intel Corporation
> - *
> - * Permission is hereby granted, free of charge, to any person obtaining a
> - * copy of this software and associated documentation files (the "Software"),
> - * to deal in the Software without restriction, including without limitation
> - * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> - * and/or sell copies of the Software, and to permit persons to whom the
> - * Software is furnished to do so, subject to the following conditions:
> - *
> - * The above copyright notice and this permission notice (including the next
> - * paragraph) shall be included in all copies or substantial portions of the
> - * Software.
> - *
> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> - * IN THE SOFTWARE.
> - */
> -
> -#include "nir.h"
> -#include "nir_builder.h"
> -
> -/** @file nir_lower_read_invocation_to_scalar.c
> - *
> - * Replaces nir_intrinsic_read_invocation/nir_intrinsic_read_first_invocation
> - * operations with num_components != 1 with individual per-channel operations.
> - */
> -
> -static void
> -lower_read_invocation_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin)
> -{
> -   b->cursor = nir_before_instr(&intrin->instr);
> -
> -   nir_ssa_def *value = nir_ssa_for_src(b, intrin->src[0], intrin->num_components);
> -   nir_ssa_def *reads[4];
> -
> -   for (unsigned i = 0; i < intrin->num_components; i++) {
> -      nir_intrinsic_instr *chan_intrin =
> -         nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
> -      nir_ssa_dest_init(&chan_intrin->instr, &chan_intrin->dest,
> -                        1, intrin->dest.ssa.bit_size, NULL);
> -      chan_intrin->num_components = 1;
> -
> -      /* value */
> -      chan_intrin->src[0] = nir_src_for_ssa(nir_channel(b, value, i));
> -      /* invocation */
> -      if (intrin->intrinsic == nir_intrinsic_read_invocation)
> -         nir_src_copy(&chan_intrin->src[1], &intrin->src[1], chan_intrin);
> -
> -      nir_builder_instr_insert(b, &chan_intrin->instr);
> -
> -      reads[i] = &chan_intrin->dest.ssa;
> -   }
> -
> -   nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
> -                            nir_src_for_ssa(nir_vec(b, reads,
> -                                                    intrin->num_components)));
> -   nir_instr_remove(&intrin->instr);
> -}
> -
> -static bool
> -nir_lower_read_invocation_to_scalar_impl(nir_function_impl *impl)
> -{
> -   bool progress = false;
> -   nir_builder b;
> -   nir_builder_init(&b, impl);
> -
> -   nir_foreach_block(block, impl) {
> -      nir_foreach_instr_safe(instr, block) {
> -         if (instr->type != nir_instr_type_intrinsic)
> -            continue;
> -
> -         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
> -
> -         if (intrin->num_components == 1)
> -            continue;
> -
> -         switch (intrin->intrinsic) {
> -         case nir_intrinsic_read_invocation:
> -         case nir_intrinsic_read_first_invocation:
> -            lower_read_invocation_to_scalar(&b, intrin);
> -            progress = true;
> -            break;
> -         default:
> -            break;
> -         }
> -      }
> -   }
> -
> -   if (progress) {
> -      nir_metadata_preserve(impl, nir_metadata_block_index |
> -                                  nir_metadata_dominance);
> -   }
> -   return progress;
> -}
> -
> -bool
> -nir_lower_read_invocation_to_scalar(nir_shader *shader)
> -{
> -   bool progress = false;
> -
> -   nir_foreach_function(function, shader) {
> -      if (function->impl)
> -         progress |= nir_lower_read_invocation_to_scalar_impl(function->impl);
> -   }
> -
> -   return progress;
> -}
> diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c
> new file mode 100644
> index 0000000..02738c4
> --- /dev/null
> +++ b/src/compiler/nir/nir_lower_subgroups.c
> @@ -0,0 +1,161 @@
> +/*
> + * Copyright © 2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "nir.h"
> +#include "nir_builder.h"
> +
> +/**
> + * \file nir_opt_intrinsics.c
> + */
> +
> +static nir_ssa_def *
> +lower_read_invocation_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin)
> +{
> +   nir_ssa_def *value = nir_ssa_for_src(b, intrin->src[0],
> +                                           intrin->num_components);
> +   nir_ssa_def *reads[4];
> +
> +   for (unsigned i = 0; i < intrin->num_components; i++) {
> +      nir_intrinsic_instr *chan_intrin =
> +         nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
> +      nir_ssa_dest_init(&chan_intrin->instr, &chan_intrin->dest,
> +                        1, intrin->dest.ssa.bit_size, NULL);
> +      chan_intrin->num_components = 1;
> +
> +      /* value */
> +      chan_intrin->src[0] = nir_src_for_ssa(nir_channel(b, value, i));
> +      /* invocation */
> +      if (intrin->intrinsic == nir_intrinsic_read_invocation)
> +         nir_src_copy(&chan_intrin->src[1], &intrin->src[1], chan_intrin);
> +
> +      nir_builder_instr_insert(b, &chan_intrin->instr);
> +
> +      reads[i] = &chan_intrin->dest.ssa;
> +   }
> +
> +   return nir_vec(b, reads, intrin->num_components);
> +}
> +
> +static nir_ssa_def *
> +lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
> +                       const nir_lower_subgroups_options *options)
> +{
> +   switch (intrin->intrinsic) {
> +   case nir_intrinsic_vote_any:
> +   case nir_intrinsic_vote_all:
> +      if (options->lower_vote_trivial)
> +         return nir_ssa_for_src(b, intrin->src[0], 1);
> +      break;
> +
> +   case nir_intrinsic_vote_eq:
> +      if (options->lower_vote_trivial)
> +         return nir_imm_int(b, NIR_TRUE);
> +      break;
> +
> +   case nir_intrinsic_read_invocation:
> +   case nir_intrinsic_read_first_invocation:
> +      if (options->lower_to_scalar)
> +         return lower_read_invocation_to_scalar(b, intrin);
> +      break;
> +
> +   case nir_intrinsic_load_subgroup_eq_mask:
> +   case nir_intrinsic_load_subgroup_ge_mask:
> +   case nir_intrinsic_load_subgroup_gt_mask:
> +   case nir_intrinsic_load_subgroup_le_mask:
> +   case nir_intrinsic_load_subgroup_lt_mask: {
> +      if (!options->lower_subgroup_masks)
> +         return NULL;
> +
> +      nir_ssa_def *count = nir_load_subgroup_invocation(b);
> +
> +      switch (intrin->intrinsic) {
> +      case nir_intrinsic_load_subgroup_eq_mask:
> +         return nir_ishl(b, nir_imm_int64(b, 1ull), count);
> +      case nir_intrinsic_load_subgroup_ge_mask:
> +         return nir_ishl(b, nir_imm_int64(b, ~0ull), count);
> +      case nir_intrinsic_load_subgroup_gt_mask:
> +         return nir_ishl(b, nir_imm_int64(b, ~1ull), count);
> +      case nir_intrinsic_load_subgroup_le_mask:
> +         return nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), count));
> +      case nir_intrinsic_load_subgroup_lt_mask:
> +         return nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), count));
> +      default:
> +         unreachable("you seriously can't tell this is unreachable?");
> +      }
> +      break;
> +   }
> +   default:
> +      break;
> +   }
> +
> +   return NULL;
> +}
> +
> +static bool
> +lower_subgroups_impl(nir_function_impl *impl,
> +                     const nir_lower_subgroups_options *options)
> +{
> +   nir_builder b;
> +   nir_builder_init(&b, impl);
> +   bool progress = false;
> +
> +   nir_foreach_block(block, impl) {
> +      nir_foreach_instr_safe(instr, block) {
> +         if (instr->type != nir_instr_type_intrinsic)
> +            continue;
> +
> +         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
> +         b.cursor = nir_before_instr(instr);
> +
> +         nir_ssa_def *lower = lower_subgroups_intrin(&b, intrin, options);
> +         if (!lower)
> +            continue;
> +
> +         nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(lower));
> +         nir_instr_remove(instr);
> +         progress = true;
> +      }
> +   }
> +
> +   return progress;
> +}
> +
> +bool
> +nir_lower_subgroups(nir_shader *shader,
> +                    const nir_lower_subgroups_options *options)
> +{
> +   bool progress = false;
> +
> +   nir_foreach_function(function, shader) {
> +      if (!function->impl)
> +         continue;
> +
> +      if (lower_subgroups_impl(function->impl, options)) {
> +         progress = true;
> +         nir_metadata_preserve(function->impl, nir_metadata_block_index |
> +                                               nir_metadata_dominance);
> +      }
> +   }
> +
> +   return progress;
> +}
> diff --git a/src/compiler/nir/nir_opt_intrinsics.c b/src/compiler/nir/nir_opt_intrinsics.c
> index 26a0f96..98c8b1a 100644
> --- a/src/compiler/nir/nir_opt_intrinsics.c
> +++ b/src/compiler/nir/nir_opt_intrinsics.c
> @@ -46,22 +46,14 @@ opt_intrinsics_impl(nir_function_impl *impl)
>   
>            switch (intrin->intrinsic) {
>            case nir_intrinsic_vote_any:
> -         case nir_intrinsic_vote_all: {
> -            nir_const_value *val = nir_src_as_const_value(intrin->src[0]);
> -            if (!val && !b.shader->options->lower_vote_trivial)
> -               continue;
> -
> -            replacement = nir_ssa_for_src(&b, intrin->src[0], 1);
> +         case nir_intrinsic_vote_all:
> +            if (nir_src_as_const_value(intrin->src[0]))
> +               replacement = nir_ssa_for_src(&b, intrin->src[0], 1);
>               break;
> -         }
> -         case nir_intrinsic_vote_eq: {
> -            nir_const_value *val = nir_src_as_const_value(intrin->src[0]);
> -            if (!val && !b.shader->options->lower_vote_trivial)
> -               continue;
> -
> -            replacement = nir_imm_int(&b, NIR_TRUE);
> +         case nir_intrinsic_vote_eq:
> +            if (nir_src_as_const_value(intrin->src[0]))
> +               replacement = nir_imm_int(&b, NIR_TRUE);
>               break;
> -         }
>            case nir_intrinsic_ballot: {
>               assert(b.shader->options->max_subgroup_size != 0);
>               if (b.shader->options->max_subgroup_size > 32 ||
> @@ -80,37 +72,6 @@ opt_intrinsics_impl(nir_function_impl *impl)
>                                                    nir_imm_int(&b, 0));
>               break;
>            }
> -         case nir_intrinsic_load_subgroup_eq_mask:
> -         case nir_intrinsic_load_subgroup_ge_mask:
> -         case nir_intrinsic_load_subgroup_gt_mask:
> -         case nir_intrinsic_load_subgroup_le_mask:
> -         case nir_intrinsic_load_subgroup_lt_mask: {
> -            if (!b.shader->options->lower_subgroup_masks)
> -               break;
> -
> -            nir_ssa_def *count = nir_load_subgroup_invocation(&b);
> -
> -            switch (intrin->intrinsic) {
> -            case nir_intrinsic_load_subgroup_eq_mask:
> -               replacement = nir_ishl(&b, nir_imm_int64(&b, 1ull), count);
> -               break;
> -            case nir_intrinsic_load_subgroup_ge_mask:
> -               replacement = nir_ishl(&b, nir_imm_int64(&b, ~0ull), count);
> -               break;
> -            case nir_intrinsic_load_subgroup_gt_mask:
> -               replacement = nir_ishl(&b, nir_imm_int64(&b, ~1ull), count);
> -               break;
> -            case nir_intrinsic_load_subgroup_le_mask:
> -               replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b, ~1ull), count));
> -               break;
> -            case nir_intrinsic_load_subgroup_lt_mask:
> -               replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b, ~0ull), count));
> -               break;
> -            default:
> -               unreachable("you seriously can't tell this is unreachable?");
> -            }
> -            break;
> -         }
>            default:
>               break;
>            }
> diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
> index 2f6af7d..a6129e9 100644
> --- a/src/intel/compiler/brw_compiler.c
> +++ b/src/intel/compiler/brw_compiler.c
> @@ -57,7 +57,6 @@ static const struct nir_shader_compiler_options scalar_nir_options = {
>      .lower_unpack_snorm_4x8 = true,
>      .lower_unpack_unorm_2x16 = true,
>      .lower_unpack_unorm_4x8 = true,
> -   .lower_subgroup_masks = true,
>      .max_subgroup_size = 32,
>      .max_unroll_iterations = 32,
>   };
> @@ -80,7 +79,6 @@ static const struct nir_shader_compiler_options vector_nir_options = {
>      .lower_unpack_unorm_2x16 = true,
>      .lower_extract_byte = true,
>      .lower_extract_word = true,
> -   .lower_vote_trivial = true,
>      .max_unroll_iterations = 32,
>   };
>   
> @@ -99,7 +97,6 @@ static const struct nir_shader_compiler_options vector_nir_options_gen6 = {
>      .lower_unpack_unorm_2x16 = true,
>      .lower_extract_byte = true,
>      .lower_extract_word = true,
> -   .lower_vote_trivial = true,
>      .max_unroll_iterations = 32,
>   };
>   
> diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
> index 0a41768..03ee862 100644
> --- a/src/intel/compiler/brw_nir.c
> +++ b/src/intel/compiler/brw_nir.c
> @@ -620,7 +620,6 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
>   
>      OPT(nir_lower_tex, &tex_options);
>      OPT(nir_normalize_cubemap_coords);
> -   OPT(nir_lower_read_invocation_to_scalar);
>   
>      OPT(nir_lower_global_vars_to_local);
>   
> @@ -637,6 +636,13 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
>   
>      OPT(nir_lower_system_values);
>   
> +   const nir_lower_subgroups_options subgroups_options = {
> +      .lower_to_scalar = true,
> +      .lower_subgroup_masks = true,
So we were lowering subgroup masks only for scalar before and now we do 
for non scalar too?

> +      .lower_vote_trivial = !is_scalar,
> +   };
> +   OPT(nir_lower_subgroups, &subgroups_options);
> +
>      OPT(nir_lower_clip_cull_distance_arrays);
>   
>      nir_variable_mode indirect_mask = 0;




More information about the mesa-dev mailing list