[Mesa-dev] [PATCH v4 39/48] nir: Add a new subgroups lowering pass

Iago Toral itoral at igalia.com
Thu Nov 2 07:40:07 UTC 2017


All 4 patches are:

Reviewed-by: Iago Toral Quiroga <itoral at igalia.com>

On Tue, 2017-10-31 at 16:54 -0700, Jason Ekstrand wrote:
> This commit pulls nir_lower_read_invocations_to_scalar along with
> most
> of the guts of nir_opt_intrinsics (which mostly does subgroup
> lowering)
> into a new nir_lower_subgroups pass.  There are various other bits of
> subgroup lowering that we're going to want to do so it makes a bit
> more
> sense to keep it all together in one pass.  We also move it in i965
> to
> happen after nir_lower_system_values to ensure that because we want
> to
> handle the subgroup mask system value intrinsics here.
> 
> Reviewed-by: Iago Toral Quiroga <itoral at igalia.com>
> ---
>  src/compiler/Makefile.sources                      |   2 +-
>  src/compiler/nir/meson.build                       |   2 +-
>  src/compiler/nir/nir.h                             |  12 +-
>  .../nir/nir_lower_read_invocation_to_scalar.c      | 112 ---------
> ----
>  src/compiler/nir/nir_lower_subgroups.c             | 184
> +++++++++++++++++++++
>  src/compiler/nir/nir_opt_intrinsics.c              |  71 +-------
>  src/intel/compiler/brw_compiler.c                  |   3 -
>  src/intel/compiler/brw_nir.c                       |   8 +-
>  8 files changed, 208 insertions(+), 186 deletions(-)
>  delete mode 100644
> src/compiler/nir/nir_lower_read_invocation_to_scalar.c
>  create mode 100644 src/compiler/nir/nir_lower_subgroups.c
> 
> diff --git a/src/compiler/Makefile.sources
> b/src/compiler/Makefile.sources
> index 27cc33a..12d932f 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -233,11 +233,11 @@ NIR_FILES = \
>  	nir/nir_lower_passthrough_edgeflags.c \
>  	nir/nir_lower_patch_vertices.c \
>  	nir/nir_lower_phis_to_scalar.c \
> -	nir/nir_lower_read_invocation_to_scalar.c \
>  	nir/nir_lower_regs_to_ssa.c \
>  	nir/nir_lower_returns.c \
>  	nir/nir_lower_samplers.c \
>  	nir/nir_lower_samplers_as_deref.c \
> +	nir/nir_lower_subgroups.c \
>  	nir/nir_lower_system_values.c \
>  	nir/nir_lower_tex.c \
>  	nir/nir_lower_to_source_mods.c \
> diff --git a/src/compiler/nir/meson.build
> b/src/compiler/nir/meson.build
> index cb88eff..5c22bf6 100644
> --- a/src/compiler/nir/meson.build
> +++ b/src/compiler/nir/meson.build
> @@ -120,11 +120,11 @@ files_libnir = files(
>    'nir_lower_passthrough_edgeflags.c',
>    'nir_lower_patch_vertices.c',
>    'nir_lower_phis_to_scalar.c',
> -  'nir_lower_read_invocation_to_scalar.c',
>    'nir_lower_regs_to_ssa.c',
>    'nir_lower_returns.c',
>    'nir_lower_samplers.c',
>    'nir_lower_samplers_as_deref.c',
> +  'nir_lower_subgroups.c',
>    'nir_lower_system_values.c',
>    'nir_lower_tex.c',
>    'nir_lower_to_source_mods.c',
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index dd833cf..8c3a20c 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -1835,9 +1835,6 @@ typedef struct nir_shader_compiler_options {
>     bool lower_extract_byte;
>     bool lower_extract_word;
>  
> -   bool lower_vote_trivial;
> -   bool lower_subgroup_masks;
> -
>     /**
>      * Does the driver support real 32-bit integers?  (Otherwise,
> integers
>      * are simulated by floats.)
> @@ -2462,6 +2459,15 @@ bool nir_lower_samplers(nir_shader *shader,
>  bool nir_lower_samplers_as_deref(nir_shader *shader,
>                                   const struct gl_shader_program
> *shader_program);
>  
> +typedef struct nir_lower_subgroups_options {
> +   bool lower_to_scalar:1;
> +   bool lower_vote_trivial:1;
> +   bool lower_subgroup_masks:1;
> +} nir_lower_subgroups_options;
> +
> +bool nir_lower_subgroups(nir_shader *shader,
> +                         const nir_lower_subgroups_options
> *options);
> +
>  bool nir_lower_system_values(nir_shader *shader);
>  
>  typedef struct nir_lower_tex_options {
> diff --git a/src/compiler/nir/nir_lower_read_invocation_to_scalar.c
> b/src/compiler/nir/nir_lower_read_invocation_to_scalar.c
> deleted file mode 100644
> index 69e7c0a..0000000
> --- a/src/compiler/nir/nir_lower_read_invocation_to_scalar.c
> +++ /dev/null
> @@ -1,112 +0,0 @@
> -/*
> - * Copyright © 2017 Intel Corporation
> - *
> - * Permission is hereby granted, free of charge, to any person
> obtaining a
> - * copy of this software and associated documentation files (the
> "Software"),
> - * to deal in the Software without restriction, including without
> limitation
> - * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> - * and/or sell copies of the Software, and to permit persons to whom
> the
> - * Software is furnished to do so, subject to the following
> conditions:
> - *
> - * The above copyright notice and this permission notice (including
> the next
> - * paragraph) shall be included in all copies or substantial
> portions of the
> - * Software.
> - *
> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> EVENT SHALL
> - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
> OR OTHER
> - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> OTHER DEALINGS
> - * IN THE SOFTWARE.
> - */
> -
> -#include "nir.h"
> -#include "nir_builder.h"
> -
> -/** @file nir_lower_read_invocation_to_scalar.c
> - *
> - * Replaces
> nir_intrinsic_read_invocation/nir_intrinsic_read_first_invocation
> - * operations with num_components != 1 with individual per-channel
> operations.
> - */
> -
> -static void
> -lower_read_invocation_to_scalar(nir_builder *b, nir_intrinsic_instr
> *intrin)
> -{
> -   b->cursor = nir_before_instr(&intrin->instr);
> -
> -   nir_ssa_def *value = nir_ssa_for_src(b, intrin->src[0], intrin-
> >num_components);
> -   nir_ssa_def *reads[4];
> -
> -   for (unsigned i = 0; i < intrin->num_components; i++) {
> -      nir_intrinsic_instr *chan_intrin =
> -         nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
> -      nir_ssa_dest_init(&chan_intrin->instr, &chan_intrin->dest,
> -                        1, intrin->dest.ssa.bit_size, NULL);
> -      chan_intrin->num_components = 1;
> -
> -      /* value */
> -      chan_intrin->src[0] = nir_src_for_ssa(nir_channel(b, value,
> i));
> -      /* invocation */
> -      if (intrin->intrinsic == nir_intrinsic_read_invocation)
> -         nir_src_copy(&chan_intrin->src[1], &intrin->src[1],
> chan_intrin);
> -
> -      nir_builder_instr_insert(b, &chan_intrin->instr);
> -
> -      reads[i] = &chan_intrin->dest.ssa;
> -   }
> -
> -   nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
> -                            nir_src_for_ssa(nir_vec(b, reads,
> -                                                    intrin-
> >num_components)));
> -   nir_instr_remove(&intrin->instr);
> -}
> -
> -static bool
> -nir_lower_read_invocation_to_scalar_impl(nir_function_impl *impl)
> -{
> -   bool progress = false;
> -   nir_builder b;
> -   nir_builder_init(&b, impl);
> -
> -   nir_foreach_block(block, impl) {
> -      nir_foreach_instr_safe(instr, block) {
> -         if (instr->type != nir_instr_type_intrinsic)
> -            continue;
> -
> -         nir_intrinsic_instr *intrin =
> nir_instr_as_intrinsic(instr);
> -
> -         if (intrin->num_components == 1)
> -            continue;
> -
> -         switch (intrin->intrinsic) {
> -         case nir_intrinsic_read_invocation:
> -         case nir_intrinsic_read_first_invocation:
> -            lower_read_invocation_to_scalar(&b, intrin);
> -            progress = true;
> -            break;
> -         default:
> -            break;
> -         }
> -      }
> -   }
> -
> -   if (progress) {
> -      nir_metadata_preserve(impl, nir_metadata_block_index |
> -                                  nir_metadata_dominance);
> -   }
> -   return progress;
> -}
> -
> -bool
> -nir_lower_read_invocation_to_scalar(nir_shader *shader)
> -{
> -   bool progress = false;
> -
> -   nir_foreach_function(function, shader) {
> -      if (function->impl)
> -         progress |=
> nir_lower_read_invocation_to_scalar_impl(function->impl);
> -   }
> -
> -   return progress;
> -}
> diff --git a/src/compiler/nir/nir_lower_subgroups.c
> b/src/compiler/nir/nir_lower_subgroups.c
> new file mode 100644
> index 0000000..0d11dc9
> --- /dev/null
> +++ b/src/compiler/nir/nir_lower_subgroups.c
> @@ -0,0 +1,184 @@
> +/*
> + * Copyright © 2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person
> obtaining a
> + * copy of this software and associated documentation files (the
> "Software"),
> + * to deal in the Software without restriction, including without
> limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom
> the
> + * Software is furnished to do so, subject to the following
> conditions:
> + *
> + * The above copyright notice and this permission notice (including
> the next
> + * paragraph) shall be included in all copies or substantial
> portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
> OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "nir.h"
> +#include "nir_builder.h"
> +
> +/**
> + * \file nir_opt_intrinsics.c
> + */
> +
> +static nir_ssa_def *
> +lower_read_invocation_to_scalar(nir_builder *b, nir_intrinsic_instr
> *intrin)
> +{
> +   /* This is safe to call on scalar things but it would be silly */
> +   assert(intrin->dest.ssa.num_components > 1);
> +
> +   nir_ssa_def *value = nir_ssa_for_src(b, intrin->src[0],
> +                                           intrin->num_components);
> +   nir_ssa_def *reads[4];
> +
> +   for (unsigned i = 0; i < intrin->num_components; i++) {
> +      nir_intrinsic_instr *chan_intrin =
> +         nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
> +      nir_ssa_dest_init(&chan_intrin->instr, &chan_intrin->dest,
> +                        1, intrin->dest.ssa.bit_size, NULL);
> +      chan_intrin->num_components = 1;
> +
> +      /* value */
> +      chan_intrin->src[0] = nir_src_for_ssa(nir_channel(b, value,
> i));
> +      /* invocation */
> +      if (intrin->intrinsic == nir_intrinsic_read_invocation)
> +         nir_src_copy(&chan_intrin->src[1], &intrin->src[1],
> chan_intrin);
> +
> +      nir_builder_instr_insert(b, &chan_intrin->instr);
> +
> +      reads[i] = &chan_intrin->dest.ssa;
> +   }
> +
> +   return nir_vec(b, reads, intrin->num_components);
> +}
> +
> +static nir_ssa_def *
> +high_subgroup_mask(nir_builder *b,
> +                   nir_ssa_def *count,
> +                   uint64_t base_mask)
> +{
> +   /* group_mask could probably be calculated more efficiently but
> we want to
> +    * be sure not to shift by 64 if the subgroup size is 64 because
> the GLSL
> +    * shift operator is undefined in that case. In any case if we
> were worried
> +    * about efficency this should probably be done further down
> because the
> +    * subgroup size is likely to be known at compile time.
> +    */
> +   nir_ssa_def *subgroup_size = nir_load_subgroup_size(b);
> +   nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull);
> +   nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64),
> subgroup_size);
> +   nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift);
> +   nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b,
> base_mask), count);
> +
> +   return nir_iand(b, higher_bits, group_mask);
> +}
> +
> +static nir_ssa_def *
> +lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
> +                       const nir_lower_subgroups_options *options)
> +{
> +   switch (intrin->intrinsic) {
> +   case nir_intrinsic_vote_any:
> +   case nir_intrinsic_vote_all:
> +      if (options->lower_vote_trivial)
> +         return nir_ssa_for_src(b, intrin->src[0], 1);
> +      break;
> +
> +   case nir_intrinsic_vote_eq:
> +      if (options->lower_vote_trivial)
> +         return nir_imm_int(b, NIR_TRUE);
> +      break;
> +
> +   case nir_intrinsic_read_invocation:
> +   case nir_intrinsic_read_first_invocation:
> +      if (options->lower_to_scalar && intrin->num_components > 1)
> +         return lower_read_invocation_to_scalar(b, intrin);
> +      break;
> +
> +   case nir_intrinsic_load_subgroup_eq_mask:
> +   case nir_intrinsic_load_subgroup_ge_mask:
> +   case nir_intrinsic_load_subgroup_gt_mask:
> +   case nir_intrinsic_load_subgroup_le_mask:
> +   case nir_intrinsic_load_subgroup_lt_mask: {
> +      if (!options->lower_subgroup_masks)
> +         return NULL;
> +
> +      nir_ssa_def *count = nir_load_subgroup_invocation(b);
> +
> +      switch (intrin->intrinsic) {
> +      case nir_intrinsic_load_subgroup_eq_mask:
> +         return nir_ishl(b, nir_imm_int64(b, 1ull), count);
> +      case nir_intrinsic_load_subgroup_ge_mask:
> +         return high_subgroup_mask(b, count, ~0ull);
> +      case nir_intrinsic_load_subgroup_gt_mask:
> +         return high_subgroup_mask(b, count, ~1ull);
> +      case nir_intrinsic_load_subgroup_le_mask:
> +         return nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull),
> count));
> +      case nir_intrinsic_load_subgroup_lt_mask:
> +         return nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull),
> count));
> +      default:
> +         unreachable("you seriously can't tell this is
> unreachable?");
> +      }
> +      break;
> +   }
> +   default:
> +      break;
> +   }
> +
> +   return NULL;
> +}
> +
> +static bool
> +lower_subgroups_impl(nir_function_impl *impl,
> +                     const nir_lower_subgroups_options *options)
> +{
> +   nir_builder b;
> +   nir_builder_init(&b, impl);
> +   bool progress = false;
> +
> +   nir_foreach_block(block, impl) {
> +      nir_foreach_instr_safe(instr, block) {
> +         if (instr->type != nir_instr_type_intrinsic)
> +            continue;
> +
> +         nir_intrinsic_instr *intrin =
> nir_instr_as_intrinsic(instr);
> +         b.cursor = nir_before_instr(instr);
> +
> +         nir_ssa_def *lower = lower_subgroups_intrin(&b, intrin,
> options);
> +         if (!lower)
> +            continue;
> +
> +         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
> nir_src_for_ssa(lower));
> +         nir_instr_remove(instr);
> +         progress = true;
> +      }
> +   }
> +
> +   return progress;
> +}
> +
> +bool
> +nir_lower_subgroups(nir_shader *shader,
> +                    const nir_lower_subgroups_options *options)
> +{
> +   bool progress = false;
> +
> +   nir_foreach_function(function, shader) {
> +      if (!function->impl)
> +         continue;
> +
> +      if (lower_subgroups_impl(function->impl, options)) {
> +         progress = true;
> +         nir_metadata_preserve(function->impl,
> nir_metadata_block_index |
> +                                               nir_metadata_dominanc
> e);
> +      }
> +   }
> +
> +   return progress;
> +}
> diff --git a/src/compiler/nir/nir_opt_intrinsics.c
> b/src/compiler/nir/nir_opt_intrinsics.c
> index d5fdc51..98c8b1a 100644
> --- a/src/compiler/nir/nir_opt_intrinsics.c
> +++ b/src/compiler/nir/nir_opt_intrinsics.c
> @@ -28,26 +28,6 @@
>   * \file nir_opt_intrinsics.c
>   */
>  
> -static nir_ssa_def *
> -high_subgroup_mask(nir_builder *b,
> -                   nir_ssa_def *count,
> -                   uint64_t base_mask)
> -{
> -   /* group_mask could probably be calculated more efficiently but
> we want to
> -    * be sure not to shift by 64 if the subgroup size is 64 because
> the GLSL
> -    * shift operator is undefined in that case. In any case if we
> were worried
> -    * about efficency this should probably be done further down
> because the
> -    * subgroup size is likely to be known at compile time.
> -    */
> -   nir_ssa_def *subgroup_size = nir_load_subgroup_size(b);
> -   nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull);
> -   nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64),
> subgroup_size);
> -   nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift);
> -   nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b,
> base_mask), count);
> -
> -   return nir_iand(b, higher_bits, group_mask);
> -}
> -
>  static bool
>  opt_intrinsics_impl(nir_function_impl *impl)
>  {
> @@ -66,22 +46,14 @@ opt_intrinsics_impl(nir_function_impl *impl)
>  
>           switch (intrin->intrinsic) {
>           case nir_intrinsic_vote_any:
> -         case nir_intrinsic_vote_all: {
> -            nir_const_value *val = nir_src_as_const_value(intrin-
> >src[0]);
> -            if (!val && !b.shader->options->lower_vote_trivial)
> -               continue;
> -
> -            replacement = nir_ssa_for_src(&b, intrin->src[0], 1);
> +         case nir_intrinsic_vote_all:
> +            if (nir_src_as_const_value(intrin->src[0]))
> +               replacement = nir_ssa_for_src(&b, intrin->src[0], 1);
>              break;
> -         }
> -         case nir_intrinsic_vote_eq: {
> -            nir_const_value *val = nir_src_as_const_value(intrin-
> >src[0]);
> -            if (!val && !b.shader->options->lower_vote_trivial)
> -               continue;
> -
> -            replacement = nir_imm_int(&b, NIR_TRUE);
> +         case nir_intrinsic_vote_eq:
> +            if (nir_src_as_const_value(intrin->src[0]))
> +               replacement = nir_imm_int(&b, NIR_TRUE);
>              break;
> -         }
>           case nir_intrinsic_ballot: {
>              assert(b.shader->options->max_subgroup_size != 0);
>              if (b.shader->options->max_subgroup_size > 32 ||
> @@ -100,37 +72,6 @@ opt_intrinsics_impl(nir_function_impl *impl)
>                                                   nir_imm_int(&b,
> 0));
>              break;
>           }
> -         case nir_intrinsic_load_subgroup_eq_mask:
> -         case nir_intrinsic_load_subgroup_ge_mask:
> -         case nir_intrinsic_load_subgroup_gt_mask:
> -         case nir_intrinsic_load_subgroup_le_mask:
> -         case nir_intrinsic_load_subgroup_lt_mask: {
> -            if (!b.shader->options->lower_subgroup_masks)
> -               break;
> -
> -            nir_ssa_def *count = nir_load_subgroup_invocation(&b);
> -
> -            switch (intrin->intrinsic) {
> -            case nir_intrinsic_load_subgroup_eq_mask:
> -               replacement = nir_ishl(&b, nir_imm_int64(&b, 1ull),
> count);
> -               break;
> -            case nir_intrinsic_load_subgroup_ge_mask:
> -               replacement = high_subgroup_mask(&b, count, ~0ull);
> -               break;
> -            case nir_intrinsic_load_subgroup_gt_mask:
> -               replacement = high_subgroup_mask(&b, count, ~1ull);
> -               break;
> -            case nir_intrinsic_load_subgroup_le_mask:
> -               replacement = nir_inot(&b, nir_ishl(&b,
> nir_imm_int64(&b, ~1ull), count));
> -               break;
> -            case nir_intrinsic_load_subgroup_lt_mask:
> -               replacement = nir_inot(&b, nir_ishl(&b,
> nir_imm_int64(&b, ~0ull), count));
> -               break;
> -            default:
> -               unreachable("you seriously can't tell this is
> unreachable?");
> -            }
> -            break;
> -         }
>           default:
>              break;
>           }
> diff --git a/src/intel/compiler/brw_compiler.c
> b/src/intel/compiler/brw_compiler.c
> index 2f6af7d..a6129e9 100644
> --- a/src/intel/compiler/brw_compiler.c
> +++ b/src/intel/compiler/brw_compiler.c
> @@ -57,7 +57,6 @@ static const struct nir_shader_compiler_options
> scalar_nir_options = {
>     .lower_unpack_snorm_4x8 = true,
>     .lower_unpack_unorm_2x16 = true,
>     .lower_unpack_unorm_4x8 = true,
> -   .lower_subgroup_masks = true,
>     .max_subgroup_size = 32,
>     .max_unroll_iterations = 32,
>  };
> @@ -80,7 +79,6 @@ static const struct nir_shader_compiler_options
> vector_nir_options = {
>     .lower_unpack_unorm_2x16 = true,
>     .lower_extract_byte = true,
>     .lower_extract_word = true,
> -   .lower_vote_trivial = true,
>     .max_unroll_iterations = 32,
>  };
>  
> @@ -99,7 +97,6 @@ static const struct nir_shader_compiler_options
> vector_nir_options_gen6 = {
>     .lower_unpack_unorm_2x16 = true,
>     .lower_extract_byte = true,
>     .lower_extract_word = true,
> -   .lower_vote_trivial = true,
>     .max_unroll_iterations = 32,
>  };
>  
> diff --git a/src/intel/compiler/brw_nir.c
> b/src/intel/compiler/brw_nir.c
> index e5ff6de..f599f74 100644
> --- a/src/intel/compiler/brw_nir.c
> +++ b/src/intel/compiler/brw_nir.c
> @@ -620,7 +620,6 @@ brw_preprocess_nir(const struct brw_compiler
> *compiler, nir_shader *nir)
>  
>     OPT(nir_lower_tex, &tex_options);
>     OPT(nir_normalize_cubemap_coords);
> -   OPT(nir_lower_read_invocation_to_scalar);
>  
>     OPT(nir_lower_global_vars_to_local);
>  
> @@ -637,6 +636,13 @@ brw_preprocess_nir(const struct brw_compiler
> *compiler, nir_shader *nir)
>  
>     OPT(nir_lower_system_values);
>  
> +   const nir_lower_subgroups_options subgroups_options = {
> +      .lower_to_scalar = true,
> +      .lower_subgroup_masks = true,
> +      .lower_vote_trivial = !is_scalar,
> +   };
> +   OPT(nir_lower_subgroups, &subgroups_options);
> +
>     OPT(nir_lower_clip_cull_distance_arrays);
>  
>     nir_variable_mode indirect_mask = 0;


More information about the mesa-dev mailing list