[Mesa-dev] [PATCH v2 43/52] nir: Add a new subgroups lowering pass

Jason Ekstrand jason at jlekstrand.net
Wed Oct 25 21:37:54 UTC 2017


On Fri, Oct 13, 2017 at 3:52 AM, Lionel Landwerlin <
lionel.g.landwerlin at intel.com> wrote:

> On 13/10/17 06:48, Jason Ekstrand wrote:
>
>> This commit pulls nir_lower_read_invocations_to_scalar along with most
>> of the guts of nir_opt_intrinsics (which mostly does subgroup lowering)
>> into a new nir_lower_subgroups pass.  There are various other bits of
>> subgroup lowering that we're going to want to do so it makes a bit more
>> sense to keep it all together in one pass.  We also move it in i965 to
>> happen after nir_lower_system_values to ensure that because we want to
>> handle the subgroup mask system value intrinsics here.
>> ---
>>   src/compiler/Makefile.sources                      |   2 +-
>>   src/compiler/nir/nir.h                             |  12 +-
>>   .../nir/nir_lower_read_invocation_to_scalar.c      | 112 --------------
>>   src/compiler/nir/nir_lower_subgroups.c             | 161
>> +++++++++++++++++++++
>>   src/compiler/nir/nir_opt_intrinsics.c              |  51 +------
>>   src/intel/compiler/brw_compiler.c                  |   3 -
>>   src/intel/compiler/brw_nir.c                       |   8 +-
>>   7 files changed, 184 insertions(+), 165 deletions(-)
>>   delete mode 100644 src/compiler/nir/nir_lower_rea
>> d_invocation_to_scalar.c
>>   create mode 100644 src/compiler/nir/nir_lower_subgroups.c
>>
>> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.source
>> s
>> index 2724a41..912c003 100644
>> --- a/src/compiler/Makefile.sources
>> +++ b/src/compiler/Makefile.sources
>> @@ -232,11 +232,11 @@ NIR_FILES = \
>>         nir/nir_lower_passthrough_edgeflags.c \
>>         nir/nir_lower_patch_vertices.c \
>>         nir/nir_lower_phis_to_scalar.c \
>> -       nir/nir_lower_read_invocation_to_scalar.c \
>>         nir/nir_lower_regs_to_ssa.c \
>>         nir/nir_lower_returns.c \
>>         nir/nir_lower_samplers.c \
>>         nir/nir_lower_samplers_as_deref.c \
>> +       nir/nir_lower_subgroups.c \
>>         nir/nir_lower_system_values.c \
>>         nir/nir_lower_tex.c \
>>         nir/nir_lower_to_source_mods.c \
>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> index 5af1503..1154c42 100644
>> --- a/src/compiler/nir/nir.h
>> +++ b/src/compiler/nir/nir.h
>> @@ -1831,9 +1831,6 @@ typedef struct nir_shader_compiler_options {
>>      bool lower_extract_byte;
>>      bool lower_extract_word;
>>   -   bool lower_vote_trivial;
>> -   bool lower_subgroup_masks;
>> -
>>      /**
>>       * Does the driver support real 32-bit integers?  (Otherwise,
>> integers
>>       * are simulated by floats.)
>> @@ -2460,6 +2457,15 @@ bool nir_lower_samplers(nir_shader *shader,
>>   bool nir_lower_samplers_as_deref(nir_shader *shader,
>>                                    const struct gl_shader_program
>> *shader_program);
>>   +typedef struct nir_lower_subgroups_options {
>> +   bool lower_to_scalar:1;
>> +   bool lower_vote_trivial:1;
>> +   bool lower_subgroup_masks:1;
>> +} nir_lower_subgroups_options;
>> +
>> +bool nir_lower_subgroups(nir_shader *shader,
>> +                         const nir_lower_subgroups_options *options);
>> +
>>   bool nir_lower_system_values(nir_shader *shader);
>>     typedef struct nir_lower_tex_options {
>> diff --git a/src/compiler/nir/nir_lower_read_invocation_to_scalar.c
>> b/src/compiler/nir/nir_lower_read_invocation_to_scalar.c
>> deleted file mode 100644
>> index 69e7c0a..0000000
>> --- a/src/compiler/nir/nir_lower_read_invocation_to_scalar.c
>> +++ /dev/null
>> @@ -1,112 +0,0 @@
>> -/*
>> - * Copyright © 2017 Intel Corporation
>> - *
>> - * Permission is hereby granted, free of charge, to any person obtaining
>> a
>> - * copy of this software and associated documentation files (the
>> "Software"),
>> - * to deal in the Software without restriction, including without
>> limitation
>> - * the rights to use, copy, modify, merge, publish, distribute,
>> sublicense,
>> - * and/or sell copies of the Software, and to permit persons to whom the
>> - * Software is furnished to do so, subject to the following conditions:
>> - *
>> - * The above copyright notice and this permission notice (including the
>> next
>> - * paragraph) shall be included in all copies or substantial portions of
>> the
>> - * Software.
>> - *
>> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> EXPRESS OR
>> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> MERCHANTABILITY,
>> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
>> SHALL
>> - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> OTHER
>> - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> ARISING
>> - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>> DEALINGS
>> - * IN THE SOFTWARE.
>> - */
>> -
>> -#include "nir.h"
>> -#include "nir_builder.h"
>> -
>> -/** @file nir_lower_read_invocation_to_scalar.c
>> - *
>> - * Replaces nir_intrinsic_read_invocation/nir_intrinsic_read_first_invoc
>> ation
>> - * operations with num_components != 1 with individual per-channel
>> operations.
>> - */
>> -
>> -static void
>> -lower_read_invocation_to_scalar(nir_builder *b, nir_intrinsic_instr
>> *intrin)
>> -{
>> -   b->cursor = nir_before_instr(&intrin->instr);
>> -
>> -   nir_ssa_def *value = nir_ssa_for_src(b, intrin->src[0],
>> intrin->num_components);
>> -   nir_ssa_def *reads[4];
>> -
>> -   for (unsigned i = 0; i < intrin->num_components; i++) {
>> -      nir_intrinsic_instr *chan_intrin =
>> -         nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
>> -      nir_ssa_dest_init(&chan_intrin->instr, &chan_intrin->dest,
>> -                        1, intrin->dest.ssa.bit_size, NULL);
>> -      chan_intrin->num_components = 1;
>> -
>> -      /* value */
>> -      chan_intrin->src[0] = nir_src_for_ssa(nir_channel(b, value, i));
>> -      /* invocation */
>> -      if (intrin->intrinsic == nir_intrinsic_read_invocation)
>> -         nir_src_copy(&chan_intrin->src[1], &intrin->src[1],
>> chan_intrin);
>> -
>> -      nir_builder_instr_insert(b, &chan_intrin->instr);
>> -
>> -      reads[i] = &chan_intrin->dest.ssa;
>> -   }
>> -
>> -   nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
>> -                            nir_src_for_ssa(nir_vec(b, reads,
>> -
>> intrin->num_components)));
>> -   nir_instr_remove(&intrin->instr);
>> -}
>> -
>> -static bool
>> -nir_lower_read_invocation_to_scalar_impl(nir_function_impl *impl)
>> -{
>> -   bool progress = false;
>> -   nir_builder b;
>> -   nir_builder_init(&b, impl);
>> -
>> -   nir_foreach_block(block, impl) {
>> -      nir_foreach_instr_safe(instr, block) {
>> -         if (instr->type != nir_instr_type_intrinsic)
>> -            continue;
>> -
>> -         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
>> -
>> -         if (intrin->num_components == 1)
>> -            continue;
>> -
>> -         switch (intrin->intrinsic) {
>> -         case nir_intrinsic_read_invocation:
>> -         case nir_intrinsic_read_first_invocation:
>> -            lower_read_invocation_to_scalar(&b, intrin);
>> -            progress = true;
>> -            break;
>> -         default:
>> -            break;
>> -         }
>> -      }
>> -   }
>> -
>> -   if (progress) {
>> -      nir_metadata_preserve(impl, nir_metadata_block_index |
>> -                                  nir_metadata_dominance);
>> -   }
>> -   return progress;
>> -}
>> -
>> -bool
>> -nir_lower_read_invocation_to_scalar(nir_shader *shader)
>> -{
>> -   bool progress = false;
>> -
>> -   nir_foreach_function(function, shader) {
>> -      if (function->impl)
>> -         progress |= nir_lower_read_invocation_to_s
>> calar_impl(function->impl);
>> -   }
>> -
>> -   return progress;
>> -}
>> diff --git a/src/compiler/nir/nir_lower_subgroups.c
>> b/src/compiler/nir/nir_lower_subgroups.c
>> new file mode 100644
>> index 0000000..02738c4
>> --- /dev/null
>> +++ b/src/compiler/nir/nir_lower_subgroups.c
>> @@ -0,0 +1,161 @@
>> +/*
>> + * Copyright © 2017 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining
>> a
>> + * copy of this software and associated documentation files (the
>> "Software"),
>> + * to deal in the Software without restriction, including without
>> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute,
>> sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the
>> next
>> + * paragraph) shall be included in all copies or substantial portions of
>> the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
>> SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>> DEALINGS
>> + * IN THE SOFTWARE.
>> + */
>> +
>> +#include "nir.h"
>> +#include "nir_builder.h"
>> +
>> +/**
>> + * \file nir_opt_intrinsics.c
>> + */
>> +
>> +static nir_ssa_def *
>> +lower_read_invocation_to_scalar(nir_builder *b, nir_intrinsic_instr
>> *intrin)
>> +{
>> +   nir_ssa_def *value = nir_ssa_for_src(b, intrin->src[0],
>> +                                           intrin->num_components);
>> +   nir_ssa_def *reads[4];
>> +
>> +   for (unsigned i = 0; i < intrin->num_components; i++) {
>> +      nir_intrinsic_instr *chan_intrin =
>> +         nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
>> +      nir_ssa_dest_init(&chan_intrin->instr, &chan_intrin->dest,
>> +                        1, intrin->dest.ssa.bit_size, NULL);
>> +      chan_intrin->num_components = 1;
>> +
>> +      /* value */
>> +      chan_intrin->src[0] = nir_src_for_ssa(nir_channel(b, value, i));
>> +      /* invocation */
>> +      if (intrin->intrinsic == nir_intrinsic_read_invocation)
>> +         nir_src_copy(&chan_intrin->src[1], &intrin->src[1],
>> chan_intrin);
>> +
>> +      nir_builder_instr_insert(b, &chan_intrin->instr);
>> +
>> +      reads[i] = &chan_intrin->dest.ssa;
>> +   }
>> +
>> +   return nir_vec(b, reads, intrin->num_components);
>> +}
>> +
>> +static nir_ssa_def *
>> +lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
>> +                       const nir_lower_subgroups_options *options)
>> +{
>> +   switch (intrin->intrinsic) {
>> +   case nir_intrinsic_vote_any:
>> +   case nir_intrinsic_vote_all:
>> +      if (options->lower_vote_trivial)
>> +         return nir_ssa_for_src(b, intrin->src[0], 1);
>> +      break;
>> +
>> +   case nir_intrinsic_vote_eq:
>> +      if (options->lower_vote_trivial)
>> +         return nir_imm_int(b, NIR_TRUE);
>> +      break;
>> +
>> +   case nir_intrinsic_read_invocation:
>> +   case nir_intrinsic_read_first_invocation:
>> +      if (options->lower_to_scalar)
>> +         return lower_read_invocation_to_scalar(b, intrin);
>> +      break;
>> +
>> +   case nir_intrinsic_load_subgroup_eq_mask:
>> +   case nir_intrinsic_load_subgroup_ge_mask:
>> +   case nir_intrinsic_load_subgroup_gt_mask:
>> +   case nir_intrinsic_load_subgroup_le_mask:
>> +   case nir_intrinsic_load_subgroup_lt_mask: {
>> +      if (!options->lower_subgroup_masks)
>> +         return NULL;
>> +
>> +      nir_ssa_def *count = nir_load_subgroup_invocation(b);
>> +
>> +      switch (intrin->intrinsic) {
>> +      case nir_intrinsic_load_subgroup_eq_mask:
>> +         return nir_ishl(b, nir_imm_int64(b, 1ull), count);
>> +      case nir_intrinsic_load_subgroup_ge_mask:
>> +         return nir_ishl(b, nir_imm_int64(b, ~0ull), count);
>> +      case nir_intrinsic_load_subgroup_gt_mask:
>> +         return nir_ishl(b, nir_imm_int64(b, ~1ull), count);
>> +      case nir_intrinsic_load_subgroup_le_mask:
>> +         return nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), count));
>> +      case nir_intrinsic_load_subgroup_lt_mask:
>> +         return nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), count));
>> +      default:
>> +         unreachable("you seriously can't tell this is unreachable?");
>> +      }
>> +      break;
>> +   }
>> +   default:
>> +      break;
>> +   }
>> +
>> +   return NULL;
>> +}
>> +
>> +static bool
>> +lower_subgroups_impl(nir_function_impl *impl,
>> +                     const nir_lower_subgroups_options *options)
>> +{
>> +   nir_builder b;
>> +   nir_builder_init(&b, impl);
>> +   bool progress = false;
>> +
>> +   nir_foreach_block(block, impl) {
>> +      nir_foreach_instr_safe(instr, block) {
>> +         if (instr->type != nir_instr_type_intrinsic)
>> +            continue;
>> +
>> +         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
>> +         b.cursor = nir_before_instr(instr);
>> +
>> +         nir_ssa_def *lower = lower_subgroups_intrin(&b, intrin,
>> options);
>> +         if (!lower)
>> +            continue;
>> +
>> +         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
>> nir_src_for_ssa(lower));
>> +         nir_instr_remove(instr);
>> +         progress = true;
>> +      }
>> +   }
>> +
>> +   return progress;
>> +}
>> +
>> +bool
>> +nir_lower_subgroups(nir_shader *shader,
>> +                    const nir_lower_subgroups_options *options)
>> +{
>> +   bool progress = false;
>> +
>> +   nir_foreach_function(function, shader) {
>> +      if (!function->impl)
>> +         continue;
>> +
>> +      if (lower_subgroups_impl(function->impl, options)) {
>> +         progress = true;
>> +         nir_metadata_preserve(function->impl, nir_metadata_block_index
>> |
>> +                                               nir_metadata_dominance);
>> +      }
>> +   }
>> +
>> +   return progress;
>> +}
>> diff --git a/src/compiler/nir/nir_opt_intrinsics.c
>> b/src/compiler/nir/nir_opt_intrinsics.c
>> index 26a0f96..98c8b1a 100644
>> --- a/src/compiler/nir/nir_opt_intrinsics.c
>> +++ b/src/compiler/nir/nir_opt_intrinsics.c
>> @@ -46,22 +46,14 @@ opt_intrinsics_impl(nir_function_impl *impl)
>>              switch (intrin->intrinsic) {
>>            case nir_intrinsic_vote_any:
>> -         case nir_intrinsic_vote_all: {
>> -            nir_const_value *val = nir_src_as_const_value(intrin-
>> >src[0]);
>> -            if (!val && !b.shader->options->lower_vote_trivial)
>> -               continue;
>> -
>> -            replacement = nir_ssa_for_src(&b, intrin->src[0], 1);
>> +         case nir_intrinsic_vote_all:
>> +            if (nir_src_as_const_value(intrin->src[0]))
>> +               replacement = nir_ssa_for_src(&b, intrin->src[0], 1);
>>               break;
>> -         }
>> -         case nir_intrinsic_vote_eq: {
>> -            nir_const_value *val = nir_src_as_const_value(intrin-
>> >src[0]);
>> -            if (!val && !b.shader->options->lower_vote_trivial)
>> -               continue;
>> -
>> -            replacement = nir_imm_int(&b, NIR_TRUE);
>> +         case nir_intrinsic_vote_eq:
>> +            if (nir_src_as_const_value(intrin->src[0]))
>> +               replacement = nir_imm_int(&b, NIR_TRUE);
>>               break;
>> -         }
>>            case nir_intrinsic_ballot: {
>>               assert(b.shader->options->max_subgroup_size != 0);
>>               if (b.shader->options->max_subgroup_size > 32 ||
>> @@ -80,37 +72,6 @@ opt_intrinsics_impl(nir_function_impl *impl)
>>                                                    nir_imm_int(&b, 0));
>>               break;
>>            }
>> -         case nir_intrinsic_load_subgroup_eq_mask:
>> -         case nir_intrinsic_load_subgroup_ge_mask:
>> -         case nir_intrinsic_load_subgroup_gt_mask:
>> -         case nir_intrinsic_load_subgroup_le_mask:
>> -         case nir_intrinsic_load_subgroup_lt_mask: {
>> -            if (!b.shader->options->lower_subgroup_masks)
>> -               break;
>> -
>> -            nir_ssa_def *count = nir_load_subgroup_invocation(&b);
>> -
>> -            switch (intrin->intrinsic) {
>> -            case nir_intrinsic_load_subgroup_eq_mask:
>> -               replacement = nir_ishl(&b, nir_imm_int64(&b, 1ull),
>> count);
>> -               break;
>> -            case nir_intrinsic_load_subgroup_ge_mask:
>> -               replacement = nir_ishl(&b, nir_imm_int64(&b, ~0ull),
>> count);
>> -               break;
>> -            case nir_intrinsic_load_subgroup_gt_mask:
>> -               replacement = nir_ishl(&b, nir_imm_int64(&b, ~1ull),
>> count);
>> -               break;
>> -            case nir_intrinsic_load_subgroup_le_mask:
>> -               replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b,
>> ~1ull), count));
>> -               break;
>> -            case nir_intrinsic_load_subgroup_lt_mask:
>> -               replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b,
>> ~0ull), count));
>> -               break;
>> -            default:
>> -               unreachable("you seriously can't tell this is
>> unreachable?");
>> -            }
>> -            break;
>> -         }
>>            default:
>>               break;
>>            }
>> diff --git a/src/intel/compiler/brw_compiler.c
>> b/src/intel/compiler/brw_compiler.c
>> index 2f6af7d..a6129e9 100644
>> --- a/src/intel/compiler/brw_compiler.c
>> +++ b/src/intel/compiler/brw_compiler.c
>> @@ -57,7 +57,6 @@ static const struct nir_shader_compiler_options
>> scalar_nir_options = {
>>      .lower_unpack_snorm_4x8 = true,
>>      .lower_unpack_unorm_2x16 = true,
>>      .lower_unpack_unorm_4x8 = true,
>> -   .lower_subgroup_masks = true,
>>      .max_subgroup_size = 32,
>>      .max_unroll_iterations = 32,
>>   };
>> @@ -80,7 +79,6 @@ static const struct nir_shader_compiler_options
>> vector_nir_options = {
>>      .lower_unpack_unorm_2x16 = true,
>>      .lower_extract_byte = true,
>>      .lower_extract_word = true,
>> -   .lower_vote_trivial = true,
>>      .max_unroll_iterations = 32,
>>   };
>>   @@ -99,7 +97,6 @@ static const struct nir_shader_compiler_options
>> vector_nir_options_gen6 = {
>>      .lower_unpack_unorm_2x16 = true,
>>      .lower_extract_byte = true,
>>      .lower_extract_word = true,
>> -   .lower_vote_trivial = true,
>>      .max_unroll_iterations = 32,
>>   };
>>   diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
>> index 0a41768..03ee862 100644
>> --- a/src/intel/compiler/brw_nir.c
>> +++ b/src/intel/compiler/brw_nir.c
>> @@ -620,7 +620,6 @@ brw_preprocess_nir(const struct brw_compiler
>> *compiler, nir_shader *nir)
>>        OPT(nir_lower_tex, &tex_options);
>>      OPT(nir_normalize_cubemap_coords);
>> -   OPT(nir_lower_read_invocation_to_scalar);
>>        OPT(nir_lower_global_vars_to_local);
>>   @@ -637,6 +636,13 @@ brw_preprocess_nir(const struct brw_compiler
>> *compiler, nir_shader *nir)
>>        OPT(nir_lower_system_values);
>>   +   const nir_lower_subgroups_options subgroups_options = {
>> +      .lower_to_scalar = true,
>> +      .lower_subgroup_masks = true,
>>
> So we were lowering subgroup masks only for scalar before and now we do
> for non scalar too?
>

I suppose we are.  Shouldn't hurt anything though.


> +      .lower_vote_trivial = !is_scalar,
>> +   };
>> +   OPT(nir_lower_subgroups, &subgroups_options);
>> +
>>      OPT(nir_lower_clip_cull_distance_arrays);
>>        nir_variable_mode indirect_mask = 0;
>>
>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20171025/95304912/attachment-0001.html>


More information about the mesa-dev mailing list