[Mesa-dev] [PATCH v2 3/7] nir: lower 64bit subgroup shuffle intrinsics

Fri Mar 16 16:38:16 UTC 2018

On Fri, Mar 16, 2018 at 2:50 AM, Daniel Schürmann <
daniel.schuermann at campus.tu-berlin.de> wrote:

> Signed-off-by: Daniel Schürmann <daniel.schuermann at campus.tu-berlin.de>
> ---
>  src/compiler/nir/nir.h                 |  1 +
>  src/compiler/nir/nir_lower_subgroups.c | 83
> +++++++++++++++++++++++++++-------
>  2 files changed, 67 insertions(+), 17 deletions(-)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 6a51b7c4ab..0e3c026efa 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2564,6 +2564,7 @@ typedef struct nir_lower_subgroups_options {
>     bool lower_vote_eq_to_ballot:1;
>     bool lower_subgroup_masks:1;
>     bool lower_shuffle:1;
> +   bool lower_shuffle_to_32bit:1;
>     bool lower_quad:1;
>  } nir_lower_subgroups_options;
>
> diff --git a/src/compiler/nir/nir_lower_subgroups.c
> b/src/compiler/nir/nir_lower_subgroups.c
> index 9dc7be7947..669168e830 100644
> --- a/src/compiler/nir/nir_lower_subgroups.c
> +++ b/src/compiler/nir/nir_lower_subgroups.c
> @@ -28,6 +28,37 @@
>   * \file nir_opt_intrinsics.c
>   */
>
> +static nir_intrinsic_instr *ac_lower_subgroups_64bit_split_intrinsic(nir_builder
> *b, nir_intrinsic_instr *intrin, unsigned int component) {
>

Please put "static nir_intrinsic_instr *" and the "{" each on their own
line and wrap things so that we don't go over 80 characters.  Also, please
drop the ac_ prefix as this is no longer in radv code.

> +   nir_ssa_def *comp;
> +   if (component == 0)
> +      comp = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
> +   else
> +      comp = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);
> +
> +   nir_intrinsic_instr *intr = nir_intrinsic_instr_create(b->shader,
> intrin->intrinsic);
> +   nir_ssa_dest_init(&intr->instr, &intr->dest, 1, 32, NULL);
> +   intr->src[0] = nir_src_for_ssa(comp);
> +
> +   intr->const_index[0] = intrin->const_index[0];
> +   intr->const_index[1] = intrin->const_index[1];
> +   if (intrin->intrinsic == nir_intrinsic_read_invocation ||
> +      intrin->intrinsic == nir_intrinsic_shuffle ||
> +      intrin->intrinsic == nir_intrinsic_quad_broadcast) {
>

You can use nir_intrinsic_infos[intrin->intrinsic].num_srcs to make this a
bit more general.

> +      nir_src_copy(&intr->src[1], &intrin->src[1], intr);
> +   }
> +   intr->num_components = 1;
> +   nir_builder_instr_insert(b, &intr->instr);
> +   return intr;
> +}
> +
> +static nir_ssa_def *
> +lower_64bit_to_32bit(nir_builder *b, nir_intrinsic_instr *intrin) {

"{" goes on it's own line.  Also, how about "lower_subgroup_op_to_32bit" to
match "lower_subgroup_op_to_scalar" below.

> +   assert(intrin->src[0].ssa->bit_size == 64);
> +   nir_intrinsic_instr *intr_x = ac_lower_subgroups_64bit_split_intrinsic(b,
> intrin, 0);
> +   nir_intrinsic_instr *intr_y = ac_lower_subgroups_64bit_split_intrinsic(b,
> intrin, 1);
> +   return nir_pack_64_2x32_split(b, &intr_x->dest.ssa, &intr_y->dest.ssa);
> +}
> +
>  static nir_ssa_def *
>  ballot_type_to_uint(nir_builder *b, nir_ssa_def *value, unsigned
> bit_size)
>  {
> @@ -80,7 +111,8 @@ uint_to_ballot_type(nir_builder *b, nir_ssa_def *value,
>  }
>
>  static nir_ssa_def *
> -lower_subgroup_op_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin)
> +lower_subgroup_op_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin,
> +                        bool lower_shuffle_to_32bit)
>

Just call this lower_to_32bit as it doesn't necessarily have anything to do
with shuffles.  Also, please align the parameter to the (

>  {
>     /* This is safe to call on scalar things but it would be silly */
>     assert(intrin->dest.ssa.num_components > 1);
> @@ -107,9 +139,12 @@ lower_subgroup_op_to_scalar(nir_builder *b,
> nir_intrinsic_instr *intrin)
>        chan_intrin->const_index[0] = intrin->const_index[0];
>        chan_intrin->const_index[1] = intrin->const_index[1];
>
> -      nir_builder_instr_insert(b, &chan_intrin->instr);
> -
> -      reads[i] = &chan_intrin->dest.ssa;
> +      if (lower_shuffle_to_32bit && chan_intrin->src[0].ssa->bit_size ==
> 64) {
> +         reads[i] = lower_64bit_to_32bit(b, chan_intrin);
> +      } else {
> +         nir_builder_instr_insert(b, &chan_intrin->instr);
> +         reads[i] = &chan_intrin->dest.ssa;
> +      }
>     }
>
>     return nir_vec(b, reads, intrin->num_components);
> @@ -158,13 +193,19 @@ lower_vote_eq_to_ballot(nir_builder *b,
> nir_intrinsic_instr *intrin,
>                          1, value->bit_size, NULL);
>        rfi->num_components = 1;
>        rfi->src[0] = nir_src_for_ssa(nir_channel(b, value, i));
> -      nir_builder_instr_insert(b, &rfi->instr);
> +      nir_ssa_def *first_lane;
> +      if (options->lower_shuffle_to_32bit && rfi->src[0].ssa->bit_size
> == 64) {
>

I don't really see how read_first_invocation is related to shuffles

> +         first_lane = lower_64bit_to_32bit(b, rfi);
> +      } else {
> +         nir_builder_instr_insert(b, &rfi->instr);
> +         first_lane = &rfi->dest.ssa;
> +      }
>
>        nir_ssa_def *is_eq;
>        if (intrin->intrinsic == nir_intrinsic_vote_feq) {
> -         is_eq = nir_feq(b, &rfi->dest.ssa, nir_channel(b, value, i));
> +         is_eq = nir_feq(b, first_lane, nir_channel(b, value, i));
>        } else {
> -         is_eq = nir_ieq(b, &rfi->dest.ssa, nir_channel(b, value, i));
> +         is_eq = nir_ieq(b, first_lane, nir_channel(b, value, i));
>        }
>
>        if (all_eq == NULL) {
> @@ -188,7 +229,7 @@ lower_vote_eq_to_ballot(nir_builder *b,
> nir_intrinsic_instr *intrin,
>
>  static nir_ssa_def *
>  lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
> -              bool lower_to_scalar)
> +                        const nir_lower_subgroups_options *options)
>

Please align to the (

>  {
>     nir_ssa_def *index = nir_load_subgroup_invocation(b);
>     switch (intrin->intrinsic) {
> @@ -240,8 +281,10 @@ lower_shuffle(nir_builder *b, nir_intrinsic_instr
> *intrin,
>                       intrin->dest.ssa.num_components,
>                       intrin->dest.ssa.bit_size, NULL);
>
> -   if (lower_to_scalar && shuffle->num_components > 1) {
> -      return lower_subgroup_op_to_scalar(b, shuffle);
> +   if (options->lower_to_scalar && shuffle->num_components > 1) {
> +      return lower_subgroup_op_to_scalar(b, shuffle,
> options->lower_shuffle_to_32bit);
> +   } else if (options->lower_shuffle_to_32bit &&
> shuffle->src[0].ssa->bit_size == 64) {
> +      return lower_64bit_to_32bit(b, shuffle);
>     } else {
>        nir_builder_instr_insert(b, &shuffle->instr);
>        return &shuffle->dest.ssa;
> @@ -279,7 +322,9 @@ lower_subgroups_intrin(nir_builder *b,
> nir_intrinsic_instr *intrin,
>     case nir_intrinsic_read_invocation:
>     case nir_intrinsic_read_first_invocation:
>        if (options->lower_to_scalar && intrin->num_components > 1)
> -         return lower_subgroup_op_to_scalar(b, intrin);
> +         return lower_subgroup_op_to_scalar(b, intrin,
> options->lower_shuffle_to_32bit);
> +         else if (options->lower_shuffle_to_32bit &&
> intrin->src[0].ssa->bit_size == 64)
> +         return lower_64bit_to_32bit(b, intrin);
>        break;
>
>     case nir_intrinsic_load_subgroup_eq_mask:
> @@ -400,16 +445,18 @@ lower_subgroups_intrin(nir_builder *b,
> nir_intrinsic_instr *intrin,
>
>     case nir_intrinsic_shuffle:
>        if (options->lower_to_scalar && intrin->num_components > 1)
> -         return lower_subgroup_op_to_scalar(b, intrin);
> +         return lower_subgroup_op_to_scalar(b, intrin,
> options->lower_shuffle_to_32bit);
> +         else if (options->lower_shuffle_to_32bit &&
> intrin->src[0].ssa->bit_size == 64)
>

This needs to be dedented.

> +         return lower_64bit_to_32bit(b, intrin);
>        break;
>
>     case nir_intrinsic_shuffle_xor:
>     case nir_intrinsic_shuffle_up:
>     case nir_intrinsic_shuffle_down:
>        if (options->lower_shuffle)
> -         return lower_shuffle(b, intrin, options->lower_to_scalar);
> +         return lower_shuffle(b, intrin, options);
>        else if (options->lower_to_scalar && intrin->num_components > 1)
> -         return lower_subgroup_op_to_scalar(b, intrin);
> +         return lower_subgroup_op_to_scalar(b, intrin,
> options->lower_shuffle_to_32bit);
>

I think you need an "else if (options->lower_shuffle_to_32bit &&
intrin->src[0].ssa->bit_size == 64)" case here as well.

>        break;
>
>     case nir_intrinsic_quad_broadcast:
> @@ -417,16 +464,18 @@ lower_subgroups_intrin(nir_builder *b,
> nir_intrinsic_instr *intrin,
>     case nir_intrinsic_quad_swap_vertical:
>     case nir_intrinsic_quad_swap_diagonal:
>        if (options->lower_quad)
> -         return lower_shuffle(b, intrin, options->lower_to_scalar);
> +         return lower_shuffle(b, intrin, options);
>        else if (options->lower_to_scalar && intrin->num_components > 1)
> -         return lower_subgroup_op_to_scalar(b, intrin);
> +         return lower_subgroup_op_to_scalar(b, intrin,
> options->lower_shuffle_to_32bit);
> +         else if (options->lower_shuffle_to_32bit &&
> intrin->src[0].ssa->bit_size == 64)
>

dedent, please.

> +         return lower_64bit_to_32bit(b, intrin);
>        break;
>
>     case nir_intrinsic_reduce:
>     case nir_intrinsic_inclusive_scan:
>     case nir_intrinsic_exclusive_scan:
>        if (options->lower_to_scalar && intrin->num_components > 1)
> -         return lower_subgroup_op_to_scalar(b, intrin);
> +         return lower_subgroup_op_to_scalar(b, intrin, false);
>        break;
>
>     default:
> --
> 2.14.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180316/b235e719/attachment-0001.html>