[Mesa-dev] [PATCH v2 3/7] nir: lower 64bit subgroup shuffle intrinsics
Jason Ekstrand
jason at jlekstrand.net
Fri Mar 16 16:38:16 UTC 2018
On Fri, Mar 16, 2018 at 2:50 AM, Daniel Schürmann <
daniel.schuermann at campus.tu-berlin.de> wrote:
> Signed-off-by: Daniel Schürmann <daniel.schuermann at campus.tu-berlin.de>
> ---
> src/compiler/nir/nir.h | 1 +
> src/compiler/nir/nir_lower_subgroups.c | 83
> +++++++++++++++++++++++++++-------
> 2 files changed, 67 insertions(+), 17 deletions(-)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 6a51b7c4ab..0e3c026efa 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2564,6 +2564,7 @@ typedef struct nir_lower_subgroups_options {
> bool lower_vote_eq_to_ballot:1;
> bool lower_subgroup_masks:1;
> bool lower_shuffle:1;
> + bool lower_shuffle_to_32bit:1;
> bool lower_quad:1;
> } nir_lower_subgroups_options;
>
> diff --git a/src/compiler/nir/nir_lower_subgroups.c
> b/src/compiler/nir/nir_lower_subgroups.c
> index 9dc7be7947..669168e830 100644
> --- a/src/compiler/nir/nir_lower_subgroups.c
> +++ b/src/compiler/nir/nir_lower_subgroups.c
> @@ -28,6 +28,37 @@
> * \file nir_opt_intrinsics.c
> */
>
> +static nir_intrinsic_instr *ac_lower_subgroups_64bit_split_intrinsic(nir_builder
> *b, nir_intrinsic_instr *intrin, unsigned int component) {
>
Please put "static nir_intrinsic_instr *" and the "{" each on their own
line and wrap things so that we don't go over 80 characters. Also, please
drop the ac_ prefix as this is no longer in radv code.
> + nir_ssa_def *comp;
> + if (component == 0)
> + comp = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
> + else
> + comp = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);
> +
> + nir_intrinsic_instr *intr = nir_intrinsic_instr_create(b->shader,
> intrin->intrinsic);
> + nir_ssa_dest_init(&intr->instr, &intr->dest, 1, 32, NULL);
> + intr->src[0] = nir_src_for_ssa(comp);
> +
> + intr->const_index[0] = intrin->const_index[0];
> + intr->const_index[1] = intrin->const_index[1];
> + if (intrin->intrinsic == nir_intrinsic_read_invocation ||
> + intrin->intrinsic == nir_intrinsic_shuffle ||
> + intrin->intrinsic == nir_intrinsic_quad_broadcast) {
>
You can use nir_intrinsic_infos[intrin->intrinsic].num_srcs to make this a
bit more general.
> + nir_src_copy(&intr->src[1], &intrin->src[1], intr);
> + }
> + intr->num_components = 1;
> + nir_builder_instr_insert(b, &intr->instr);
> + return intr;
> +}
> +
> +static nir_ssa_def *
> +lower_64bit_to_32bit(nir_builder *b, nir_intrinsic_instr *intrin) {
"{" goes on it's own line. Also, how about "lower_subgroup_op_to_32bit" to
match "lower_subgroup_op_to_scalar" below.
> + assert(intrin->src[0].ssa->bit_size == 64);
> + nir_intrinsic_instr *intr_x = ac_lower_subgroups_64bit_split_intrinsic(b,
> intrin, 0);
> + nir_intrinsic_instr *intr_y = ac_lower_subgroups_64bit_split_intrinsic(b,
> intrin, 1);
> + return nir_pack_64_2x32_split(b, &intr_x->dest.ssa, &intr_y->dest.ssa);
> +}
> +
> static nir_ssa_def *
> ballot_type_to_uint(nir_builder *b, nir_ssa_def *value, unsigned
> bit_size)
> {
> @@ -80,7 +111,8 @@ uint_to_ballot_type(nir_builder *b, nir_ssa_def *value,
> }
>
> static nir_ssa_def *
> -lower_subgroup_op_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin)
> +lower_subgroup_op_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin,
> + bool lower_shuffle_to_32bit)
>
Just call this lower_to_32bit as it doesn't necessarily have anything to do
with shuffles. Also, please align the parameter to the (
> {
> /* This is safe to call on scalar things but it would be silly */
> assert(intrin->dest.ssa.num_components > 1);
> @@ -107,9 +139,12 @@ lower_subgroup_op_to_scalar(nir_builder *b,
> nir_intrinsic_instr *intrin)
> chan_intrin->const_index[0] = intrin->const_index[0];
> chan_intrin->const_index[1] = intrin->const_index[1];
>
> - nir_builder_instr_insert(b, &chan_intrin->instr);
> -
> - reads[i] = &chan_intrin->dest.ssa;
> + if (lower_shuffle_to_32bit && chan_intrin->src[0].ssa->bit_size ==
> 64) {
> + reads[i] = lower_64bit_to_32bit(b, chan_intrin);
> + } else {
> + nir_builder_instr_insert(b, &chan_intrin->instr);
> + reads[i] = &chan_intrin->dest.ssa;
> + }
> }
>
> return nir_vec(b, reads, intrin->num_components);
> @@ -158,13 +193,19 @@ lower_vote_eq_to_ballot(nir_builder *b,
> nir_intrinsic_instr *intrin,
> 1, value->bit_size, NULL);
> rfi->num_components = 1;
> rfi->src[0] = nir_src_for_ssa(nir_channel(b, value, i));
> - nir_builder_instr_insert(b, &rfi->instr);
> + nir_ssa_def *first_lane;
> + if (options->lower_shuffle_to_32bit && rfi->src[0].ssa->bit_size
> == 64) {
>
I don't really see how read_first_invocation is related to shuffles
> + first_lane = lower_64bit_to_32bit(b, rfi);
> + } else {
> + nir_builder_instr_insert(b, &rfi->instr);
> + first_lane = &rfi->dest.ssa;
> + }
>
> nir_ssa_def *is_eq;
> if (intrin->intrinsic == nir_intrinsic_vote_feq) {
> - is_eq = nir_feq(b, &rfi->dest.ssa, nir_channel(b, value, i));
> + is_eq = nir_feq(b, first_lane, nir_channel(b, value, i));
> } else {
> - is_eq = nir_ieq(b, &rfi->dest.ssa, nir_channel(b, value, i));
> + is_eq = nir_ieq(b, first_lane, nir_channel(b, value, i));
> }
>
> if (all_eq == NULL) {
> @@ -188,7 +229,7 @@ lower_vote_eq_to_ballot(nir_builder *b,
> nir_intrinsic_instr *intrin,
>
> static nir_ssa_def *
> lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
> - bool lower_to_scalar)
> + const nir_lower_subgroups_options *options)
>
Please align to the (
> {
> nir_ssa_def *index = nir_load_subgroup_invocation(b);
> switch (intrin->intrinsic) {
> @@ -240,8 +281,10 @@ lower_shuffle(nir_builder *b, nir_intrinsic_instr
> *intrin,
> intrin->dest.ssa.num_components,
> intrin->dest.ssa.bit_size, NULL);
>
> - if (lower_to_scalar && shuffle->num_components > 1) {
> - return lower_subgroup_op_to_scalar(b, shuffle);
> + if (options->lower_to_scalar && shuffle->num_components > 1) {
> + return lower_subgroup_op_to_scalar(b, shuffle,
> options->lower_shuffle_to_32bit);
> + } else if (options->lower_shuffle_to_32bit &&
> shuffle->src[0].ssa->bit_size == 64) {
> + return lower_64bit_to_32bit(b, shuffle);
> } else {
> nir_builder_instr_insert(b, &shuffle->instr);
> return &shuffle->dest.ssa;
> @@ -279,7 +322,9 @@ lower_subgroups_intrin(nir_builder *b,
> nir_intrinsic_instr *intrin,
> case nir_intrinsic_read_invocation:
> case nir_intrinsic_read_first_invocation:
> if (options->lower_to_scalar && intrin->num_components > 1)
> - return lower_subgroup_op_to_scalar(b, intrin);
> + return lower_subgroup_op_to_scalar(b, intrin,
> options->lower_shuffle_to_32bit);
> + else if (options->lower_shuffle_to_32bit &&
> intrin->src[0].ssa->bit_size == 64)
> + return lower_64bit_to_32bit(b, intrin);
> break;
>
> case nir_intrinsic_load_subgroup_eq_mask:
> @@ -400,16 +445,18 @@ lower_subgroups_intrin(nir_builder *b,
> nir_intrinsic_instr *intrin,
>
> case nir_intrinsic_shuffle:
> if (options->lower_to_scalar && intrin->num_components > 1)
> - return lower_subgroup_op_to_scalar(b, intrin);
> + return lower_subgroup_op_to_scalar(b, intrin,
> options->lower_shuffle_to_32bit);
> + else if (options->lower_shuffle_to_32bit &&
> intrin->src[0].ssa->bit_size == 64)
>
This needs to be dedented.
> + return lower_64bit_to_32bit(b, intrin);
> break;
>
> case nir_intrinsic_shuffle_xor:
> case nir_intrinsic_shuffle_up:
> case nir_intrinsic_shuffle_down:
> if (options->lower_shuffle)
> - return lower_shuffle(b, intrin, options->lower_to_scalar);
> + return lower_shuffle(b, intrin, options);
> else if (options->lower_to_scalar && intrin->num_components > 1)
> - return lower_subgroup_op_to_scalar(b, intrin);
> + return lower_subgroup_op_to_scalar(b, intrin,
> options->lower_shuffle_to_32bit);
>
I think you need an "else if (options->lower_shuffle_to_32bit &&
intrin->src[0].ssa->bit_size == 64)" case here as well.
> break;
>
> case nir_intrinsic_quad_broadcast:
> @@ -417,16 +464,18 @@ lower_subgroups_intrin(nir_builder *b,
> nir_intrinsic_instr *intrin,
> case nir_intrinsic_quad_swap_vertical:
> case nir_intrinsic_quad_swap_diagonal:
> if (options->lower_quad)
> - return lower_shuffle(b, intrin, options->lower_to_scalar);
> + return lower_shuffle(b, intrin, options);
> else if (options->lower_to_scalar && intrin->num_components > 1)
> - return lower_subgroup_op_to_scalar(b, intrin);
> + return lower_subgroup_op_to_scalar(b, intrin,
> options->lower_shuffle_to_32bit);
> + else if (options->lower_shuffle_to_32bit &&
> intrin->src[0].ssa->bit_size == 64)
>
dedent, please.
> + return lower_64bit_to_32bit(b, intrin);
> break;
>
> case nir_intrinsic_reduce:
> case nir_intrinsic_inclusive_scan:
> case nir_intrinsic_exclusive_scan:
> if (options->lower_to_scalar && intrin->num_components > 1)
> - return lower_subgroup_op_to_scalar(b, intrin);
> + return lower_subgroup_op_to_scalar(b, intrin, false);
> break;
>
> default:
> --
> 2.14.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180316/b235e719/attachment-0001.html>
More information about the mesa-dev
mailing list