[Mesa-dev] [PATCH v2 3/7] nir: lower 64bit subgroup shuffle intrinsics
Daniel Schürmann
daniel.schuermann at campus.tu-berlin.de
Fri Mar 16 09:50:24 UTC 2018
Signed-off-by: Daniel Schürmann <daniel.schuermann at campus.tu-berlin.de>
---
src/compiler/nir/nir.h | 1 +
src/compiler/nir/nir_lower_subgroups.c | 83 +++++++++++++++++++++++++++-------
2 files changed, 67 insertions(+), 17 deletions(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 6a51b7c4ab..0e3c026efa 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2564,6 +2564,7 @@ typedef struct nir_lower_subgroups_options {
bool lower_vote_eq_to_ballot:1;
bool lower_subgroup_masks:1;
bool lower_shuffle:1;
+ bool lower_shuffle_to_32bit:1;
bool lower_quad:1;
} nir_lower_subgroups_options;
diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c
index 9dc7be7947..669168e830 100644
--- a/src/compiler/nir/nir_lower_subgroups.c
+++ b/src/compiler/nir/nir_lower_subgroups.c
@@ -28,6 +28,37 @@
* \file nir_opt_intrinsics.c
*/
+static nir_intrinsic_instr *ac_lower_subgroups_64bit_split_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, unsigned int component) {
+ nir_ssa_def *comp;
+ if (component == 0)
+ comp = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
+ else
+ comp = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);
+
+ nir_intrinsic_instr *intr = nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
+ nir_ssa_dest_init(&intr->instr, &intr->dest, 1, 32, NULL);
+ intr->src[0] = nir_src_for_ssa(comp);
+
+ intr->const_index[0] = intrin->const_index[0];
+ intr->const_index[1] = intrin->const_index[1];
+ if (intrin->intrinsic == nir_intrinsic_read_invocation ||
+ intrin->intrinsic == nir_intrinsic_shuffle ||
+ intrin->intrinsic == nir_intrinsic_quad_broadcast) {
+ nir_src_copy(&intr->src[1], &intrin->src[1], intr);
+ }
+ intr->num_components = 1;
+ nir_builder_instr_insert(b, &intr->instr);
+ return intr;
+}
+
+static nir_ssa_def *
+lower_64bit_to_32bit(nir_builder *b, nir_intrinsic_instr *intrin) {
+ assert(intrin->src[0].ssa->bit_size == 64);
+ nir_intrinsic_instr *intr_x = ac_lower_subgroups_64bit_split_intrinsic(b, intrin, 0);
+ nir_intrinsic_instr *intr_y = ac_lower_subgroups_64bit_split_intrinsic(b, intrin, 1);
+ return nir_pack_64_2x32_split(b, &intr_x->dest.ssa, &intr_y->dest.ssa);
+}
+
static nir_ssa_def *
ballot_type_to_uint(nir_builder *b, nir_ssa_def *value, unsigned bit_size)
{
@@ -80,7 +111,8 @@ uint_to_ballot_type(nir_builder *b, nir_ssa_def *value,
}
static nir_ssa_def *
-lower_subgroup_op_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin)
+lower_subgroup_op_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin,
+ bool lower_shuffle_to_32bit)
{
/* This is safe to call on scalar things but it would be silly */
assert(intrin->dest.ssa.num_components > 1);
@@ -107,9 +139,12 @@ lower_subgroup_op_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin)
chan_intrin->const_index[0] = intrin->const_index[0];
chan_intrin->const_index[1] = intrin->const_index[1];
- nir_builder_instr_insert(b, &chan_intrin->instr);
-
- reads[i] = &chan_intrin->dest.ssa;
+ if (lower_shuffle_to_32bit && chan_intrin->src[0].ssa->bit_size == 64) {
+ reads[i] = lower_64bit_to_32bit(b, chan_intrin);
+ } else {
+ nir_builder_instr_insert(b, &chan_intrin->instr);
+ reads[i] = &chan_intrin->dest.ssa;
+ }
}
return nir_vec(b, reads, intrin->num_components);
@@ -158,13 +193,19 @@ lower_vote_eq_to_ballot(nir_builder *b, nir_intrinsic_instr *intrin,
1, value->bit_size, NULL);
rfi->num_components = 1;
rfi->src[0] = nir_src_for_ssa(nir_channel(b, value, i));
- nir_builder_instr_insert(b, &rfi->instr);
+ nir_ssa_def *first_lane;
+ if (options->lower_shuffle_to_32bit && rfi->src[0].ssa->bit_size == 64) {
+ first_lane = lower_64bit_to_32bit(b, rfi);
+ } else {
+ nir_builder_instr_insert(b, &rfi->instr);
+ first_lane = &rfi->dest.ssa;
+ }
nir_ssa_def *is_eq;
if (intrin->intrinsic == nir_intrinsic_vote_feq) {
- is_eq = nir_feq(b, &rfi->dest.ssa, nir_channel(b, value, i));
+ is_eq = nir_feq(b, first_lane, nir_channel(b, value, i));
} else {
- is_eq = nir_ieq(b, &rfi->dest.ssa, nir_channel(b, value, i));
+ is_eq = nir_ieq(b, first_lane, nir_channel(b, value, i));
}
if (all_eq == NULL) {
@@ -188,7 +229,7 @@ lower_vote_eq_to_ballot(nir_builder *b, nir_intrinsic_instr *intrin,
static nir_ssa_def *
lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
- bool lower_to_scalar)
+ const nir_lower_subgroups_options *options)
{
nir_ssa_def *index = nir_load_subgroup_invocation(b);
switch (intrin->intrinsic) {
@@ -240,8 +281,10 @@ lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
intrin->dest.ssa.num_components,
intrin->dest.ssa.bit_size, NULL);
- if (lower_to_scalar && shuffle->num_components > 1) {
- return lower_subgroup_op_to_scalar(b, shuffle);
+ if (options->lower_to_scalar && shuffle->num_components > 1) {
+ return lower_subgroup_op_to_scalar(b, shuffle, options->lower_shuffle_to_32bit);
+ } else if (options->lower_shuffle_to_32bit && shuffle->src[0].ssa->bit_size == 64) {
+ return lower_64bit_to_32bit(b, shuffle);
} else {
nir_builder_instr_insert(b, &shuffle->instr);
return &shuffle->dest.ssa;
@@ -279,7 +322,9 @@ lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
case nir_intrinsic_read_invocation:
case nir_intrinsic_read_first_invocation:
if (options->lower_to_scalar && intrin->num_components > 1)
- return lower_subgroup_op_to_scalar(b, intrin);
+ return lower_subgroup_op_to_scalar(b, intrin, options->lower_shuffle_to_32bit);
+ else if (options->lower_shuffle_to_32bit && intrin->src[0].ssa->bit_size == 64)
+ return lower_64bit_to_32bit(b, intrin);
break;
case nir_intrinsic_load_subgroup_eq_mask:
@@ -400,16 +445,18 @@ lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
case nir_intrinsic_shuffle:
if (options->lower_to_scalar && intrin->num_components > 1)
- return lower_subgroup_op_to_scalar(b, intrin);
+ return lower_subgroup_op_to_scalar(b, intrin, options->lower_shuffle_to_32bit);
+ else if (options->lower_shuffle_to_32bit && intrin->src[0].ssa->bit_size == 64)
+ return lower_64bit_to_32bit(b, intrin);
break;
case nir_intrinsic_shuffle_xor:
case nir_intrinsic_shuffle_up:
case nir_intrinsic_shuffle_down:
if (options->lower_shuffle)
- return lower_shuffle(b, intrin, options->lower_to_scalar);
+ return lower_shuffle(b, intrin, options);
else if (options->lower_to_scalar && intrin->num_components > 1)
- return lower_subgroup_op_to_scalar(b, intrin);
+ return lower_subgroup_op_to_scalar(b, intrin, options->lower_shuffle_to_32bit);
break;
case nir_intrinsic_quad_broadcast:
@@ -417,16 +464,18 @@ lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
case nir_intrinsic_quad_swap_vertical:
case nir_intrinsic_quad_swap_diagonal:
if (options->lower_quad)
- return lower_shuffle(b, intrin, options->lower_to_scalar);
+ return lower_shuffle(b, intrin, options);
else if (options->lower_to_scalar && intrin->num_components > 1)
- return lower_subgroup_op_to_scalar(b, intrin);
+ return lower_subgroup_op_to_scalar(b, intrin, options->lower_shuffle_to_32bit);
+ else if (options->lower_shuffle_to_32bit && intrin->src[0].ssa->bit_size == 64)
+ return lower_64bit_to_32bit(b, intrin);
break;
case nir_intrinsic_reduce:
case nir_intrinsic_inclusive_scan:
case nir_intrinsic_exclusive_scan:
if (options->lower_to_scalar && intrin->num_components > 1)
- return lower_subgroup_op_to_scalar(b, intrin);
+ return lower_subgroup_op_to_scalar(b, intrin, false);
break;
default:
--
2.14.1
More information about the mesa-dev
mailing list