[Mesa-dev] [PATCH 3/7] ac: lower 64bit subgroup intrinsics
Daniel Schürmann
daniel.schuermann at campus.tu-berlin.de
Thu Mar 8 17:10:40 UTC 2018
---
src/amd/common/ac_lower_subgroups.c | 50 ++++++++++++++++++++++++++++++++++---
1 file changed, 46 insertions(+), 4 deletions(-)
diff --git a/src/amd/common/ac_lower_subgroups.c b/src/amd/common/ac_lower_subgroups.c
index d0782b481b..2be48e2ba1 100644
--- a/src/amd/common/ac_lower_subgroups.c
+++ b/src/amd/common/ac_lower_subgroups.c
@@ -26,9 +26,45 @@
#include "ac_nir_to_llvm.h"
+/**
+ * Lower a subgroup intrinsic with a 64-bit source into two 32-bit ones.
+ *
+ * The source value is split with nir_unpack_64_2x32_split_x/_y, one copy of
+ * the intrinsic is emitted per 32-bit half, and the two 32-bit results are
+ * recombined with nir_pack_64_2x32_split.
+ *
+ * \param b       builder used to emit the replacement instructions
+ * \param intrin  intrinsic to split; its src[0] must be 64 bits wide
+ * \return the 64-bit value packed from the two 32-bit results
+ */
+static nir_ssa_def *
+ac_lower_subgroups_64bit(nir_builder *b, nir_intrinsic_instr *intrin)
+{
+	nir_ssa_def *value = intrin->src[0].ssa;
+	assert(value->bit_size == 64);
+
+	/* Index 0 is the x half, index 1 the y half. */
+	nir_ssa_def *halves[2];
+	halves[0] = nir_unpack_64_2x32_split_x(b, value);
+	halves[1] = nir_unpack_64_2x32_split_y(b, value);
+
+	nir_intrinsic_instr *split[2];
+	for (unsigned i = 0; i < 2; i++) {
+		nir_intrinsic_instr *half =
+			nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
+
+		nir_ssa_dest_init(&half->instr, &half->dest, 1, 32, NULL);
+		half->num_components = 1;
+		half->src[0] = nir_src_for_ssa(halves[i]);
+		half->const_index[0] = intrin->const_index[0];
+		half->const_index[1] = intrin->const_index[1];
+
+		/* Only these intrinsics get their second source forwarded;
+		 * both halves receive the same one.
+		 */
+		switch (intrin->intrinsic) {
+		case nir_intrinsic_read_invocation:
+		case nir_intrinsic_shuffle:
+		case nir_intrinsic_quad_broadcast:
+			nir_src_copy(&half->src[1], &intrin->src[1], half);
+			break;
+		default:
+			break;
+		}
+
+		split[i] = half;
+	}
+
+	/* Insert only after both halves are fully built, x before y. */
+	for (unsigned i = 0; i < 2; i++)
+		nir_builder_instr_insert(b, &split[i]->instr);
+
+	return nir_pack_64_2x32_split(b, &split[0]->dest.ssa, &split[1]->dest.ssa);
+}
+
static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin)
{
switch(intrin->intrinsic) {
+ case nir_intrinsic_read_invocation:
+ case nir_intrinsic_read_first_invocation:
+ case nir_intrinsic_shuffle:
+ case nir_intrinsic_quad_broadcast:
+ case nir_intrinsic_quad_swap_horizontal:
+ case nir_intrinsic_quad_swap_vertical:
+ case nir_intrinsic_quad_swap_diagonal:
+ if (intrin->src[0].ssa->bit_size == 64)
+ return ac_lower_subgroups_64bit(b, intrin);
+ else
+ return NULL;
case nir_intrinsic_vote_ieq:
case nir_intrinsic_vote_feq: {
nir_intrinsic_instr *rfi =
@@ -37,12 +73,18 @@ static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder *b, nir_intrinsic_inst
1, intrin->src[0].ssa->bit_size, NULL);
nir_src_copy(&rfi->src[0], &intrin->src[0], rfi);
rfi->num_components = 1;
-
+ nir_ssa_def *first_lane;
+ if (intrin->src[0].ssa->bit_size == 64) {
+ first_lane = ac_lower_subgroups_64bit(b, rfi);
+ } else {
+ nir_builder_instr_insert(b, &rfi->instr);
+ first_lane = &rfi->dest.ssa;
+ }
nir_ssa_def *is_ne;
if (intrin->intrinsic == nir_intrinsic_vote_feq)
- is_ne = nir_fne(b, &rfi->dest.ssa, intrin->src[0].ssa);
+ is_ne = nir_fne(b, first_lane, intrin->src[0].ssa);
else
- is_ne = nir_ine(b, &rfi->dest.ssa, intrin->src[0].ssa);
+ is_ne = nir_ine(b, first_lane, intrin->src[0].ssa);
nir_intrinsic_instr *ballot =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_ballot);
@@ -50,7 +92,7 @@ static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder *b, nir_intrinsic_inst
1, 64, NULL);
ballot->src[0] = nir_src_for_ssa(is_ne);
ballot->num_components = 1;
-
+ nir_builder_instr_insert(b, &ballot->instr);
return nir_ieq(b, &ballot->dest.ssa, nir_imm_int64(b, 0));
}
default:
--
2.14.1
More information about the mesa-dev
mailing list