Mesa (main): nir: Add support for lowering shuffle to a waterfall loop
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue Feb 1 17:01:27 UTC 2022
Module: Mesa
Branch: main
Commit: 503a5bae59712fcc5617d84740439d602e9ba4d2
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=503a5bae59712fcc5617d84740439d602e9ba4d2
Author: Connor Abbott <cwabbott0 at gmail.com>
Date: Tue Jan 4 15:44:31 2022 +0100
nir: Add support for lowering shuffle to a waterfall loop
Qualcomm doesn't natively support shuffle, but it does natively support
relative shuffles where the delta is a constant. Therefore we'll expose
emulated support for both. Add support for this emulation of
subgroupShuffle() to NIR.
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Reviewed-by: Danylo Piliaiev <dpiliaiev at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14412>
---
src/compiler/nir/nir.h | 1 +
src/compiler/nir/nir_lower_subgroups.c | 81 +++++++++++++++++++++++++++++++++-
2 files changed, 81 insertions(+), 1 deletion(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 7c60d29c103..91660a2f836 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -4682,6 +4682,7 @@ typedef struct nir_lower_subgroups_options {
bool lower_relative_shuffle:1;
bool lower_shuffle_to_32bit:1;
bool lower_shuffle_to_swizzle_amd:1;
+ bool lower_shuffle:1;
bool lower_quad:1;
bool lower_quad_broadcast_dynamic:1;
bool lower_quad_broadcast_dynamic_to_const:1;
diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c
index ed38056f8d1..ec64f94edad 100644
--- a/src/compiler/nir/nir_lower_subgroups.c
+++ b/src/compiler/nir/nir_lower_subgroups.c
@@ -305,6 +305,83 @@ lower_to_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
}
}
+/* Pick a GLSL type for a variable that holds a copy of an SSA def: bool for
+ * 1-bit defs, otherwise an unsigned integer of the def's bit size, with the
+ * def's component count applied through glsl_replace_vector_type().
+ */
+static const struct glsl_type *
+glsl_type_for_ssa(nir_ssa_def *def)
+{
+   const struct glsl_type *scalar_type;
+
+   if (def->bit_size == 1)
+      scalar_type = glsl_bool_type();
+   else
+      scalar_type = glsl_uintN_t_type(def->bit_size);
+
+   return glsl_replace_vector_type(scalar_type, def->num_components);
+}
+
+/* Lower nir_intrinsic_shuffle to a waterfall loop + nir_read_invocation.
+ *
+ * intrin->src[0] is the value being shuffled and intrin->src[1] is the ID of
+ * the invocation to read it from; both sources must be SSA. The loop writes
+ * into a function-local "result" variable whose type is derived from the
+ * value's bit size and component count (see glsl_type_for_ssa()), and the
+ * SSA def returned to the caller is a load of that variable emitted after
+ * the loop.
+ */
+static nir_ssa_def *
+lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin)
+{
+ assert(intrin->src[0].is_ssa);
+ assert(intrin->src[1].is_ssa);
+ nir_ssa_def *val = intrin->src[0].ssa;
+ nir_ssa_def *id = intrin->src[1].ssa;
+
+ /* The loop is something like:
+ *
+ *    while (true) {
+ *       first_id = readFirstInvocation(gl_SubgroupInvocationID);
+ *       first_val = readFirstInvocation(val);
+ *       first_result = readInvocation(val, readFirstInvocation(id));
+ *       if (id == first_id)
+ *          result = first_val;
+ *       if (elect()) {
+ *          if (id > gl_SubgroupInvocationID) {
+ *             result = first_result;
+ *          }
+ *          break;
+ *       }
+ *    }
+ *
+ * The idea is to guarantee, on each iteration of the loop, that every
+ * invocation reading from first_id gets the correct value, so that the
+ * first_id invocation can then be killed off by breaking out of the loop.
+ * Before it breaks, the first_id invocation must also have received its
+ * own result. That is already the case when the invocation it reads from
+ * was killed off in an earlier iteration, or is first_id itself; it is
+ * only still missing when the invocation it reads from has not been
+ * killed off yet, that is, when id is later than its own ID.
+ *
+ * Put differently: invocations where id <= gl_SubgroupInvocationID are
+ * assigned their result in the first if, and invocations where id >
+ * gl_SubgroupInvocationID are assigned their result in the second if.
+ * Both stores write every component of val.
+ *
+ * We do this more complicated loop rather than looping over all id's
+ * explicitly because at this point we don't know the "actual" subgroup
+ * size and at the moment there's no way to get at it, which means we may
+ * loop over always-inactive invocations.
+ */
+
+ nir_ssa_def *subgroup_id = nir_load_subgroup_invocation(b);
+
+ nir_variable *result =
+ nir_local_variable_create(b->impl, glsl_type_for_ssa(val), "result");
+
+ nir_loop *loop = nir_push_loop(b); {
+ nir_ssa_def *first_id = nir_read_first_invocation(b, subgroup_id);
+ nir_ssa_def *first_val = nir_read_first_invocation(b, val);
+ nir_ssa_def *first_result =
+ nir_read_invocation(b, val, nir_read_first_invocation(b, id));
+
+ nir_if *nif = nir_push_if(b, nir_ieq(b, id, first_id)); {
+ nir_store_var(b, result, first_val, BITFIELD_MASK(val->num_components));
+ } nir_pop_if(b, nif);
+
+ nir_if *nif2 = nir_push_if(b, nir_elect(b, 1)); {
+ nir_if *nif3 = nir_push_if(b, nir_ult(b, subgroup_id, id)); {
+ nir_store_var(b, result, first_result, BITFIELD_MASK(val->num_components));
+ } nir_pop_if(b, nif3);
+
+ nir_jump(b, nir_jump_break);
+ } nir_pop_if(b, nif2);
+ } nir_pop_loop(b, loop);
+
+ return nir_load_var(b, result);
+}
+
static bool
lower_subgroups_filter(const nir_instr *instr, const void *_options)
{
@@ -702,7 +779,9 @@ lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
}
case nir_intrinsic_shuffle:
- if (options->lower_to_scalar && intrin->num_components > 1)
+ if (options->lower_shuffle)
+ return lower_shuffle(b, intrin);
+ else if (options->lower_to_scalar && intrin->num_components > 1)
return lower_subgroup_op_to_scalar(b, intrin, options->lower_shuffle_to_32bit);
else if (options->lower_shuffle_to_32bit && intrin->src[0].ssa->bit_size == 64)
return lower_subgroup_op_to_32bit(b, intrin);
More information about the mesa-commit
mailing list