[Mesa-dev] [PATCH 17/17] radv/ac: enable EXT_shader_subgroup_ballot and EXT_shader_subgroup_vote
Bas Nieuwenhuizen
bas at basnieuwenhuizen.nl
Sat Jun 10 00:04:29 UTC 2017
On Sat, Jun 10, 2017 at 1:50 AM, Connor Abbott
<connora at valvesoftware.com> wrote:
> From: Connor Abbott <cwabbott0 at gmail.com>
>
> Signed-off-by: Connor Abbott <cwabbott0 at gmail.com>
> ---
> src/amd/common/ac_nir_to_llvm.c | 75 +++++++++++++++++++++++++++++++++++++++++
> src/amd/vulkan/radv_device.c | 8 +++++
> src/amd/vulkan/radv_pipeline.c | 2 ++
> 3 files changed, 85 insertions(+)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 5bbd1c5..111e575 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -4069,6 +4069,81 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx,
> case nir_intrinsic_load_patch_vertices_in:
> result = LLVMConstInt(ctx->i32, ctx->options->key.tcs.input_vertices, false);
> break;
> + case nir_intrinsic_ballot:
> + result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
> + break;
> + case nir_intrinsic_read_first_invocation: {
> + LLVMValueRef src0 = get_src(ctx, instr->src[0]);
> + ac_build_optimization_barrier(&ctx->ac, &src0);
> + LLVMValueRef srcs[1] = { src0 };
> + result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readfirstlane",
> + ctx->i32, srcs, 1,
> + AC_FUNC_ATTR_NOUNWIND |
> + AC_FUNC_ATTR_READNONE |
> + AC_FUNC_ATTR_CONVERGENT);
> + break;
> + }
> + case nir_intrinsic_read_invocation: {
> + LLVMValueRef src0 = get_src(ctx, instr->src[0]);
> + ac_build_optimization_barrier(&ctx->ac, &src0);
> + LLVMValueRef srcs[2] = { src0, get_src(ctx, instr->src[1]) };
> + result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane",
> + ctx->i32, srcs, 2,
> + AC_FUNC_ATTR_NOUNWIND |
> + AC_FUNC_ATTR_READNONE |
> + AC_FUNC_ATTR_CONVERGENT);
> + break;
> + }
> + case nir_intrinsic_load_subgroup_invocation:
> + result = ac_get_thread_id(&ctx->ac);
> + break;
> + case nir_intrinsic_load_subgroup_size:
> + result = LLVMConstInt(ctx->i32, 64, 0);
> + break;
> + case nir_intrinsic_all_invocations:
> + result = LLVMBuildSExt(ctx->builder,
> + ac_build_vote_all(&ctx->ac,
> + get_src(ctx, instr->src[0])),
> + ctx->i32, "");
How well does LLVM optimize this? I've always found representing a
boolean as an int32 holding -1 or 0 to be an awkward mapping onto LLVM,
and I'm wondering whether LLVM is able to optimize the SExt away or
whether a select might be better.
> + break;
> + case nir_intrinsic_any_invocations:
> + result = LLVMBuildSExt(ctx->builder,
> + ac_build_vote_any(&ctx->ac,
> + get_src(ctx, instr->src[0])),
> + ctx->i32, "");
> + break;
> + case nir_intrinsic_all_invocations_equal:
> + result = LLVMBuildSExt(ctx->builder,
> + ac_build_vote_eq(&ctx->ac,
> + get_src(ctx, instr->src[0])),
> + ctx->i32, "");
> + break;
> + case nir_intrinsic_load_subgroup_eq_mask: {
> + LLVMValueRef id = ac_get_thread_id(&ctx->ac);
> + id = LLVMBuildZExt(ctx->builder, id, ctx->i64, "");
> + result = LLVMBuildShl(ctx->builder, LLVMConstInt(ctx->i64, 1, 0), id, "");
> + break;
> + }
> + case nir_intrinsic_load_subgroup_ge_mask:
> + case nir_intrinsic_load_subgroup_gt_mask:
> + case nir_intrinsic_load_subgroup_le_mask:
> + case nir_intrinsic_load_subgroup_lt_mask: {
> + LLVMValueRef id = ac_get_thread_id(&ctx->ac);
> + if (instr->intrinsic == nir_intrinsic_load_subgroup_gt_mask ||
> + instr->intrinsic == nir_intrinsic_load_subgroup_le_mask) {
> + /* All bits set except LSB */
> + result = LLVMConstInt(ctx->i64, -2, 0);
> + } else {
> + /* All bits set */
> + result = LLVMConstInt(ctx->i64, -1, 0);
> + }
> + id = LLVMBuildZExt(ctx->builder, id, ctx->i64, "");
> + result = LLVMBuildShl(ctx->builder, result, id, "");
> + if (instr->intrinsic == nir_intrinsic_load_subgroup_le_mask ||
> + instr->intrinsic == nir_intrinsic_load_subgroup_lt_mask)
> + result = LLVMBuildNot(ctx->builder, result, "");
> + break;
> + }
> default:
> fprintf(stderr, "Unknown intrinsic: ");
> nir_print_instr(&instr->instr, stderr);
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index e9bf44c..ea50acc 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -127,6 +127,14 @@ static const VkExtensionProperties common_device_extensions[] = {
> .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
> .specVersion = 1,
> },
> + {
> + .extensionName = VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
> + .specVersion = 1,
> + },
> + {
> + .extensionName = VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
> + .specVersion = 1,
> + },
> };
>
> static VkResult
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index 39cbd5a..242890a 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -228,6 +228,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
> .image_write_without_format = true,
> .tessellation = true,
> .int64 = true,
> + .shader_ballot = true,
> + .shader_group_vote = true,
> };
> entry_point = spirv_to_nir(spirv, module->size / 4,
> spec_entries, num_spec_entries,
> --
> 2.9.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list