[Mesa-dev] [PATCH 17/17] radv/ac: enable EXT_shader_subgroup_ballot and EXT_shader_subgroup_vote

Connor Abbott cwabbott0 at gmail.com
Sat Jun 10 00:26:31 UTC 2017


On Fri, Jun 9, 2017 at 5:04 PM, Bas Nieuwenhuizen
<bas at basnieuwenhuizen.nl> wrote:
> On Sat, Jun 10, 2017 at 1:50 AM, Connor Abbott
> <connora at valvesoftware.com> wrote:
>> From: Connor Abbott <cwabbott0 at gmail.com>
>>
>> Signed-off-by: Connor Abbott <cwabbott0 at gmail.com>
>> ---
>>  src/amd/common/ac_nir_to_llvm.c | 75 +++++++++++++++++++++++++++++++++++++++++
>>  src/amd/vulkan/radv_device.c    |  8 +++++
>>  src/amd/vulkan/radv_pipeline.c  |  2 ++
>>  3 files changed, 85 insertions(+)
>>
>> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
>> index 5bbd1c5..111e575 100644
>> --- a/src/amd/common/ac_nir_to_llvm.c
>> +++ b/src/amd/common/ac_nir_to_llvm.c
>> @@ -4069,6 +4069,81 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx,
>>         case nir_intrinsic_load_patch_vertices_in:
>>                 result = LLVMConstInt(ctx->i32, ctx->options->key.tcs.input_vertices, false);
>>                 break;
>> +       case nir_intrinsic_ballot:
>> +               result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
>> +               break;
>> +       case nir_intrinsic_read_first_invocation: {
>> +               LLVMValueRef src0 = get_src(ctx, instr->src[0]);
>> +               ac_build_optimization_barrier(&ctx->ac, &src0);
>> +               LLVMValueRef srcs[1] = { src0 };
>> +               result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readfirstlane",
>> +                                           ctx->i32, srcs, 1,
>> +                                           AC_FUNC_ATTR_NOUNWIND |
>> +                                           AC_FUNC_ATTR_READNONE |
>> +                                           AC_FUNC_ATTR_CONVERGENT);
>> +               break;
>> +        }
>> +       case nir_intrinsic_read_invocation: {
>> +               LLVMValueRef src0 = get_src(ctx, instr->src[0]);
>> +               ac_build_optimization_barrier(&ctx->ac, &src0);
>> +               LLVMValueRef srcs[2] = { src0, get_src(ctx, instr->src[1]) };
>> +               result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane",
>> +                                           ctx->i32, srcs, 2,
>> +                                           AC_FUNC_ATTR_NOUNWIND |
>> +                                           AC_FUNC_ATTR_READNONE |
>> +                                           AC_FUNC_ATTR_CONVERGENT);
>> +               break;
>> +        }
>> +       case nir_intrinsic_load_subgroup_invocation:
>> +               result = ac_get_thread_id(&ctx->ac);
>> +               break;
>> +       case nir_intrinsic_load_subgroup_size:
>> +               result = LLVMConstInt(ctx->i32, 64, 0);
>> +               break;
>> +       case nir_intrinsic_all_invocations:
>> +               result = LLVMBuildSExt(ctx->builder,
>> +                                      ac_build_vote_all(&ctx->ac,
>> +                                                        get_src(ctx, instr->src[0])),
>> +                                      ctx->i32, "");
>
> How well does LLVM optimize this? I've always found the boolean as
> int32 with -1 and 0 an awkward mapping to LLVM, and am wondering
> whether LLVM is able to optimize the SExt away or if a select might be
> better.

From looking at the shader dump of my test, LLVM seems to be able to
optimize it away. In fact, it's what radeonsi uses for all their
comparisons (since TGSI also uses -1 for true), so I'd expect it to be
at least as good as a select; it might be better.

It might be interesting to make booleans have a bit-size of 1, like in
LLVM... it would probably require a lot of churn, though.

>
>
>> +               break;
>> +       case nir_intrinsic_any_invocations:
>> +               result = LLVMBuildSExt(ctx->builder,
>> +                                      ac_build_vote_any(&ctx->ac,
>> +                                                        get_src(ctx, instr->src[0])),
>> +                                      ctx->i32, "");
>> +               break;
>> +       case nir_intrinsic_all_invocations_equal:
>> +               result = LLVMBuildSExt(ctx->builder,
>> +                                      ac_build_vote_eq(&ctx->ac,
>> +                                                        get_src(ctx, instr->src[0])),
>> +                                      ctx->i32, "");
>> +               break;
>> +       case nir_intrinsic_load_subgroup_eq_mask: {
>> +               LLVMValueRef id = ac_get_thread_id(&ctx->ac);
>> +               id = LLVMBuildZExt(ctx->builder, id, ctx->i64, "");
>> +               result = LLVMBuildShl(ctx->builder, LLVMConstInt(ctx->i64, 1, 0), id, "");
>> +               break;
>> +       }
>> +       case nir_intrinsic_load_subgroup_ge_mask:
>> +       case nir_intrinsic_load_subgroup_gt_mask:
>> +       case nir_intrinsic_load_subgroup_le_mask:
>> +       case nir_intrinsic_load_subgroup_lt_mask: {
>> +               LLVMValueRef id = ac_get_thread_id(&ctx->ac);
>> +               if (instr->intrinsic == nir_intrinsic_load_subgroup_gt_mask ||
>> +                   instr->intrinsic == nir_intrinsic_load_subgroup_le_mask) {
>> +                       /* All bits set except LSB */
>> +                       result = LLVMConstInt(ctx->i64, -2, 0);
>> +               } else {
>> +                       /* All bits set */
>> +                       result = LLVMConstInt(ctx->i64, -1, 0);
>> +               }
>> +               id = LLVMBuildZExt(ctx->builder, id, ctx->i64, "");
>> +               result = LLVMBuildShl(ctx->builder, result, id, "");
>> +               if (instr->intrinsic == nir_intrinsic_load_subgroup_le_mask ||
>> +                   instr->intrinsic == nir_intrinsic_load_subgroup_lt_mask)
>> +                       result = LLVMBuildNot(ctx->builder, result, "");
>> +               break;
>> +       }
>>         default:
>>                 fprintf(stderr, "Unknown intrinsic: ");
>>                 nir_print_instr(&instr->instr, stderr);
>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
>> index e9bf44c..ea50acc 100644
>> --- a/src/amd/vulkan/radv_device.c
>> +++ b/src/amd/vulkan/radv_device.c
>> @@ -127,6 +127,14 @@ static const VkExtensionProperties common_device_extensions[] = {
>>                 .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
>>                 .specVersion = 1,
>>         },
>> +       {
>> +               .extensionName = VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
>> +               .specVersion = 1,
>> +       },
>> +       {
>> +               .extensionName = VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
>> +               .specVersion = 1,
>> +       },
>>  };
>>
>>  static VkResult
>> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
>> index 39cbd5a..242890a 100644
>> --- a/src/amd/vulkan/radv_pipeline.c
>> +++ b/src/amd/vulkan/radv_pipeline.c
>> @@ -228,6 +228,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
>>                         .image_write_without_format = true,
>>                         .tessellation = true,
>>                         .int64 = true,
>> +                       .shader_ballot = true,
>> +                       .shader_group_vote = true,
>>                 };
>>                 entry_point = spirv_to_nir(spirv, module->size / 4,
>>                                            spec_entries, num_spec_entries,
>> --
>> 2.9.4
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list