[Mesa-dev] [PATCH 17/17] radv/ac: enable EXT_shader_subgroup_ballot and EXT_shader_subgroup_vote

Nicolai Hähnle nhaehnle at gmail.com
Mon Jun 12 09:29:24 UTC 2017


On 10.06.2017 02:26, Connor Abbott wrote:
> On Fri, Jun 9, 2017 at 5:04 PM, Bas Nieuwenhuizen
> <bas at basnieuwenhuizen.nl> wrote:
>> On Sat, Jun 10, 2017 at 1:50 AM, Connor Abbott
>> <connora at valvesoftware.com> wrote:
>>> From: Connor Abbott <cwabbott0 at gmail.com>
>>>
>>> Signed-off-by: Connor Abbott <cwabbott0 at gmail.com>
>>> ---
>>>   src/amd/common/ac_nir_to_llvm.c | 75 +++++++++++++++++++++++++++++++++++++++++
>>>   src/amd/vulkan/radv_device.c    |  8 +++++
>>>   src/amd/vulkan/radv_pipeline.c  |  2 ++
>>>   3 files changed, 85 insertions(+)
>>>
>>> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
>>> index 5bbd1c5..111e575 100644
>>> --- a/src/amd/common/ac_nir_to_llvm.c
>>> +++ b/src/amd/common/ac_nir_to_llvm.c
>>> @@ -4069,6 +4069,81 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx,
>>>          case nir_intrinsic_load_patch_vertices_in:
>>>                  result = LLVMConstInt(ctx->i32, ctx->options->key.tcs.input_vertices, false);
>>>                  break;
>>> +       case nir_intrinsic_ballot:
>>> +               result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
>>> +               break;
>>> +       case nir_intrinsic_read_first_invocation: {
>>> +               LLVMValueRef src0 = get_src(ctx, instr->src[0]);
>>> +               ac_build_optimization_barrier(&ctx->ac, &src0);
>>> +               LLVMValueRef srcs[1] = { src0 };
>>> +               result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readfirstlane",
>>> +                                           ctx->i32, srcs, 1,
>>> +                                           AC_FUNC_ATTR_NOUNWIND |
>>> +                                           AC_FUNC_ATTR_READNONE |
>>> +                                           AC_FUNC_ATTR_CONVERGENT);
>>> +               break;
>>> +        }
>>> +       case nir_intrinsic_read_invocation: {
>>> +               LLVMValueRef src0 = get_src(ctx, instr->src[0]);
>>> +               ac_build_optimization_barrier(&ctx->ac, &src0);
>>> +               LLVMValueRef srcs[2] = { src0, get_src(ctx, instr->src[1]) };
>>> +               result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane",
>>> +                                           ctx->i32, srcs, 2,
>>> +                                           AC_FUNC_ATTR_NOUNWIND |
>>> +                                           AC_FUNC_ATTR_READNONE |
>>> +                                           AC_FUNC_ATTR_CONVERGENT);
>>> +               break;
>>> +        }
>>> +       case nir_intrinsic_load_subgroup_invocation:
>>> +               result = ac_get_thread_id(&ctx->ac);
>>> +               break;
>>> +       case nir_intrinsic_load_subgroup_size:
>>> +               result = LLVMConstInt(ctx->i32, 64, 0);
>>> +               break;
>>> +       case nir_intrinsic_all_invocations:
>>> +               result = LLVMBuildSExt(ctx->builder,
>>> +                                      ac_build_vote_all(&ctx->ac,
>>> +                                                        get_src(ctx, instr->src[0])),
>>> +                                      ctx->i32, "");
>>
>> How well does LLVM optimize this? I've always found the boolean as
>> int32 with -1 and 0 an awkward mapping to LLVM, and am wondering
>> whether LLVM is able to optimize the SExt away or if a select might be
>> better.
> 
> From looking at the shader dump of my test, LLVM seems to be able to
> optimize it away. In fact, it's what radeonsi uses for all their
> comparisons (since TGSI also uses -1 for true), so I'd expect it to be
> at least as good as a select; it might be better.
> 
> It might be interesting to make booleans have a bit-size of 1, like in
> LLVM... it would probably require a lot of churn, though.

If NIR ever allows 1-bit channels, giving booleans a bit-size of 1 would
make sense. Until then, it's just not important, because InstCombine
seems to be able to optimize the sign-extension away entirely.

Cheers,
Nicolai



>>
>>
>>> +               break;
>>> +       case nir_intrinsic_any_invocations:
>>> +               result = LLVMBuildSExt(ctx->builder,
>>> +                                      ac_build_vote_any(&ctx->ac,
>>> +                                                        get_src(ctx, instr->src[0])),
>>> +                                      ctx->i32, "");
>>> +               break;
>>> +       case nir_intrinsic_all_invocations_equal:
>>> +               result = LLVMBuildSExt(ctx->builder,
>>> +                                      ac_build_vote_eq(&ctx->ac,
>>> +                                                        get_src(ctx, instr->src[0])),
>>> +                                      ctx->i32, "");
>>> +               break;
>>> +       case nir_intrinsic_load_subgroup_eq_mask: {
>>> +               LLVMValueRef id = ac_get_thread_id(&ctx->ac);
>>> +               id = LLVMBuildZExt(ctx->builder, id, ctx->i64, "");
>>> +               result = LLVMBuildShl(ctx->builder, LLVMConstInt(ctx->i64, 1, 0), id, "");
>>> +               break;
>>> +       }
>>> +       case nir_intrinsic_load_subgroup_ge_mask:
>>> +       case nir_intrinsic_load_subgroup_gt_mask:
>>> +       case nir_intrinsic_load_subgroup_le_mask:
>>> +       case nir_intrinsic_load_subgroup_lt_mask: {
>>> +               LLVMValueRef id = ac_get_thread_id(&ctx->ac);
>>> +               if (instr->intrinsic == nir_intrinsic_load_subgroup_gt_mask ||
>>> +                   instr->intrinsic == nir_intrinsic_load_subgroup_le_mask) {
>>> +                       /* All bits set except LSB */
>>> +                       result = LLVMConstInt(ctx->i64, -2, 0);
>>> +               } else {
>>> +                       /* All bits set */
>>> +                       result = LLVMConstInt(ctx->i64, -1, 0);
>>> +               }
>>> +               id = LLVMBuildZExt(ctx->builder, id, ctx->i64, "");
>>> +               result = LLVMBuildShl(ctx->builder, result, id, "");
>>> +               if (instr->intrinsic == nir_intrinsic_load_subgroup_le_mask ||
>>> +                   instr->intrinsic == nir_intrinsic_load_subgroup_lt_mask)
>>> +                       result = LLVMBuildNot(ctx->builder, result, "");
>>> +               break;
>>> +       }
>>>          default:
>>>                  fprintf(stderr, "Unknown intrinsic: ");
>>>                  nir_print_instr(&instr->instr, stderr);
>>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
>>> index e9bf44c..ea50acc 100644
>>> --- a/src/amd/vulkan/radv_device.c
>>> +++ b/src/amd/vulkan/radv_device.c
>>> @@ -127,6 +127,14 @@ static const VkExtensionProperties common_device_extensions[] = {
>>>                  .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
>>>                  .specVersion = 1,
>>>          },
>>> +       {
>>> +               .extensionName = VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
>>> +               .specVersion = 1,
>>> +       },
>>> +       {
>>> +               .extensionName = VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
>>> +               .specVersion = 1,
>>> +       },
>>>   };
>>>
>>>   static VkResult
>>> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
>>> index 39cbd5a..242890a 100644
>>> --- a/src/amd/vulkan/radv_pipeline.c
>>> +++ b/src/amd/vulkan/radv_pipeline.c
>>> @@ -228,6 +228,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
>>>                          .image_write_without_format = true,
>>>                          .tessellation = true,
>>>                          .int64 = true,
>>> +                       .shader_ballot = true,
>>> +                       .shader_group_vote = true,
>>>                  };
>>>                  entry_point = spirv_to_nir(spirv, module->size / 4,
>>>                                             spec_entries, num_spec_entries,
>>> --
>>> 2.9.4
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.


More information about the mesa-dev mailing list