[Mesa-dev] [PATCH] radv: apply the indexing workaround for atomic buffer operations on GFX9

Fri May 3 13:15:55 UTC 2019

On Fri, May 3, 2019 at 11:42 AM Samuel Pitoiset
<samuel.pitoiset at gmail.com> wrote:
>
> Because the new raw/struct intrinsics are buggy with LLVM 8
> (they weren't marked as source of divergence), we fallback to the
> old instrinsics for atomic buffer operations. This means we need
> to apply the indexing workaround for GFX9.

Can you make it more clear that we only delayed atomics to LLVM 9 and
not load/store. I was confused on why we needed another variable.

Otherwise r-b
>
> The fact that we need another workaround is painful but we should
> be able to clean up that a bit once LLVM 7 support will be dropped.
>
> This fixes a GPU hang with AC Odyssey and some rendering problems
> with Nioh.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110573
> Fixes: 31164cf5f70 ("ac/nir: only use the new raw/struct image atomic intrinsics with LLVM 9+")
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
>  src/amd/common/ac_nir_to_llvm.c   | 12 +++++++-----
>  src/amd/common/ac_shader_abi.h    |  1 +
>  src/amd/vulkan/radv_nir_to_llvm.c |  6 ++++++
>  3 files changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index c92eaaca31d..151e0d0f961 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -2417,10 +2417,12 @@ static void get_image_coords(struct ac_nir_context *ctx,
>  }
>
>  static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
> -                                                const nir_intrinsic_instr *instr, bool write)
> +                                                const nir_intrinsic_instr *instr,
> +                                               bool write, bool atomic)
>  {
>         LLVMValueRef rsrc = get_image_descriptor(ctx, instr, AC_DESC_BUFFER, write);
> -       if (ctx->abi->gfx9_stride_size_workaround) {
> +       if (ctx->abi->gfx9_stride_size_workaround ||
> +           (ctx->abi->gfx9_stride_size_workaround_for_atomic && atomic)) {
>                 LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
>                 LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
>                 stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
> @@ -2466,7 +2468,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
>                 unsigned num_channels = util_last_bit(mask);
>                 LLVMValueRef rsrc, vindex;
>
> -               rsrc = get_image_buffer_descriptor(ctx, instr, false);
> +               rsrc = get_image_buffer_descriptor(ctx, instr, false, false);
>                 vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
>                                                  ctx->ac.i32_0, "");
>
> @@ -2520,7 +2522,7 @@ static void visit_image_store(struct ac_nir_context *ctx,
>         args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory);
>
>         if (dim == GLSL_SAMPLER_DIM_BUF) {
> -               LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true);
> +               LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true, false);
>                 LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
>                 unsigned src_channels = ac_get_llvm_num_components(src);
>                 LLVMValueRef vindex;
> @@ -2632,7 +2634,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
>         params[param_count++] = get_src(ctx, instr->src[3]);
>
>         if (dim == GLSL_SAMPLER_DIM_BUF) {
> -               params[param_count++] = get_image_buffer_descriptor(ctx, instr, true);
> +               params[param_count++] = get_image_buffer_descriptor(ctx, instr, true, true);
>                 params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
>                                                                 ctx->ac.i32_0, ""); /* vindex */
>                 params[param_count++] = ctx->ac.i32_0; /* voffset */
> diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
> index 108fe58ce57..8debb1ff986 100644
> --- a/src/amd/common/ac_shader_abi.h
> +++ b/src/amd/common/ac_shader_abi.h
> @@ -203,6 +203,7 @@ struct ac_shader_abi {
>         /* Whether to workaround GFX9 ignoring the stride for the buffer size if IDXEN=0
>         * and LLVM optimizes an indexed load with constant index to IDXEN=0. */
>         bool gfx9_stride_size_workaround;
> +       bool gfx9_stride_size_workaround_for_atomic;
>  };
>
>  #endif /* AC_SHADER_ABI_H */
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
> index 796d78e34f4..d83f0bd547f 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -3687,6 +3687,12 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
>         ctx.abi.clamp_shadow_reference = false;
>         ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x800;
>
> +       /* Because the new raw/struct atomic intrinsics are buggy with LLVM 8,
> +        * we fallback to the old intrinsics for atomic buffer image operations
> +        * and thus we need to apply the indexing workaround...
> +        */
> +       ctx.abi.gfx9_stride_size_workaround_for_atomic = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x900;
> +
>         if (shader_count >= 2)
>                 ac_init_exec_full_mask(&ctx.ac);
>
> --
> 2.21.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev