[Mesa-dev] [PATCH] radv: apply the indexing workaround for atomic buffer operations on GFX9
Samuel Pitoiset
samuel.pitoiset at gmail.com
Wed May 8 08:56:18 UTC 2019
Err no, my mistake.
I will write a backport.
On 5/8/19 10:54 AM, Samuel Pitoiset wrote:
> You mean 19.1?
>
> On 5/7/19 8:29 PM, Dylan Baker wrote:
>> Hi Samuel,
>>
>> This doesn't apply cleanly on 19.0, and I'm not sure how to resolve
>> the diff.
>> Could you provide a backport, please?
>>
>> Thanks,
>> Dylan
>>
>> Quoting Samuel Pitoiset (2019-05-03 02:45:34)
>>> Because the new raw/struct intrinsics are buggy with LLVM 8
>>> (they weren't marked as sources of divergence), we fall back to the
>>> old intrinsics for atomic buffer operations. This means we need
>>> to apply the indexing workaround for GFX9.
>>>
>>> The fact that we need another workaround is painful but we should
>>> be able to clean that up a bit once LLVM 7 support is dropped.
>>>
>>> This fixes a GPU hang with AC Odyssey and some rendering problems
>>> with Nioh.
>>>
>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110573
>>> Fixes: 31164cf5f70 ("ac/nir: only use the new raw/struct image
>>> atomic intrinsics with LLVM 9+")
>>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
>>> ---
>>> src/amd/common/ac_nir_to_llvm.c | 12 +++++++-----
>>> src/amd/common/ac_shader_abi.h | 1 +
>>> src/amd/vulkan/radv_nir_to_llvm.c | 6 ++++++
>>> 3 files changed, 14 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/src/amd/common/ac_nir_to_llvm.c
>>> b/src/amd/common/ac_nir_to_llvm.c
>>> index c92eaaca31d..151e0d0f961 100644
>>> --- a/src/amd/common/ac_nir_to_llvm.c
>>> +++ b/src/amd/common/ac_nir_to_llvm.c
>>> @@ -2417,10 +2417,12 @@ static void get_image_coords(struct
>>> ac_nir_context *ctx,
>>> }
>>> static LLVMValueRef get_image_buffer_descriptor(struct
>>> ac_nir_context *ctx,
>>> - const
>>> nir_intrinsic_instr *instr, bool write)
>>> + const
>>> nir_intrinsic_instr *instr,
>>> + bool write, bool
>>> atomic)
>>> {
>>> LLVMValueRef rsrc = get_image_descriptor(ctx, instr,
>>> AC_DESC_BUFFER, write);
>>> - if (ctx->abi->gfx9_stride_size_workaround) {
>>> + if (ctx->abi->gfx9_stride_size_workaround ||
>>> + (ctx->abi->gfx9_stride_size_workaround_for_atomic && atomic)) {
>>> LLVMValueRef elem_count =
>>> LLVMBuildExtractElement(ctx->ac.builder, rsrc,
>>> LLVMConstInt(ctx->ac.i32, 2, 0), "");
>>> LLVMValueRef stride =
>>> LLVMBuildExtractElement(ctx->ac.builder, rsrc,
>>> LLVMConstInt(ctx->ac.i32, 1, 0), "");
>>> stride = LLVMBuildLShr(ctx->ac.builder, stride,
>>> LLVMConstInt(ctx->ac.i32, 16, 0), "");
>>> @@ -2466,7 +2468,7 @@ static LLVMValueRef visit_image_load(struct
>>> ac_nir_context *ctx,
>>> unsigned num_channels = util_last_bit(mask);
>>> LLVMValueRef rsrc, vindex;
>>> - rsrc = get_image_buffer_descriptor(ctx, instr,
>>> false);
>>> + rsrc = get_image_buffer_descriptor(ctx, instr,
>>> false, false);
>>> vindex = LLVMBuildExtractElement(ctx->ac.builder,
>>> get_src(ctx, instr->src[1]),
>>> ctx->ac.i32_0, "");
>>> @@ -2520,7 +2522,7 @@ static void visit_image_store(struct
>>> ac_nir_context *ctx,
>>> args.cache_policy = get_cache_policy(ctx, access, true,
>>> writeonly_memory);
>>> if (dim == GLSL_SAMPLER_DIM_BUF) {
>>> - LLVMValueRef rsrc = get_image_buffer_descriptor(ctx,
>>> instr, true);
>>> + LLVMValueRef rsrc = get_image_buffer_descriptor(ctx,
>>> instr, true, false);
>>> LLVMValueRef src = ac_to_float(&ctx->ac,
>>> get_src(ctx, instr->src[3]));
>>> unsigned src_channels =
>>> ac_get_llvm_num_components(src);
>>> LLVMValueRef vindex;
>>> @@ -2632,7 +2634,7 @@ static LLVMValueRef visit_image_atomic(struct
>>> ac_nir_context *ctx,
>>> params[param_count++] = get_src(ctx, instr->src[3]);
>>> if (dim == GLSL_SAMPLER_DIM_BUF) {
>>> - params[param_count++] =
>>> get_image_buffer_descriptor(ctx, instr, true);
>>> + params[param_count++] =
>>> get_image_buffer_descriptor(ctx, instr, true, true);
>>> params[param_count++] =
>>> LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
>>> ctx->ac.i32_0, ""); /* vindex */
>>> params[param_count++] = ctx->ac.i32_0; /* voffset */
>>> diff --git a/src/amd/common/ac_shader_abi.h
>>> b/src/amd/common/ac_shader_abi.h
>>> index 108fe58ce57..8debb1ff986 100644
>>> --- a/src/amd/common/ac_shader_abi.h
>>> +++ b/src/amd/common/ac_shader_abi.h
>>> @@ -203,6 +203,7 @@ struct ac_shader_abi {
>>> /* Whether to workaround GFX9 ignoring the stride for the
>>> buffer size if IDXEN=0
>>> * and LLVM optimizes an indexed load with constant index to
>>> IDXEN=0. */
>>> bool gfx9_stride_size_workaround;
>>> + bool gfx9_stride_size_workaround_for_atomic;
>>> };
>>> #endif /* AC_SHADER_ABI_H */
>>> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c
>>> b/src/amd/vulkan/radv_nir_to_llvm.c
>>> index 796d78e34f4..d83f0bd547f 100644
>>> --- a/src/amd/vulkan/radv_nir_to_llvm.c
>>> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
>>> @@ -3687,6 +3687,12 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct
>>> ac_llvm_compiler *ac_llvm,
>>> ctx.abi.clamp_shadow_reference = false;
>>> ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class ==
>>> GFX9 && HAVE_LLVM < 0x800;
>>> + /* Because the new raw/struct atomic intrinsics are buggy
>>> with LLVM 8,
>>> + * we fallback to the old intrinsics for atomic buffer image
>>> operations
>>> + * and thus we need to apply the indexing workaround...
>>> + */
>>> + ctx.abi.gfx9_stride_size_workaround_for_atomic =
>>> ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x900;
>>> +
>>> if (shader_count >= 2)
>>> ac_init_exec_full_mask(&ctx.ac);
>>> --
>>> 2.21.0
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list