[Mesa-dev] [PATCH 6/6] ac: use llvm.amdgcn.s.buffer.load

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Sun Jan 13 15:16:05 UTC 2019


I think this has already been done by the following series, hasn't it?

https://patchwork.freedesktop.org/series/55025/

On Sat, Jan 12, 2019 at 12:53 AM Marek Olšák <maraeo at gmail.com> wrote:
>
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
>  src/amd/common/ac_llvm_build.c                 | 18 +++++++++++++-----
>  .../drivers/radeonsi/si_shader_tgsi_mem.c      |  4 ++--
>  2 files changed, 15 insertions(+), 7 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 76047148a6a..c0d90ada2be 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -1234,25 +1234,33 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
>         if (allow_smem && !glc && !slc) {
>                 assert(vindex == NULL);
>
>                 LLVMValueRef result[8];
>
>                 for (int i = 0; i < num_channels; i++) {
>                         if (i) {
>                                 offset = LLVMBuildAdd(ctx->builder, offset,
>                                                       LLVMConstInt(ctx->i32, 4, 0), "");
>                         }
> -                       LLVMValueRef args[2] = {rsrc, offset};
> -                       result[i] = ac_build_intrinsic(ctx, "llvm.SI.load.const.v4i32",
> -                                                      ctx->f32, args, 2,
> -                                                      AC_FUNC_ATTR_READNONE |
> -                                                      AC_FUNC_ATTR_LEGACY);
> +
> +                       if (HAVE_LLVM >= 0x0800) {
> +                               LLVMValueRef args[3] = {rsrc, offset, ctx->i32_0};
> +                               result[i] = ac_build_intrinsic(ctx, "llvm.amdgcn.s.buffer.load.i32",
> +                                                              ctx->f32, args, 3,
> +                                                              AC_FUNC_ATTR_READNONE);
> +                       } else {
> +                               LLVMValueRef args[2] = {rsrc, offset};
> +                               result[i] = ac_build_intrinsic(ctx, "llvm.SI.load.const.v4i32",
> +                                                              ctx->f32, args, 2,
> +                                                              AC_FUNC_ATTR_READNONE |
> +                                                              AC_FUNC_ATTR_LEGACY);
> +                       }
>                 }
>                 if (num_channels == 1)
>                         return result[0];
>
>                 if (num_channels == 3)
>                         result[num_channels++] = LLVMGetUndef(ctx->f32);
>                 return ac_build_gather_values(ctx, result, num_channels);
>         }
>
>         return ac_build_buffer_load_common(ctx, rsrc, vindex, offset,
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> index 727def56f65..2f49685c642 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> @@ -533,24 +533,24 @@ static void load_emit(
>                                                 info->images_store |
>                                                 info->images_atomic,
>                                                 info->uses_bindless_buffer_store |
>                                                 info->uses_bindless_buffer_atomic,
>                                                 info->uses_bindless_image_store |
>                                                 info->uses_bindless_image_atomic);
>         args.cache_policy = get_cache_policy(ctx, inst, false, false, false);
>
>         if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
>                 /* Don't use SMEM for shader buffer loads, because LLVM doesn't
> -                * select SMEM for SI.load.const with a non-constant offset, and
> +                * select SMEM for amdgcn.s.buffer.load with a non-constant offset, and
>                  * constant offsets practically don't exist with shader buffers.
>                  *
> -                * Also, SI.load.const doesn't use inst_offset when it's lowered
> +                * Also, amdgcn.s.buffer.load doesn't use inst_offset when it's lowered
>                  * to VMEM, so we just end up with more VALU instructions in the end
>                  * and no benefit.
>                  *
>                  * TODO: Remove this line once LLVM can select SMEM with a non-constant
>                  *       offset, and can derive inst_offset when VMEM is selected.
>                  *       After that, si_memory_barrier should invalidate sL1 for shader
>                  *       buffers.
>                  */
>                 emit_data->output[emit_data->chan] =
>                         ac_build_buffer_load(&ctx->ac, args.resource,
> --
> 2.17.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list