[Mesa-dev] [PATCH 3/8] ac: add support for 16bit UBO loads

Marek Olšák maraeo at gmail.com
Tue Jul 24 00:45:45 UTC 2018


On Thu, Jul 19, 2018 at 8:48 AM, Daniel Schürmann
<daniel.schuermann at campus.tu-berlin.de> wrote:
> Signed-off-by: Daniel Schürmann <daniel.schuermann at campus.tu-berlin.de>
> ---
>  src/amd/common/ac_llvm_build.c  | 25 +++++++++++++++++++++++++
>  src/amd/common/ac_llvm_build.h  |  8 ++++++++
>  src/amd/common/ac_nir_to_llvm.c | 21 ++++++++++++++++++---
>  3 files changed, 51 insertions(+), 3 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 4078b005e5..54b7e98701 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -1103,6 +1103,31 @@ LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
>                                            can_speculate, true);
>  }
>
> +LLVMValueRef
> +ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
> +                           LLVMValueRef rsrc,
> +                           LLVMValueRef vindex,
> +                           LLVMValueRef voffset,
> +                               LLVMValueRef soffset,
> +                               LLVMValueRef immoffset)
> +{
> +       const char *name = "llvm.amdgcn.tbuffer.load.i32";
> +       LLVMTypeRef type = ctx->i32;
> +       LLVMValueRef params[] = {
> +                               rsrc,
> +                               vindex,
> +                               voffset,
> +                               soffset,
> +                               immoffset,
> +                               LLVMConstInt(ctx->i32, V_008F0C_BUF_DATA_FORMAT_16, false),
> +                               LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, false),
> +                               ctx->i1false,
> +                               ctx->i1false,
> +       };
> +       LLVMValueRef res = ac_build_intrinsic(ctx, name, type, params, 9, 0);
> +       return LLVMBuildTrunc(ctx->builder, res, ctx->i16, "");
> +}
> +
>  /**
>   * Set range metadata on an instruction.  This can only be used on load and
>   * call instructions.  If you know an instruction can only produce the values
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index 4e7cbcd5fa..c5753037e7 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -252,6 +252,14 @@ LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
>                                                    bool glc,
>                                                    bool can_speculate);
>
> +LLVMValueRef
> +ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
> +                           LLVMValueRef rsrc,
> +                           LLVMValueRef vindex,
> +                           LLVMValueRef voffset,
> +                               LLVMValueRef soffset,
> +                               LLVMValueRef immoffset);
> +
>  LLVMValueRef
>  ac_get_thread_id(struct ac_llvm_context *ctx);
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 43a0b86420..d7a52a536c 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -1663,9 +1663,24 @@ static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
>         if (instr->dest.ssa.bit_size == 64)
>                 num_components *= 2;
>
> -       ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset,
> -                                  NULL, 0, false, false, true, true);
> -       ret = ac_trim_vector(&ctx->ac, ret, num_components);
> +       if (instr->dest.ssa.bit_size == 16) {
> +               LLVMValueRef results[num_components];
> +               for (unsigned i = 0; i < num_components; ++i) {
> +                       results[i] = ac_build_tbuffer_load_short(&ctx->ac,
> +                                                                rsrc,
> +                                                                ctx->ac.i32_0,
> +                                                                offset,
> +                                                                ctx->ac.i32_0,
> +                                                                LLVMConstInt(ctx->ac.i32, 2 * i, 0));
> +               }

FYI, tbuffer.load is significantly slower than SI.load.const. If the
offset is aligned to 4 bytes, using SI.load.const followed by FPExt or
ZExt/SExt would be faster.

Marek

> +               ret = ac_build_gather_values(&ctx->ac, results, num_components);
> +       } else {
> +               ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset,
> +                                          NULL, 0, false, false, true, true);
> +
> +               ret = ac_trim_vector(&ctx->ac, ret, num_components);
> +       }
> +
>         return LLVMBuildBitCast(ctx->ac.builder, ret,
>                                 get_def_type(ctx, &instr->dest.ssa), "");
>  }
> --
> 2.17.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list