[Mesa-dev] [PATCH 3/7] radv: add support for 16bit UBO loads
Marek Olšák
maraeo at gmail.com
Thu Feb 8 15:05:07 UTC 2018
On Wed, Feb 7, 2018 at 7:47 PM, Daniel Schürmann
<daniel.schuermann at campus.tu-berlin.de> wrote:
> Signed-off-by: Daniel Schürmann <daniel.schuermann at campus.tu-berlin.de>
> ---
> src/amd/common/ac_llvm_build.c | 25 +++++++++++++++++++++++++
> src/amd/common/ac_llvm_build.h | 8 ++++++++
> src/amd/common/ac_nir_to_llvm.c | 14 ++++++++++++--
> 3 files changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index d0b288f51d..197dc40eae 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -1046,6 +1046,31 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
> can_speculate, true);
> }
>
> +LLVMValueRef
> +ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
> + LLVMValueRef rsrc,
> + LLVMValueRef vindex,
> + LLVMValueRef voffset,
> + LLVMValueRef soffset,
> + LLVMValueRef immoffset)
> +{
> + const char *name = "llvm.amdgcn.tbuffer.load.i32";
> + LLVMTypeRef type = ctx->i32;
> + LLVMValueRef params[] = {
> + rsrc,
> + vindex,
> + voffset,
> + soffset,
> + immoffset,
> + LLVMConstInt(ctx->i32, V_008F0C_BUF_DATA_FORMAT_16, false),
> + LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, false),
> + ctx->i1false,
> + ctx->i1false,
> + };
> + LLVMValueRef res = ac_build_intrinsic(ctx, name, type, params, 9, 0);
> + return LLVMBuildTrunc(ctx->builder, res, ctx->i16, "");
> +}
> +
> /**
> * Set range metadata on an instruction. This can only be used on load and
> * call instructions. If you know an instruction can only produce the values
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index 47c843fb4b..da49b06502 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -225,6 +225,14 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
> bool glc,
> bool can_speculate);
>
> +LLVMValueRef
> +ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
> + LLVMValueRef rsrc,
> + LLVMValueRef vindex,
> + LLVMValueRef voffset,
> + LLVMValueRef soffset,
> + LLVMValueRef immoffset);
> +
> LLVMValueRef
> ac_get_thread_id(struct ac_llvm_context *ctx);
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 31f16d3acf..2b48e92949 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -2697,9 +2697,19 @@ static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
> if (instr->dest.ssa.bit_size == 64)
> num_components *= 2;
>
> - ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset,
> + if (instr->dest.ssa.bit_size == 16) {
> + LLVMValueRef results[num_components];
> + for (unsigned i = 0; i < num_components; ++i) {
> + results[i] = ac_build_tbuffer_load_short(&ctx->ac, rsrc,
> + ctx->ac.i32_0, ctx->ac.i32_0,
> + offset, LLVMConstInt(ctx->ac.i32, 2 * i, 0));
> + }
For num_components >= 2, it would be better to load the data as i32 and then
bitcast it to v2i16, assuming the offset is aligned to 4 bytes.
Marek
More information about the mesa-dev mailing list