[Mesa-dev] [PATCH] ac: split 16-bit ssbo loads that may not be dword aligned
Samuel Pitoiset
samuel.pitoiset at gmail.com
Thu Dec 13 08:38:18 UTC 2018
This refactoring is really not easy to read as is. Can you please split
this in different parts? Maybe refactor first, then fix 16-bit ssbo loads?
If we want this to be backported, we will just need to squash the
different parts and send a separate patch to mesa-stable.
On 12/6/18 12:13 AM, Rhys Perry wrote:
> This ends up refactoring visit_load_buffer() a little.
>
> Fixes: 7e7ee826982 ('ac: add support for 16bit buffer loads')
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108114
> Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
> ---
> Unfortunately I was not able to test this patch on a Polaris due to hardware
> issues. It fixed the deqp-vk tests without regressions on Vega though.
>
> src/amd/common/ac_llvm_build.c | 8 ++--
> src/amd/common/ac_nir_to_llvm.c | 80 ++++++++++++++++-----------------
> src/compiler/nir/nir.h | 2 +-
> 3 files changed, 45 insertions(+), 45 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index abc18da13d..154cc696a2 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -2943,9 +2943,11 @@ LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
> if (count == num_components)
> return value;
>
> - LLVMValueRef masks[] = {
> - ctx->i32_0, ctx->i32_1,
> - LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
> + LLVMValueRef masks[MAX2(count, 2)];
> + masks[0] = ctx->i32_0;
> + masks[1] = ctx->i32_1;
> + for (unsigned i = 2; i < count; i++)
> + masks[i] = LLVMConstInt(ctx->i32, i, false);
>
> if (count == 1)
> return LLVMBuildExtractElement(ctx->builder, value, masks[0],
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index a109f5a815..4a4c09cf5f 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -1623,37 +1623,45 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
> static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
> const nir_intrinsic_instr *instr)
> {
> - LLVMValueRef results[2];
> - int load_bytes;
> int elem_size_bytes = instr->dest.ssa.bit_size / 8;
> int num_components = instr->num_components;
> - int num_bytes = num_components * elem_size_bytes;
> enum gl_access_qualifier access = nir_intrinsic_access(instr);
> LLVMValueRef glc = ctx->ac.i1false;
>
> if (access & (ACCESS_VOLATILE | ACCESS_COHERENT))
> glc = ctx->ac.i1true;
>
> - for (int i = 0; i < num_bytes; i += load_bytes) {
> - load_bytes = MIN2(num_bytes - i, 16);
> - const char *load_name;
> - LLVMTypeRef data_type;
> - LLVMValueRef offset = get_src(ctx, instr->src[1]);
> - LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i, false);
> - LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
> - get_src(ctx, instr->src[0]), false);
> - LLVMValueRef vindex = ctx->ac.i32_0;
> + LLVMValueRef offset = get_src(ctx, instr->src[1]);
> + LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
> + get_src(ctx, instr->src[0]), false);
> + LLVMValueRef vindex = ctx->ac.i32_0;
> +
> + LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
> + LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type;
>
> - int idx = i ? 1 : 0;
> + LLVMValueRef results[4];
> + for (int i = 0; i < num_components;) {
> + int num_elems = num_components - i;
> + if (elem_size_bytes < 4 && nir_intrinsic_align(instr) % 4 != 0)
> + num_elems = 1;
> + if (num_elems * elem_size_bytes > 16)
> + num_elems = 16 / elem_size_bytes;
> + int load_bytes = num_elems * elem_size_bytes;
> +
> + LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);
> +
> + LLVMValueRef ret;
> if (load_bytes == 2) {
> - results[idx] = ac_build_tbuffer_load_short(&ctx->ac,
> - rsrc,
> - vindex,
> - offset,
> - ctx->ac.i32_0,
> - immoffset,
> - glc);
> + ret = ac_build_tbuffer_load_short(&ctx->ac,
> + rsrc,
> + vindex,
> + offset,
> + ctx->ac.i32_0,
> + immoffset,
> + glc);
> } else {
> + const char *load_name;
> + LLVMTypeRef data_type;
> switch (load_bytes) {
> case 16:
> case 12:
> @@ -1679,33 +1687,23 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
> glc,
> ctx->ac.i1false,
> };
> - results[idx] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
> - unsigned num_elems = ac_get_type_size(data_type) / elem_size_bytes;
> - LLVMTypeRef resTy = LLVMVectorType(LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size), num_elems);
> - results[idx] = LLVMBuildBitCast(ctx->ac.builder, results[idx], resTy, "");
> + ret = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
> }
> - }
>
> - assume(results[0]);
> - LLVMValueRef ret = results[0];
> - if (num_bytes > 16 || num_components == 3) {
> - LLVMValueRef masks[] = {
> - LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
> - LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
> - };
> + LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
> + ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");
> + ret = ac_trim_vector(&ctx->ac, ret, load_bytes);
>
> - if (num_bytes > 16 && num_components == 3) {
> - /* we end up with a v2i64 and i64 but shuffle fails on that */
> - results[1] = ac_build_expand(&ctx->ac, results[1], 1, 2);
> - }
> + LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);
> + ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");
>
> - LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
> - ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0],
> - results[num_bytes > 16 ? 1 : 0], swizzle, "");
> + for (unsigned j = 0; j < num_elems; j++) {
> + results[i + j] = LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), "");
> + }
> + i += num_elems;
> }
>
> - return LLVMBuildBitCast(ctx->ac.builder, ret,
> - get_def_type(ctx, &instr->dest.ssa), "");
> + return ac_build_gather_values(&ctx->ac, results, num_components);
> }
>
> static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index db935c8496..ba85fc0547 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -1376,7 +1376,7 @@ nir_intrinsic_set_align(nir_intrinsic_instr *intrin,
> * to satisfy X % align == 0.
> */
> static inline unsigned
> -nir_intrinsic_align(nir_intrinsic_instr *intrin)
> +nir_intrinsic_align(const nir_intrinsic_instr *intrin)
> {
> const unsigned align_mul = nir_intrinsic_align_mul(intrin);
> const unsigned align_offset = nir_intrinsic_align_offset(intrin);
>
More information about the mesa-dev
mailing list