[Mesa-stable] [BACKPORT staging/18.2] ac: split 16-bit ssbo loads that may not be dword aligned

Juan A. Suarez Romero jasuarez at igalia.com
Wed Dec 19 15:02:11 UTC 2018


On Mon, 2018-12-17 at 14:41 +0000, Rhys Perry wrote:
> This ends up refactoring visit_load_buffer() a little.
> 
> Fixes: 7e7ee826982 ('ac: add support for 16bit buffer loads')
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108114
> Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
> Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
> This is a backport of https://patchwork.freedesktop.org/series/54010/ for 18.2.8
> A backport for 18.3.2 is currently being tested.
> 

Thanks for the backport. Enqueued for the next 18.2 release.

	J.A.

>  src/amd/common/ac_llvm_build.c  |  8 ++--
>  src/amd/common/ac_nir_to_llvm.c | 78 ++++++++++++++++-----------------
>  2 files changed, 43 insertions(+), 43 deletions(-)
> 
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index c85d2816ba..b3ac78221d 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -2729,9 +2729,11 @@ LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
>  	if (count == num_components)
>  		return value;
>  
> -	LLVMValueRef masks[] = {
> -	    LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
> -	    LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
> +	LLVMValueRef masks[MAX2(count, 2)];
> +	masks[0] = ctx->i32_0;
> +	masks[1] = ctx->i32_1;
> +	for (unsigned i = 2; i < count; i++)
> +		masks[i] = LLVMConstInt(ctx->i32, i, false);
>  
>  	if (count == 1)
>  		return LLVMBuildExtractElement(ctx->builder, value, masks[0],
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 2cb08be2b3..1463d09684 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -1621,31 +1621,39 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
>  static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
>                                        const nir_intrinsic_instr *instr)
>  {
> -	LLVMValueRef results[2];
> -	int load_bytes;
>  	int elem_size_bytes = instr->dest.ssa.bit_size / 8;
>  	int num_components = instr->num_components;
> -	int num_bytes = num_components * elem_size_bytes;
>  
> -	for (int i = 0; i < num_bytes; i += load_bytes) {
> -		load_bytes = MIN2(num_bytes - i, 16);
> -		const char *load_name;
> -		LLVMTypeRef data_type;
> -		LLVMValueRef offset = get_src(ctx, instr->src[1]);
> -		LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i, false);
> -		LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
> -							get_src(ctx, instr->src[0]), false);
> -		LLVMValueRef vindex = ctx->ac.i32_0;
> +	LLVMValueRef offset = get_src(ctx, instr->src[1]);
> +	LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
> +						get_src(ctx, instr->src[0]), false);
> +	LLVMValueRef vindex = ctx->ac.i32_0;
> +
> +	LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
> +	LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type;
>  
> -		int idx = i ? 1 : 0;
> +	LLVMValueRef results[4];
> +	for (int i = 0; i < num_components;) {
> +		int num_elems = num_components - i;
> +		if (elem_size_bytes < 4)
> +			num_elems = 1;
> +		if (num_elems * elem_size_bytes > 16)
> +			num_elems = 16 / elem_size_bytes;
> +		int load_bytes = num_elems * elem_size_bytes;
> +
> +		LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);
> +
> +		LLVMValueRef ret;
>  		if (load_bytes == 2) {
> -			results[idx] = ac_build_tbuffer_load_short(&ctx->ac,
> -								   rsrc,
> -								   vindex,
> -								   offset,
> -								   ctx->ac.i32_0,
> -								   immoffset);
> +			ret = ac_build_tbuffer_load_short(&ctx->ac,
> +							  rsrc,
> +							  vindex,
> +							  offset,
> +							  ctx->ac.i32_0,
> +							  immoffset);
>  		} else {
> +			const char *load_name;
> +			LLVMTypeRef data_type;
>  			switch (load_bytes) {
>  			case 16:
>  			case 12:
> @@ -1671,33 +1679,23 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
>  				ctx->ac.i1false,
>  				ctx->ac.i1false,
>  			};
> -			results[idx] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
> -			unsigned num_elems = ac_get_type_size(data_type) / elem_size_bytes;
> -			LLVMTypeRef resTy = LLVMVectorType(LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size), num_elems);
> -			results[idx] = LLVMBuildBitCast(ctx->ac.builder, results[idx], resTy, "");
> +			ret = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
>  		}
> -	}
>  
> -	assume(results[0]);
> -	LLVMValueRef ret = results[0];
> -	if (num_bytes > 16 || num_components == 3) {
> -		LLVMValueRef masks[] = {
> -		        LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
> -		        LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
> -		};
> +		LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
> +		ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");
> +		ret = ac_trim_vector(&ctx->ac, ret, load_bytes);
>  
> -		if (num_bytes > 16 && num_components == 3) {
> -			/* we end up with a v2i64 and i64 but shuffle fails on that */
> -			results[1] = ac_build_expand(&ctx->ac, results[1], 1, 2);
> -		}
> +		LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);
> +		ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");
>  
> -		LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
> -		ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0],
> -					     results[num_bytes > 16 ? 1 : 0], swizzle, "");
> +		for (unsigned j = 0; j < num_elems; j++) {
> +			results[i + j] = LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), "");
> +		}
> +		i += num_elems;
>  	}
>  
> -	return LLVMBuildBitCast(ctx->ac.builder, ret,
> -	                        get_def_type(ctx, &instr->dest.ssa), "");
> +	return ac_build_gather_values(&ctx->ac, results, num_components);
>  }
>  
>  static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,



More information about the mesa-stable mailing list