[Mesa-dev] [PATCH 2/2] ac/nir: Fix vector extraction if source vector has >4 elements.

Timothy Arceri tarceri at itsqueeze.com
Wed Jan 17 23:42:17 UTC 2018


On 18/01/18 00:34, Bas Nieuwenhuizen wrote:
> Fixes: 91074bb11bda "radv/ac: Implement Float64 SSBO stores."
> ---
>   src/amd/common/ac_nir_to_llvm.c | 46 +++++++++++++++++++++++++++--------------
>   1 file changed, 31 insertions(+), 15 deletions(-)
> 
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 12f7772a5c..513289c838 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -2418,6 +2418,36 @@ static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
>   	return new_mask;
>   }
>   
> +static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
> +                                         unsigned start, unsigned count)
> +{
> +	LLVMTypeRef type = LLVMTypeOf(src);
> +
> +	if (LLVMGetTypeKind(type) != LLVMVectorTypeKind) {
> +		assert(start == 0);
> +		assert(count == 1);
> +		return src;
> +	}
> +
> +	unsigned src_elements = LLVMGetVectorSize(type);
> +	assert(start < src_elements);
> +	assert(start + count <= src_elements);
> +
> +	if (start == 0 && count == src_elements)
> +		return src;
> +
> +	if (count == 1)
> +		return LLVMBuildExtractElement(ctx->builder, src, LLVMConstInt(ctx->i32, start, false), "");
> +
> +	assert(count <= 8);
> +	LLVMValueRef indices[8];
> +	for (unsigned i = 0; i < count; ++i)
> +		indices[i] = LLVMConstInt(ctx->i32, i, false);
> +
> +	LLVMValueRef swizzle = LLVMConstVector(indices, count);
> +	return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, "");
> +}
> +
>   static void visit_store_ssbo(struct ac_nir_context *ctx,
>                                nir_intrinsic_instr *instr)
>   {
> @@ -2467,28 +2497,14 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
>   
>   		if (count == 4) {
>   			store_name = "llvm.amdgcn.buffer.store.v4f32";
> -			data = base_data;
>   		} else if (count == 2) {
> -			tmp = LLVMBuildExtractElement(ctx->ac.builder,

The tmp declaration needs to be removed above all this to avoid an 
unused var warning.


> -						      base_data, LLVMConstInt(ctx->ac.i32, start, false), "");
> -			data = LLVMBuildInsertElement(ctx->ac.builder, LLVMGetUndef(ctx->ac.v2f32), tmp,
> -						      ctx->ac.i32_0, "");
> -
> -			tmp = LLVMBuildExtractElement(ctx->ac.builder,
> -						      base_data, LLVMConstInt(ctx->ac.i32, start + 1, false), "");
> -			data = LLVMBuildInsertElement(ctx->ac.builder, data, tmp,
> -						      ctx->ac.i32_1, "");
>   			store_name = "llvm.amdgcn.buffer.store.v2f32";
>   
>   		} else {
>   			assert(count == 1);
> -			if (ac_get_llvm_num_components(base_data) > 1)
> -				data = LLVMBuildExtractElement(ctx->ac.builder, base_data,
> -							       LLVMConstInt(ctx->ac.i32, start, false), "");
> -			else
> -				data = base_data;
>   			store_name = "llvm.amdgcn.buffer.store.f32";
>   		}
> +		data = extract_vector_range(base_data, start, count);

This needs to be:

	data = extract_vector_range(&ctx->ac, base_data, start, count);
>   
>   		offset = base_offset;
>   		if (start != 0) {

Did you have more changes locally that didn't get sent? The test still 
fails for me:

R600_DEBUG=nir 
/home/tarceri/git/piglit/bin/arb_gpu_shader_fp64-layout-std140-fp64-shader 
-auto -fbo

expected[6] = 10. Read value: 6.3333333333333
expected[7] = 3. Read value: 8
PIGLIT: {"result": "fail" }


Also the std430 layout test has further issues:

R600_DEBUG=nir 
/home/tarceri/git/piglit/bin/arb_gpu_shader_fp64-layout-std430-fp64-shader 
-auto -fbo

Invalid insertelement operands!
   %36 = insertelement <4 x i64> %35, i32 undef, i32 1
Invalid insertelement operands!
   %37 = insertelement <4 x i64> %36, i32 undef, i32 2
LLVM ERROR: Broken function found, compilation aborted!



More information about the mesa-dev mailing list