[Mesa-dev] [PATCH 2/2] ac/nir: Fix vector extraction if source vector has >4 elements.
Timothy Arceri
tarceri at itsqueeze.com
Wed Jan 17 23:42:17 UTC 2018
On 18/01/18 00:34, Bas Nieuwenhuizen wrote:
> Fixes: 91074bb11bda "radv/ac: Implement Float64 SSBO stores."
> ---
> src/amd/common/ac_nir_to_llvm.c | 46 +++++++++++++++++++++++++++--------------
> 1 file changed, 31 insertions(+), 15 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 12f7772a5c..513289c838 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -2418,6 +2418,36 @@ static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
> return new_mask;
> }
>
> +static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
> + unsigned start, unsigned count)
> +{
> + LLVMTypeRef type = LLVMTypeOf(src);
> +
> + if (LLVMGetTypeKind(type) != LLVMVectorTypeKind) {
> + assert(start == 0);
> + assert(count == 1);
> + return src;
> + }
> +
> + unsigned src_elements = LLVMGetVectorSize(type);
> + assert(start < src_elements);
> + assert(start + count <= src_elements);
> +
> + if (start == 0 && count == src_elements)
> + return src;
> +
> + if (count == 1)
> + return LLVMBuildExtractElement(ctx->builder, src, LLVMConstInt(ctx->i32, start, false), "");
> +
> + assert(count <= 8);
> + LLVMValueRef indices[8];
> + for (unsigned i = 0; i < count; ++i)
> + indices[i] = LLVMConstInt(ctx->i32, i, false);
> +
> + LLVMValueRef swizzle = LLVMConstVector(indices, count);
> + return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, "");
> +}
> +
> static void visit_store_ssbo(struct ac_nir_context *ctx,
> nir_intrinsic_instr *instr)
> {
> @@ -2467,28 +2497,14 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
>
> if (count == 4) {
> store_name = "llvm.amdgcn.buffer.store.v4f32";
> - data = base_data;
> } else if (count == 2) {
> - tmp = LLVMBuildExtractElement(ctx->ac.builder,
The tmp declaration needs to be removed above all this to avoid an
unused var warning.
> - base_data, LLVMConstInt(ctx->ac.i32, start, false), "");
> - data = LLVMBuildInsertElement(ctx->ac.builder, LLVMGetUndef(ctx->ac.v2f32), tmp,
> - ctx->ac.i32_0, "");
> -
> - tmp = LLVMBuildExtractElement(ctx->ac.builder,
> - base_data, LLVMConstInt(ctx->ac.i32, start + 1, false), "");
> - data = LLVMBuildInsertElement(ctx->ac.builder, data, tmp,
> - ctx->ac.i32_1, "");
> store_name = "llvm.amdgcn.buffer.store.v2f32";
>
> } else {
> assert(count == 1);
> - if (ac_get_llvm_num_components(base_data) > 1)
> - data = LLVMBuildExtractElement(ctx->ac.builder, base_data,
> - LLVMConstInt(ctx->ac.i32, start, false), "");
> - else
> - data = base_data;
> store_name = "llvm.amdgcn.buffer.store.f32";
> }
> + data = extract_vector_range(base_data, start, count);
This needs to be:
data = extract_vector_range(&ctx->ac, base_data, start, count);
>
> offset = base_offset;
> if (start != 0) {
Did you have more changes locally that didn't get sent? The test still
fails for me:
R600_DEBUG=nir
/home/tarceri/git/piglit/bin/arb_gpu_shader_fp64-layout-std140-fp64-shader
-auto -fbo
expected[6] = 10. Read value: 6.3333333333333
expected[7] = 3. Read value: 8
PIGLIT: {"result": "fail" }
Also the std430 layout test has further issues:
R600_DEBUG=nir
/home/tarceri/git/piglit/bin/arb_gpu_shader_fp64-layout-std430-fp64-shader
-auto -fbo
Invalid insertelement operands!
%36 = insertelement <4 x i64> %35, i32 undef, i32 1
Invalid insertelement operands!
%37 = insertelement <4 x i64> %36, i32 undef, i32 2
LLVM ERROR: Broken function found, compilation aborted!
More information about the mesa-dev
mailing list