[Mesa-dev] [PATCH] R600: Add lit tests for texture sampling instruction selection.

Tue Feb 12 06:22:49 PST 2013

On Tue, Feb 12, 2013 at 12:59:14PM +0100, Michel Dänzer wrote:
> From: Michel Dänzer <michel.daenzer at amd.com>
> 
> 
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
>  test/CodeGen/R600/llvm.AMDGPU.tex.ll | 42 +++++++++++++++++++++
>  test/CodeGen/R600/llvm.SI.sample.ll  | 71 ++++++++++++++++++++++++++++++++++++
>  2 files changed, 113 insertions(+)
>  create mode 100644 test/CodeGen/R600/llvm.AMDGPU.tex.ll
>  create mode 100644 test/CodeGen/R600/llvm.SI.sample.ll
> 
> diff --git a/test/CodeGen/R600/llvm.AMDGPU.tex.ll b/test/CodeGen/R600/llvm.AMDGPU.tex.ll
> new file mode 100644
> index 0000000..74331fa
> --- /dev/null
> +++ b/test/CodeGen/R600/llvm.AMDGPU.tex.ll
> @@ -0,0 +1,42 @@
> +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
> +
> +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 1
> +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 2
> +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 3
> +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 4
> +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 5
> +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 6
> +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 7
> +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 8
> +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 9
> +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 10
> +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 11
> +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 12
> +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 13
> +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 14
> +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 15
> +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 16
> +

There should be a space between the opcode name and the destination
register in the CHECK lines above (i.e. "TEX_SAMPLE T..." rather than
"TEX_SAMPLET..."). However, the missing space is a bug in the R600_TEX
instruction definition, so it can be fixed in a separate patch.

> +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
> +   %addr = load <4 x float> addrspace(1)* %in
> +   %res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %addr, i32 0, i32 0, i32 1)
> +   %res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res1, i32 0, i32 0, i32 2)
> +   %res3 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res2, i32 0, i32 0, i32 3)
> +   %res4 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res3, i32 0, i32 0, i32 4)
> +   %res5 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res4, i32 0, i32 0, i32 5)
> +   %res6 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res5, i32 0, i32 0, i32 6)
> +   %res7 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res6, i32 0, i32 0, i32 7)
> +   %res8 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res7, i32 0, i32 0, i32 8)
> +   %res9 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res8, i32 0, i32 0, i32 9)
> +   %res10 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res9, i32 0, i32 0, i32 10)
> +   %res11 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res10, i32 0, i32 0, i32 11)
> +   %res12 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res11, i32 0, i32 0, i32 12)
> +   %res13 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res12, i32 0, i32 0, i32 13)
> +   %res14 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res13, i32 0, i32 0, i32 14)
> +   %res15 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res14, i32 0, i32 0, i32 15)
> +   %res16 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res15, i32 0, i32 0, i32 16)
> +   store <4 x float> %res16, <4 x float> addrspace(1)* %out
> +   ret void
> +}
> +
> +declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
> diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll
> new file mode 100644
> index 0000000..34d1935
> --- /dev/null
> +++ b/test/CodeGen/R600/llvm.SI.sample.ll
> @@ -0,0 +1,71 @@
> +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
> +
> +;CHECK: IMAGE_SAMPLE
> +;CHECK-NEXT: S_WAITCNT 1792
> +;CHECK-NEXT: IMAGE_SAMPLE
> +;CHECK-NEXT: S_WAITCNT 1792
> +;CHECK-NEXT: IMAGE_SAMPLE
> +;CHECK-NEXT: S_WAITCNT 1792
> +;CHECK-NEXT: IMAGE_SAMPLE
> +;CHECK-NEXT: S_WAITCNT 1792
> +;CHECK-NEXT: IMAGE_SAMPLE
> +;CHECK-NEXT: S_WAITCNT 1792
> +;CHECK-NEXT: IMAGE_SAMPLE_C
> +;CHECK-NEXT: S_WAITCNT 1792
> +;CHECK-NEXT: IMAGE_SAMPLE_C
> +;CHECK-NEXT: S_WAITCNT 1792
> +;CHECK-NEXT: IMAGE_SAMPLE_C
> +;CHECK-NEXT: S_WAITCNT 1792
> +;CHECK-NEXT: IMAGE_SAMPLE
> +;CHECK-NEXT: S_WAITCNT 1792
> +;CHECK-NEXT: IMAGE_SAMPLE
> +;CHECK-NEXT: S_WAITCNT 1792
> +;CHECK-NEXT: IMAGE_SAMPLE_C
> +;CHECK-NEXT: S_WAITCNT 1792
> +;CHECK-NEXT: IMAGE_SAMPLE_C
> +;CHECK-NEXT: S_WAITCNT 1792
> +;CHECK-NEXT: IMAGE_SAMPLE_C
> +;CHECK-NEXT: S_WAITCNT 1792
> +;CHECK-NEXT: IMAGE_SAMPLE
> +;CHECK-NEXT: S_WAITCNT 1792
> +;CHECK-NEXT: IMAGE_SAMPLE
> +;CHECK-NEXT: S_WAITCNT 1792
> +;CHECK-NEXT: IMAGE_SAMPLE
> +

We really need to fix the instruction definitions on SI so that they print
the instruction operands, but again, this can be done in another patch.

Thanks for adding more tests!

Reviewed-by: Tom Stellard <thomas.stellard at amd.com>

> +define void @test() {
> +   %res1 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
> +      <8 x i32> undef, <4 x i32> undef, i32 1)
> +   %res2 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
> +      <8 x i32> undef, <4 x i32> undef, i32 2)
> +   %res3 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
> +      <8 x i32> undef, <4 x i32> undef, i32 3)
> +   %res4 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
> +      <8 x i32> undef, <4 x i32> undef, i32 4)
> +   %res5 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
> +      <8 x i32> undef, <4 x i32> undef, i32 5)
> +   %res6 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
> +      <8 x i32> undef, <4 x i32> undef, i32 6)
> +   %res7 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
> +      <8 x i32> undef, <4 x i32> undef, i32 7)
> +   %res8 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
> +      <8 x i32> undef, <4 x i32> undef, i32 8)
> +   %res9 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
> +      <8 x i32> undef, <4 x i32> undef, i32 9)
> +   %res10 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
> +      <8 x i32> undef, <4 x i32> undef, i32 10)
> +   %res11 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
> +      <8 x i32> undef, <4 x i32> undef, i32 11)
> +   %res12 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
> +      <8 x i32> undef, <4 x i32> undef, i32 12)
> +   %res13 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
> +      <8 x i32> undef, <4 x i32> undef, i32 13)
> +   %res14 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
> +      <8 x i32> undef, <4 x i32> undef, i32 14)
> +   %res15 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
> +      <8 x i32> undef, <4 x i32> undef, i32 15)
> +   %res16 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
> +      <8 x i32> undef, <4 x i32> undef, i32 16)
> +   ret void
> +}
> +
> +declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32)
> -- 
> 1.8.1.3
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev