[Mesa-dev] [PATCH] R600/SI: Split global vector loads with more than 4 elements
Aaron Watry
awatry at gmail.com
Mon Feb 10 18:48:12 PST 2014
Hi Tom,
This definitely fixes some issues that I've been seeing with the int8/int16
vector loads (vload8() and vload16()) in CL. vstore8/vstore16 are still
broken, but at least the loads are working now (I've only tested int, but I
can do a full test run if you want/need).
For reference, the tests that failed before were in:
piglit/test/cl/program/execute/vload-int.cl
Comment out the vload3 tests to get the rest to work. Previously int3,
int8, and int16 all failed; now only the int3 tests fail to build.
If you're curious, the vstore failures begin with the following error and
can be reproduced with the attached test case:
LLVM ERROR: Cannot select: 0x12fa1b0: v4i32 = extract_subvector
0x1c55240, 0x12f8ba0 [ORD=28] [ID=38]
0x1c55240: v8i32 = BUILD_VECTOR 0x1c55140, 0x12f7a90, 0x12f7f90,
0x12f8490, 0x12f8aa0, 0x12f8fa0, 0x12f94a0, 0x12f9ab0 [ORD=24] [ID=35]
--Aaron
On Mon, Feb 10, 2014 at 3:32 PM, Tom Stellard <tom at stellard.net> wrote:
> From: Tom Stellard <thomas.stellard at amd.com>
>
> ---
> lib/Target/R600/SIISelLowering.cpp | 8 +-
> test/CodeGen/R600/load.ll | 178 +++++++++++++++++++------------------
> 2 files changed, 98 insertions(+), 88 deletions(-)
>
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index 9537405..eb08a13 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -478,9 +478,11 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
> case ISD::BRCOND: return LowerBRCOND(Op, DAG);
> case ISD::LOAD: {
> LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
> - if ((Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
> - Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
> - Op.getValueType().isVector()) {
> + if (Op.getValueType().isVector() &&
> + (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
> + Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
> + (Load->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
> + Op.getValueType().getVectorNumElements() > 4))) {
> SDValue MergedValues[2] = {
> SplitVectorLoad(Op, DAG),
> Load->getChain()
> diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll
> index 0153524..1486c4d 100644
> --- a/test/CodeGen/R600/load.ll
> +++ b/test/CodeGen/R600/load.ll
> @@ -1,16 +1,15 @@
> -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
> -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK %s
> -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
> +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
> +; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
> +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s
>
> ;===------------------------------------------------------------------------===;
> ; GLOBAL ADDRESS SPACE
> ;===------------------------------------------------------------------------===;
>
> ; Load an i8 value from the global address space.
> -; R600-CHECK-LABEL: @load_i8
> +; FUNC-LABEL: @load_i8
> ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
>
> -; SI-CHECK-LABEL: @load_i8
> ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
> define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
> %1 = load i8 addrspace(1)* %in
> @@ -19,13 +18,12 @@ define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_i8_sext
> +; FUNC-LABEL: @load_i8_sext
> ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
> ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
> ; R600-CHECK: 24
> ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
> ; R600-CHECK: 24
> -; SI-CHECK-LABEL: @load_i8_sext
> ; SI-CHECK: BUFFER_LOAD_SBYTE
> define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
> entry:
> @@ -35,10 +33,9 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_v2i8
> +; FUNC-LABEL: @load_v2i8
> ; R600-CHECK: VTX_READ_8
> ; R600-CHECK: VTX_READ_8
> -; SI-CHECK-LABEL: @load_v2i8
> ; SI-CHECK: BUFFER_LOAD_UBYTE
> ; SI-CHECK: BUFFER_LOAD_UBYTE
> define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
> @@ -49,7 +46,7 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_v2i8_sext
> +; FUNC-LABEL: @load_v2i8_sext
> ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
> ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
> ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
> @@ -60,7 +57,6 @@ entry:
> ; R600-CHECK-DAG: 24
> ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
> ; R600-CHECK-DAG: 24
> -; SI-CHECK-LABEL: @load_v2i8_sext
> ; SI-CHECK: BUFFER_LOAD_SBYTE
> ; SI-CHECK: BUFFER_LOAD_SBYTE
> define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
> @@ -71,12 +67,11 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_v4i8
> +; FUNC-LABEL: @load_v4i8
> ; R600-CHECK: VTX_READ_8
> ; R600-CHECK: VTX_READ_8
> ; R600-CHECK: VTX_READ_8
> ; R600-CHECK: VTX_READ_8
> -; SI-CHECK-LABEL: @load_v4i8
> ; SI-CHECK: BUFFER_LOAD_UBYTE
> ; SI-CHECK: BUFFER_LOAD_UBYTE
> ; SI-CHECK: BUFFER_LOAD_UBYTE
> @@ -89,7 +84,7 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_v4i8_sext
> +; FUNC-LABEL: @load_v4i8_sext
> ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
> ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
> ; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
> @@ -110,7 +105,6 @@ entry:
> ; R600-CHECK-DAG: 24
> ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
> ; R600-CHECK-DAG: 24
> -; SI-CHECK-LABEL: @load_v4i8_sext
> ; SI-CHECK: BUFFER_LOAD_SBYTE
> ; SI-CHECK: BUFFER_LOAD_SBYTE
> ; SI-CHECK: BUFFER_LOAD_SBYTE
> @@ -124,9 +118,8 @@ entry:
> }
>
> ; Load an i16 value from the global address space.
> -; R600-CHECK-LABEL: @load_i16
> +; FUNC-LABEL: @load_i16
> ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
> -; SI-CHECK-LABEL: @load_i16
> ; SI-CHECK: BUFFER_LOAD_USHORT
> define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
> entry:
> @@ -136,13 +129,12 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_i16_sext
> +; FUNC-LABEL: @load_i16_sext
> ; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
> ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
> ; R600-CHECK: 16
> ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
> ; R600-CHECK: 16
> -; SI-CHECK-LABEL: @load_i16_sext
> ; SI-CHECK: BUFFER_LOAD_SSHORT
> define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
> entry:
> @@ -152,10 +144,9 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_v2i16
> +; FUNC-LABEL: @load_v2i16
> ; R600-CHECK: VTX_READ_16
> ; R600-CHECK: VTX_READ_16
> -; SI-CHECK-LABEL: @load_v2i16
> ; SI-CHECK: BUFFER_LOAD_USHORT
> ; SI-CHECK: BUFFER_LOAD_USHORT
> define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
> @@ -166,7 +157,7 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_v2i16_sext
> +; FUNC-LABEL: @load_v2i16_sext
> ; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
> ; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
> ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
> @@ -177,7 +168,6 @@ entry:
> ; R600-CHECK-DAG: 16
> ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
> ; R600-CHECK-DAG: 16
> -; SI-CHECK-LABEL: @load_v2i16_sext
> ; SI-CHECK: BUFFER_LOAD_SSHORT
> ; SI-CHECK: BUFFER_LOAD_SSHORT
> define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
> @@ -188,12 +178,11 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_v4i16
> +; FUNC-LABEL: @load_v4i16
> ; R600-CHECK: VTX_READ_16
> ; R600-CHECK: VTX_READ_16
> ; R600-CHECK: VTX_READ_16
> ; R600-CHECK: VTX_READ_16
> -; SI-CHECK-LABEL: @load_v4i16
> ; SI-CHECK: BUFFER_LOAD_USHORT
> ; SI-CHECK: BUFFER_LOAD_USHORT
> ; SI-CHECK: BUFFER_LOAD_USHORT
> @@ -206,7 +195,7 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_v4i16_sext
> +; FUNC-LABEL: @load_v4i16_sext
> ; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
> ; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
> ; R600-CHECK-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
> @@ -227,7 +216,6 @@ entry:
> ; R600-CHECK-DAG: 16
> ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
> ; R600-CHECK-DAG: 16
> -; SI-CHECK-LABEL: @load_v4i16_sext
> ; SI-CHECK: BUFFER_LOAD_SSHORT
> ; SI-CHECK: BUFFER_LOAD_SSHORT
> ; SI-CHECK: BUFFER_LOAD_SSHORT
> @@ -241,10 +229,9 @@ entry:
> }
>
> ; load an i32 value from the global address space.
> -; R600-CHECK-LABEL: @load_i32
> +; FUNC-LABEL: @load_i32
> ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
>
> -; SI-CHECK-LABEL: @load_i32
> ; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
> define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
> entry:
> @@ -254,10 +241,9 @@ entry:
> }
>
> ; load a f32 value from the global address space.
> -; R600-CHECK-LABEL: @load_f32
> +; FUNC-LABEL: @load_f32
> ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
>
> -; SI-CHECK-LABEL: @load_f32
> ; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
> define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
> entry:
> @@ -267,10 +253,9 @@ entry:
> }
>
> ; load a v2f32 value from the global address space
> -; R600-CHECK-LABEL: @load_v2f32
> +; FUNC-LABEL: @load_v2f32
> ; R600-CHECK: VTX_READ_64
>
> -; SI-CHECK-LABEL: @load_v2f32
> ; SI-CHECK: BUFFER_LOAD_DWORDX2
> define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
> entry:
> @@ -279,11 +264,10 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_i64
> +; FUNC-LABEL: @load_i64
> ; R600-CHECK: MEM_RAT
> ; R600-CHECK: MEM_RAT
>
> -; SI-CHECK-LABEL: @load_i64
> ; SI-CHECK: BUFFER_LOAD_DWORDX2
> define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
> entry:
> @@ -292,13 +276,12 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_i64_sext
> +; FUNC-LABEL: @load_i64_sext
> ; R600-CHECK: MEM_RAT
> ; R600-CHECK: MEM_RAT
> ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
> ; R600-CHECK: 31
> -; SI-CHECK-LABEL: @load_i64_sext
> -; SI-CHECK: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]:[0-9]\]]]
> +; SI-CHECK: BUFFER_LOAD_DWORD
>
> define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
> entry:
> @@ -308,7 +291,7 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_i64_zext
> +; FUNC-LABEL: @load_i64_zext
> ; R600-CHECK: MEM_RAT
> ; R600-CHECK: MEM_RAT
> define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
> @@ -319,18 +302,65 @@ entry:
> ret void
> }
>
> +; FUNC-LABEL: @load_v8i32
> +; R600-CHECK: VTX_READ_128
> +; R600-CHECK: VTX_READ_128
> +; XXX: We should be using DWORDX4 instructions on SI.
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
> +entry:
> + %0 = load <8 x i32> addrspace(1)* %in
> + store <8 x i32> %0, <8 x i32> addrspace(1)* %out
> + ret void
> +}
> +
> +; FUNC-LABEL: @load_v16i32
> +; R600-CHECK: VTX_READ_128
> +; R600-CHECK: VTX_READ_128
> +; R600-CHECK: VTX_READ_128
> +; R600-CHECK: VTX_READ_128
> +; XXX: We should be using DWORDX4 instructions on SI.
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +; SI-CHECK: BUFFER_LOAD_DWORD
> +define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
> +entry:
> + %0 = load <16 x i32> addrspace(1)* %in
> + store <16 x i32> %0, <16 x i32> addrspace(1)* %out
> + ret void
> +}
> +
> ;===------------------------------------------------------------------------===;
> ; CONSTANT ADDRESS SPACE
> ;===------------------------------------------------------------------------===;
>
> ; Load a sign-extended i8 value
> -; R600-CHECK-LABEL: @load_const_i8_sext
> +; FUNC-LABEL: @load_const_i8_sext
> ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
> ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
> ; R600-CHECK: 24
> ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
> ; R600-CHECK: 24
> -; SI-CHECK-LABEL: @load_const_i8_sext
> ; SI-CHECK: BUFFER_LOAD_SBYTE v{{[0-9]+}},
> define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
> entry:
> @@ -341,9 +371,8 @@ entry:
> }
>
> ; Load an aligned i8 value
> -; R600-CHECK-LABEL: @load_const_i8_aligned
> +; FUNC-LABEL: @load_const_i8_aligned
> ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
> -; SI-CHECK-LABEL: @load_const_i8_aligned
> ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
> define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
> entry:
> @@ -354,9 +383,8 @@ entry:
> }
>
> ; Load an un-aligned i8 value
> -; R600-CHECK-LABEL: @load_const_i8_unaligned
> +; FUNC-LABEL: @load_const_i8_unaligned
> ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
> -; SI-CHECK-LABEL: @load_const_i8_unaligned
> ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
> define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
> entry:
> @@ -368,13 +396,12 @@ entry:
> }
>
> ; Load a sign-extended i16 value
> -; R600-CHECK-LABEL: @load_const_i16_sext
> +; FUNC-LABEL: @load_const_i16_sext
> ; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
> ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
> ; R600-CHECK: 16
> ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
> ; R600-CHECK: 16
> -; SI-CHECK-LABEL: @load_const_i16_sext
> ; SI-CHECK: BUFFER_LOAD_SSHORT
> define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
> entry:
> @@ -385,9 +412,8 @@ entry:
> }
>
> ; Load an aligned i16 value
> -; R600-CHECK-LABEL: @load_const_i16_aligned
> +; FUNC-LABEL: @load_const_i16_aligned
> ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
> -; SI-CHECK-LABEL: @load_const_i16_aligned
> ; SI-CHECK: BUFFER_LOAD_USHORT
> define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
> entry:
> @@ -398,9 +424,8 @@ entry:
> }
>
> ; Load an un-aligned i16 value
> -; R600-CHECK-LABEL: @load_const_i16_unaligned
> +; FUNC-LABEL: @load_const_i16_unaligned
> ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
> -; SI-CHECK-LABEL: @load_const_i16_unaligned
> ; SI-CHECK: BUFFER_LOAD_USHORT
> define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
> entry:
> @@ -412,10 +437,9 @@ entry:
> }
>
> ; Load an i32 value from the constant address space.
> -; R600-CHECK-LABEL: @load_const_addrspace_i32
> +; FUNC-LABEL: @load_const_addrspace_i32
> ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
>
> -; SI-CHECK-LABEL: @load_const_addrspace_i32
> ; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
> define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
> entry:
> @@ -425,10 +449,9 @@ entry:
> }
>
> ; Load a f32 value from the constant address space.
> -; R600-CHECK-LABEL: @load_const_addrspace_f32
> +; FUNC-LABEL: @load_const_addrspace_f32
> ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
>
> -; SI-CHECK-LABEL: @load_const_addrspace_f32
> ; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
> define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
> %1 = load float addrspace(2)* %in
> @@ -441,9 +464,8 @@ define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(
> ;===------------------------------------------------------------------------===;
>
> ; Load an i8 value from the local address space.
> -; R600-CHECK-LABEL: @load_i8_local
> +; FUNC-LABEL: @load_i8_local
> ; R600-CHECK: LDS_UBYTE_READ_RET
> -; SI-CHECK-LABEL: @load_i8_local
> ; SI-CHECK-NOT: S_WQM_B64
> ; SI-CHECK: S_MOV_B32 m0
> ; SI-CHECK: DS_READ_U8
> @@ -454,10 +476,9 @@ define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_i8_sext_local
> +; FUNC-LABEL: @load_i8_sext_local
> ; R600-CHECK: LDS_UBYTE_READ_RET
> ; R600-CHECK: ASHR
> -; SI-CHECK-LABEL: @load_i8_sext_local
> ; SI-CHECK-NOT: S_WQM_B64
> ; SI-CHECK: S_MOV_B32 m0
> ; SI-CHECK: DS_READ_I8
> @@ -469,10 +490,9 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_v2i8_local
> +; FUNC-LABEL: @load_v2i8_local
> ; R600-CHECK: LDS_UBYTE_READ_RET
> ; R600-CHECK: LDS_UBYTE_READ_RET
> -; SI-CHECK-LABEL: @load_v2i8_local
> ; SI-CHECK-NOT: S_WQM_B64
> ; SI-CHECK: S_MOV_B32 m0
> ; SI-CHECK: DS_READ_U8
> @@ -485,12 +505,11 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_v2i8_sext_local
> +; FUNC-LABEL: @load_v2i8_sext_local
> ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
> ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
> ; R600-CHECK-DAG: ASHR
> ; R600-CHECK-DAG: ASHR
> -; SI-CHECK-LABEL: @load_v2i8_sext_local
> ; SI-CHECK-NOT: S_WQM_B64
> ; SI-CHECK: S_MOV_B32 m0
> ; SI-CHECK: DS_READ_I8
> @@ -503,12 +522,11 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_v4i8_local
> +; FUNC-LABEL: @load_v4i8_local
> ; R600-CHECK: LDS_UBYTE_READ_RET
> ; R600-CHECK: LDS_UBYTE_READ_RET
> ; R600-CHECK: LDS_UBYTE_READ_RET
> ; R600-CHECK: LDS_UBYTE_READ_RET
> -; SI-CHECK-LABEL: @load_v4i8_local
> ; SI-CHECK-NOT: S_WQM_B64
> ; SI-CHECK: S_MOV_B32 m0
> ; SI-CHECK: DS_READ_U8
> @@ -523,7 +541,7 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_v4i8_sext_local
> +; FUNC-LABEL: @load_v4i8_sext_local
> ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
> ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
> ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
> @@ -532,7 +550,6 @@ entry:
> ; R600-CHECK-DAG: ASHR
> ; R600-CHECK-DAG: ASHR
> ; R600-CHECK-DAG: ASHR
> -; SI-CHECK-LABEL: @load_v4i8_sext_local
> ; SI-CHECK-NOT: S_WQM_B64
> ; SI-CHECK: S_MOV_B32 m0
> ; SI-CHECK: DS_READ_I8
> @@ -548,9 +565,8 @@ entry:
> }
>
> ; Load an i16 value from the local address space.
> -; R600-CHECK-LABEL: @load_i16_local
> +; FUNC-LABEL: @load_i16_local
> ; R600-CHECK: LDS_USHORT_READ_RET
> -; SI-CHECK-LABEL: @load_i16_local
> ; SI-CHECK-NOT: S_WQM_B64
> ; SI-CHECK: S_MOV_B32 m0
> ; SI-CHECK: DS_READ_U16
> @@ -562,10 +578,9 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_i16_sext_local
> +; FUNC-LABEL: @load_i16_sext_local
> ; R600-CHECK: LDS_USHORT_READ_RET
> ; R600-CHECK: ASHR
> -; SI-CHECK-LABEL: @load_i16_sext_local
> ; SI-CHECK-NOT: S_WQM_B64
> ; SI-CHECK: S_MOV_B32 m0
> ; SI-CHECK: DS_READ_I16
> @@ -577,10 +592,9 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_v2i16_local
> +; FUNC-LABEL: @load_v2i16_local
> ; R600-CHECK: LDS_USHORT_READ_RET
> ; R600-CHECK: LDS_USHORT_READ_RET
> -; SI-CHECK-LABEL: @load_v2i16_local
> ; SI-CHECK-NOT: S_WQM_B64
> ; SI-CHECK: S_MOV_B32 m0
> ; SI-CHECK: DS_READ_U16
> @@ -593,12 +607,11 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_v2i16_sext_local
> +; FUNC-LABEL: @load_v2i16_sext_local
> ; R600-CHECK-DAG: LDS_USHORT_READ_RET
> ; R600-CHECK-DAG: LDS_USHORT_READ_RET
> ; R600-CHECK-DAG: ASHR
> ; R600-CHECK-DAG: ASHR
> -; SI-CHECK-LABEL: @load_v2i16_sext_local
> ; SI-CHECK-NOT: S_WQM_B64
> ; SI-CHECK: S_MOV_B32 m0
> ; SI-CHECK: DS_READ_I16
> @@ -611,12 +624,11 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_v4i16_local
> +; FUNC-LABEL: @load_v4i16_local
> ; R600-CHECK: LDS_USHORT_READ_RET
> ; R600-CHECK: LDS_USHORT_READ_RET
> ; R600-CHECK: LDS_USHORT_READ_RET
> ; R600-CHECK: LDS_USHORT_READ_RET
> -; SI-CHECK-LABEL: @load_v4i16_local
> ; SI-CHECK-NOT: S_WQM_B64
> ; SI-CHECK: S_MOV_B32 m0
> ; SI-CHECK: DS_READ_U16
> @@ -631,7 +643,7 @@ entry:
> ret void
> }
>
> -; R600-CHECK-LABEL: @load_v4i16_sext_local
> +; FUNC-LABEL: @load_v4i16_sext_local
> ; R600-CHECK-DAG: LDS_USHORT_READ_RET
> ; R600-CHECK-DAG: LDS_USHORT_READ_RET
> ; R600-CHECK-DAG: LDS_USHORT_READ_RET
> @@ -640,7 +652,6 @@ entry:
> ; R600-CHECK-DAG: ASHR
> ; R600-CHECK-DAG: ASHR
> ; R600-CHECK-DAG: ASHR
> -; SI-CHECK-LABEL: @load_v4i16_sext_local
> ; SI-CHECK-NOT: S_WQM_B64
> ; SI-CHECK: S_MOV_B32 m0
> ; SI-CHECK: DS_READ_I16
> @@ -656,9 +667,8 @@ entry:
> }
>
> ; load an i32 value from the local address space.
> -; R600-CHECK-LABEL: @load_i32_local
> +; FUNC-LABEL: @load_i32_local
> ; R600-CHECK: LDS_READ_RET
> -; SI-CHECK-LABEL: @load_i32_local
> ; SI-CHECK-NOT: S_WQM_B64
> ; SI-CHECK: S_MOV_B32 m0
> ; SI-CHECK: DS_READ_B32
> @@ -670,9 +680,8 @@ entry:
> }
>
> ; load a f32 value from the local address space.
> -; R600-CHECK-LABEL: @load_f32_local
> +; FUNC-LABEL: @load_f32_local
> ; R600-CHECK: LDS_READ_RET
> -; SI-CHECK-LABEL: @load_f32_local
> ; SI-CHECK: S_MOV_B32 m0
> ; SI-CHECK: DS_READ_B32
> define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
> @@ -683,10 +692,9 @@ entry:
> }
>
> ; load a v2f32 value from the local address space
> -; R600-CHECK-LABEL: @load_v2f32_local
> +; FUNC-LABEL: @load_v2f32_local
> ; R600-CHECK: LDS_READ_RET
> ; R600-CHECK: LDS_READ_RET
> -; SI-CHECK-LABEL: @load_v2f32_local
> ; SI-CHECK: S_MOV_B32 m0
> ; SI-CHECK: DS_READ_B32
> ; SI-CHECK: DS_READ_B32
> --
> 1.8.1.5
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
A non-text attachment was scrubbed...
Name: store8.ll
Type: application/octet-stream
Size: 3030 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140210/2b334bf9/attachment-0001.obj>
More information about the mesa-dev
mailing list