[Mesa-dev] R600/SI: Support for local memory and derivatives
Tom Stellard
tom at stellard.net
Fri Jun 28 14:37:20 PDT 2013
On Wed, Jun 19, 2013 at 06:28:21PM +0200, Michel Dänzer wrote:
>
> These patches implement enough of local memory support to allow radeonsi
> to use that for computing derivatives, as suggested by Tom.
>
> They also almost allow test/CodeGen/R600/local-memory.ll to generate
> code for SI. Right now it still fails because it tries to copy a VGPR to
> an SGPR, which is not possible.
>
>
Can you add some lit tests for these new intrinsics and also add CHECK
lines for SI to the existing local-memory.ll test?
With the tests added, these patches are:
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
> --
> Earthling Michel Dänzer | http://www.amd.com
> Libre software enthusiast | Debian, X and DRI developer
> From f4ca359c4536aa53122b654196f2e007d50976f8 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer at amd.com>
> Date: Thu, 21 Feb 2013 16:12:45 +0100
> Subject: [PATCH 1/6] R600/SI: Add intrinsics for texture sampling with user
> derivatives
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
> lib/Target/R600/SIInstructions.td | 7 ++++++-
> lib/Target/R600/SIIntrinsics.td | 1 +
> 2 files changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 9c96c08..c9eac7d 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -535,7 +535,7 @@ def IMAGE_SAMPLE_B : MIMG_Sampler_Helper <0x00000025, "IMAGE_SAMPLE_B">;
> //def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>;
> def IMAGE_SAMPLE_C : MIMG_Sampler_Helper <0x00000028, "IMAGE_SAMPLE_C">;
> //def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>;
> -//def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>;
> +def IMAGE_SAMPLE_C_D : MIMG_Sampler_Helper <0x0000002a, "IMAGE_SAMPLE_C_D">;
> //def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>;
> def IMAGE_SAMPLE_C_L : MIMG_Sampler_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">;
> def IMAGE_SAMPLE_C_B : MIMG_Sampler_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">;
> @@ -1296,6 +1296,11 @@ multiclass SamplePatterns<ValueType addr_type> {
> def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>;
> def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>;
> def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>;
> +
> + def : SamplePattern <int_SI_sampled, IMAGE_SAMPLE_D, addr_type>;
> + def : SampleArrayPattern <int_SI_sampled, IMAGE_SAMPLE_D, addr_type>;
> + def : SampleShadowPattern <int_SI_sampled, IMAGE_SAMPLE_C_D, addr_type>;
> + def : SampleShadowArrayPattern <int_SI_sampled, IMAGE_SAMPLE_C_D, addr_type>;
> }
>
> defm : SamplePatterns<v2i32>;
> diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
> index 224cd2f..d2643e0 100644
> --- a/lib/Target/R600/SIIntrinsics.td
> +++ b/lib/Target/R600/SIIntrinsics.td
> @@ -23,6 +23,7 @@ let TargetPrefix = "SI", isTarget = 1 in {
>
> def int_SI_sample : Sample;
> def int_SI_sampleb : Sample;
> + def int_SI_sampled : Sample;
> def int_SI_samplel : Sample;
>
> def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
> --
> 1.8.3.1
>
> From 7a0048bb2ab1b661831da2b764bf1a52f66bec15 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer at amd.com>
> Date: Thu, 21 Feb 2013 18:51:38 +0100
> Subject: [PATCH v3 2/6] R600/SI: Initial support for LDS/GDS instructions
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
>
> v3: Drop vdst operand from DS_Store_Helper class, and adapt
> SIInsertWaits::getHwCounts() to handle that. Unfortunately, this seems
> to mess up the asm string output somehow, not sure what's going on
> there.
>
> lib/Target/R600/SIInsertWaits.cpp | 2 ++
> lib/Target/R600/SIInstrFormats.td | 24 ++++++++++++++++++++++++
> lib/Target/R600/SIInstrInfo.td | 23 +++++++++++++++++++++++
> lib/Target/R600/SIInstructions.td | 3 +++
> lib/Target/R600/SILowerControlFlow.cpp | 16 ++++++++++++++++
> 5 files changed, 68 insertions(+)
>
> diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
> index c36e1dc..d31da45 100644
> --- a/lib/Target/R600/SIInsertWaits.cpp
> +++ b/lib/Target/R600/SIInsertWaits.cpp
> @@ -134,6 +134,8 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
> if (TSFlags & SIInstrFlags::LGKM_CNT) {
>
> MachineOperand &Op = MI.getOperand(0);
> + if (!Op.isReg())
> + Op = MI.getOperand(1);
> assert(Op.isReg() && "First LGKM operand must be a register!");
>
> unsigned Reg = Op.getReg();
> diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
> index 51f323d..434aa7e 100644
> --- a/lib/Target/R600/SIInstrFormats.td
> +++ b/lib/Target/R600/SIInstrFormats.td
> @@ -281,6 +281,30 @@ class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
>
> let Uses = [EXEC] in {
>
> +class DS <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
> + Enc64 <outs, ins, asm, pattern> {
> +
> + bits<8> vdst;
> + bits<1> gds;
> + bits<8> addr;
> + bits<8> data0;
> + bits<8> data1;
> + bits<8> offset0;
> + bits<8> offset1;
> +
> + let Inst{7-0} = offset0;
> + let Inst{15-8} = offset1;
> + let Inst{17} = gds;
> + let Inst{25-18} = op;
> + let Inst{31-26} = 0x36; //encoding
> + let Inst{39-32} = addr;
> + let Inst{47-40} = data0;
> + let Inst{55-48} = data1;
> + let Inst{63-56} = vdst;
> +
> + let LGKM_CNT = 1;
> +}
> +
> class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> Enc64<outs, ins, asm, pattern> {
>
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 42fa95f..6abc93e 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -286,6 +286,29 @@ class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
> // Vector I/O classes
> //===----------------------------------------------------------------------===//
>
> +class DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
> + op,
> + (outs regClass:$vdst),
> + (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1,
> + i8imm:$offset0, i8imm:$offset1),
> + asm#" $vdst, $gds, $addr, $data0, $data1, $offset0, $offset1, [M0]",
> + []> {
> + let mayLoad = 1;
> + let mayStore = 0;
> +}
> +
> +class DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
> + op,
> + (outs),
> + (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1,
> + i8imm:$offset0, i8imm:$offset1),
> + asm#" $gds, $addr, $data0, $data1, $offset0, $offset1, [M0]",
> + []> {
> + let mayStore = 1;
> + let mayLoad = 0;
> + let vdst = 0;
> +}
> +
> class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
> op,
> (outs),
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index c9eac7d..09460d8 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -391,6 +391,9 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
>
> } // End isCompare = 1
>
> +def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
> +def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>;
> +
> //def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
> //def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
> //def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>;
> diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
> index 5b434fb..c2e8f02 100644
> --- a/lib/Target/R600/SILowerControlFlow.cpp
> +++ b/lib/Target/R600/SILowerControlFlow.cpp
> @@ -411,6 +411,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
> TRI = MF.getTarget().getRegisterInfo();
>
> bool HaveKill = false;
> + bool NeedM0 = false;
> bool NeedWQM = false;
> unsigned Depth = 0;
>
> @@ -482,6 +483,13 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
> IndirectDst(MI);
> break;
>
> + case AMDGPU::DS_READ_B32:
> + NeedWQM = true;
> + // Fall through
> + case AMDGPU::DS_WRITE_B32:
> + NeedM0 = true;
> + break;
> +
> case AMDGPU::V_INTERP_P1_F32:
> case AMDGPU::V_INTERP_P2_F32:
> case AMDGPU::V_INTERP_MOV_F32:
> @@ -492,6 +500,14 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
> }
> }
>
> + if (NeedM0) {
> + MachineBasicBlock &MBB = MF.front();
> + // Initialize M0 to a value that won't cause LDS access to be discarded
> + // due to offset clamping
> + BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_MOV_B32),
> + AMDGPU::M0).addImm(0xffffffff);
> + }
> +
> if (NeedWQM) {
> MachineBasicBlock &MBB = MF.front();
> BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
> --
> 1.8.3.1
>
> From e660d22ad2a47d590c4e90f82fdb54848df99681 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer at amd.com>
> Date: Fri, 14 Jun 2013 11:12:53 +0200
> Subject: [PATCH 3/6] R600/SI: Add intrinsic for retrieving the current thread
> ID
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
> lib/Target/R600/SIInstructions.td | 10 ++++++++--
> lib/Target/R600/SIIntrinsics.td | 1 +
> 2 files changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 09460d8..61755b4 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -903,8 +903,8 @@ defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
> defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
> defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
> //defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
> -//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
> -//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
> +defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
> +defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
>
> let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
> defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
> @@ -1575,6 +1575,12 @@ def : Pat <
> (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
> >;
>
> +def : Pat <
> + (int_SI_tid),
> + (V_MBCNT_HI_U32_B32_e32 0xffffffff,
> + (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0, 0, 0, 0, 0))
> +>;
> +
> /********** ================== **********/
> /********** VOP3 Patterns **********/
> /********** ================== **********/
> diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
> index d2643e0..2fa073e 100644
> --- a/lib/Target/R600/SIIntrinsics.td
> +++ b/lib/Target/R600/SIIntrinsics.td
> @@ -14,6 +14,7 @@
>
> let TargetPrefix = "SI", isTarget = 1 in {
>
> + def int_SI_tid : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
> def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
> def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
> def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
> --
> 1.8.3.1
>
> From 83945b3ca8825d65adf894612b74eb03b4cc1a85 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer at amd.com>
> Date: Mon, 17 Jun 2013 12:21:29 +0200
> Subject: [PATCH 4/6] R600/SI: Initial local memory support
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
> lib/Target/R600/AMDGPUAsmPrinter.cpp | 7 +++++++
> lib/Target/R600/AMDGPUISelLowering.cpp | 4 +---
> lib/Target/R600/R600ISelLowering.cpp | 2 ++
> lib/Target/R600/SIDefines.h | 4 ++++
> lib/Target/R600/SIISelLowering.cpp | 5 +++++
> lib/Target/R600/SIInstructions.td | 15 +++++++++++++++
> 6 files changed, 34 insertions(+), 3 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
> index 996d2a6..e039b77 100644
> --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
> +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
> @@ -233,7 +233,14 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
>
> OutStreamer.EmitIntValue(RsrcReg, 4);
> OutStreamer.EmitIntValue(S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8), 4);
> +
> + if (MFI->ShaderType == ShaderType::COMPUTE) {
> + OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
> + OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(RoundUpToAlignment(MFI->LDSSize, 256) >> 8), 4);
> + }
> if (MFI->ShaderType == ShaderType::PIXEL) {
> + OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
> + OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(RoundUpToAlignment(MFI->LDSSize, 256) >> 8), 4);
> OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
> OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
> }
> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> index 804f991..f90d084 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -71,8 +71,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
> setOperationAction(ISD::UDIV, MVT::i32, Expand);
> setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
> setOperationAction(ISD::UREM, MVT::i32, Expand);
> -
> - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
> }
>
> //===---------------------------------------------------------------------===//
> @@ -136,7 +134,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
> // XXX: Account for alignment?
> MFI->LDSSize += Size;
>
> - return DAG.getConstant(Offset, MVT::i32);
> + return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32);
> }
>
> SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
> diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> index f231926..c71a934 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -106,6 +106,8 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
> setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
> setTargetDAGCombine(ISD::SELECT_CC);
>
> + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
> +
> setBooleanContents(ZeroOrNegativeOneBooleanContent);
> setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
> setSchedulingPreference(Sched::VLIW);
> diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h
> index 716b093..147578c 100644
> --- a/lib/Target/R600/SIDefines.h
> +++ b/lib/Target/R600/SIDefines.h
> @@ -12,11 +12,15 @@
> #define SIDEFINES_H_
>
> #define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
> +#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS 0x00B02C
> +#define S_00B02C_EXTRA_LDS_SIZE(x) (((x) & 0xFF) << 8)
> #define R_00B128_SPI_SHADER_PGM_RSRC1_VS 0x00B128
> #define R_00B228_SPI_SHADER_PGM_RSRC1_GS 0x00B228
> #define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
> #define S_00B028_VGPRS(x) (((x) & 0x3F) << 0)
> #define S_00B028_SGPRS(x) (((x) & 0x0F) << 6)
> +#define R_00B84C_COMPUTE_PGM_RSRC2 0x00B84C
> +#define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15)
> #define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
>
> #endif // SIDEFINES_H_
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index d74f401..aa16b70 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -75,6 +75,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
>
> setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
>
> + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
> +
> setTargetDAGCombine(ISD::SELECT_CC);
>
> setTargetDAGCombine(ISD::SETCC);
> @@ -298,11 +300,14 @@ MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
> //===----------------------------------------------------------------------===//
>
> SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
> + MachineFunction &MF = DAG.getMachineFunction();
> + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
> switch (Op.getOpcode()) {
> default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
> case ISD::BRCOND: return LowerBRCOND(Op, DAG);
> case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
> case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
> + case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
> case ISD::INTRINSIC_WO_CHAIN: {
> unsigned IntrinsicID =
> cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 61755b4..e3cfdad 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1590,6 +1590,21 @@ def : Pat <
> (V_MAD_F32 $src0, $src1, $src2)
> >;
>
> +/********** ======================= **********/
> +/********** Load/Store Patterns **********/
> +/********** ======================= **********/
> +
> +def : Pat <
> + (local_load i64:$src0),
> + (i32 (DS_READ_B32 0, (EXTRACT_SUBREG $src0, sub0),
> + (EXTRACT_SUBREG $src0, sub0), (EXTRACT_SUBREG $src0, sub0), 0, 0))
> +>;
> +
> +def : Pat <
> + (local_store i32:$src1, i64:$src0),
> + (DS_WRITE_B32 0, (EXTRACT_SUBREG $src0, sub0), $src1, $src1, 0, 0)
> +>;
> +
> /********** ================== **********/
> /********** SMRD Patterns **********/
> /********** ================== **********/
> --
> 1.8.3.1
>
> From 06d5a258a018517526f9013ec21eec262074898a Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer at amd.com>
> Date: Wed, 19 Jun 2013 11:01:00 +0200
> Subject: [PATCH 5/6] R600/SI: Add pattern for the AMDGPU.barrier.local
> intrinsic
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
> lib/Target/R600/SIInstructions.td | 11 ++++++++++-
> 1 file changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index e3cfdad..5a1bf30 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -774,8 +774,17 @@ def S_CBRANCH_EXECNZ : SOPP <
> } // End isBranch = 1
> } // End isTerminator = 1
>
> -//def S_BARRIER : SOPP_ <0x0000000a, "S_BARRIER", []>;
> let hasSideEffects = 1 in {
> +def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
> + [(int_AMDGPU_barrier_local)]
> +> {
> + let SIMM16 = 0;
> + let isBarrier = 1;
> + let hasCtrlDep = 1;
> + let mayLoad = 1;
> + let mayStore = 1;
> +}
> +
> def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16",
> []
> >;
> --
> 1.8.3.1
>
> From 9cc3c1dcf5ebd622f23913526f693531b7441825 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer at amd.com>
> Date: Wed, 19 Jun 2013 11:05:06 +0200
> Subject: [PATCH 6/6] R600/SI: Add pattern for i64 add
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
> lib/Target/R600/SIInstructions.td | 8 ++++++++
> 1 file changed, 8 insertions(+)
>
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 5a1bf30..48478a5 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -932,6 +932,14 @@ defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">;
> } // End Uses = [VCC]
> } // End isCommutable = 1, Defs = [VCC]
>
> +// i64 additions aren't supported in hardware, split into two 32bit additions
> +def : Pat <
> + (i64 (add i64:$src0, VReg_64:$src1)),
> + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
> + (V_ADD_I32_e32 (EXTRACT_SUBREG $src0, sub0), (EXTRACT_SUBREG $src1, sub0)), sub0),
> + (V_ADDC_U32_e32 (EXTRACT_SUBREG $src0, sub1), (EXTRACT_SUBREG $src1, sub1)), sub1)
> +>;
> +
> defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
> ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
> ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
> --
> 1.8.3.1
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the mesa-dev
mailing list