[Mesa-dev] R600: Various improvements
Tom Stellard
tom at stellard.net
Mon May 13 07:35:54 PDT 2013
On Sun, May 12, 2013 at 07:41:21AM -0700, Vincent Lejeune wrote:
> Hi,
> Patches 2 and 3 factor out some code from the backend. Patch 3 should also avoid some recomputation, which shouldn't hurt.
> Patches 4 and 5 rework how textures are handled in our backend. They replace TGSI-like intrinsics (i.e. intrinsics that take a TextureTarget as their last argument, which makes no sense
> from a hw point of view) with intrinsics closer to the hw. The pass could be done in Mesa, but I would rather have it in LLVM for now to ensure backward compatibility with LLVM 3.3.
>
Hi Vincent,
Just some small comments on patches 4 and 5. With those comments
addressed, this series is:
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
> From 3974315f153e67913f8cc4b4d52550bf6ab33e59 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Sun, 12 May 2013 16:29:50 +0200
> Subject: [PATCH 4/5] R600: Rename 128 bit registers.
>
> ---
> lib/Target/R600/R600Instructions.td | 17 ++++++++---------
> lib/Target/R600/R600RegisterInfo.td | 2 +-
> 2 files changed, 9 insertions(+), 10 deletions(-)
>
What is the reason for renaming these registers? Could you add an
explanation to the commit message?
> diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> index 86e4b4a..abaa94b 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -1750,8 +1750,7 @@ let usesCustomInserter = 1 in {
>
> class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name,
> list<dag> pattern>
> - : EG_CF_RAT <0x57, 0x2, 0, (outs), ins,
> - !strconcat(name, " $rw_gpr, $index_gpr, $eop"), pattern> {
> + : EG_CF_RAT <0x57, 0x2, 0, (outs), ins, name, pattern> {
> let RIM = 0;
> // XXX: Have a separate instruction for non-indexed writes.
> let TYPE = 1;
> @@ -1771,19 +1770,19 @@ class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name,
> // 32-bit store
> def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
> (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
> - 0x1, "RAT_WRITE_CACHELESS_32_eg",
> + 0x1, "RAT_WRITE_CACHELESS_32_eg $rw_gpr, $index_gpr, $eop",
> [(global_store i32:$rw_gpr, i32:$index_gpr)]
> >;
>
> //128-bit store
> def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
> (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
> - 0xf, "RAT_WRITE_CACHELESS_128",
> + 0xf, "RAT_WRITE_CACHELESS_128 $rw_gpr.XYZW, $index_gpr, $eop",
> [(global_store v4i32:$rw_gpr, i32:$index_gpr)]
> >;
>
> class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
> - : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern>,
> + : InstR600ISA <outs, (ins MEMxi:$ptr), name, pattern>,
> VTX_WORD1_GPR, VTX_WORD0 {
>
> // Static fields
> @@ -1838,7 +1837,7 @@ class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
> }
>
> class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
> - : VTX_READ_eg <"VTX_READ_8", buffer_id, (outs R600_TReg32_X:$dst),
> + : VTX_READ_eg <"VTX_READ_8 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst),
> pattern> {
>
> let MEGA_FETCH_COUNT = 1;
> @@ -1850,7 +1849,7 @@ class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
> }
>
> class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
> - : VTX_READ_eg <"VTX_READ_16", buffer_id, (outs R600_TReg32_X:$dst),
> + : VTX_READ_eg <"VTX_READ_16 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst),
> pattern> {
> let MEGA_FETCH_COUNT = 2;
> let DST_SEL_X = 0;
> @@ -1862,7 +1861,7 @@ class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
> }
>
> class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
> - : VTX_READ_eg <"VTX_READ_32", buffer_id, (outs R600_TReg32_X:$dst),
> + : VTX_READ_eg <"VTX_READ_32 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst),
> pattern> {
>
> let MEGA_FETCH_COUNT = 4;
> @@ -1883,7 +1882,7 @@ class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
> }
>
> class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
> - : VTX_READ_eg <"VTX_READ_128", buffer_id, (outs R600_Reg128:$dst),
> + : VTX_READ_eg <"VTX_READ_128 $dst.XYZW, $ptr", buffer_id, (outs R600_Reg128:$dst),
> pattern> {
>
> let MEGA_FETCH_COUNT = 16;
> diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
> index bfc546b..df6004b 100644
> --- a/lib/Target/R600/R600RegisterInfo.td
> +++ b/lib/Target/R600/R600RegisterInfo.td
> @@ -35,7 +35,7 @@ foreach Index = 0-127 in {
> Chan>;
> }
> // 128-bit Temporary Registers
> - def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
> + def T#Index#_XYZW : R600Reg_128 <"T"#Index#"",
> [!cast<Register>("T"#Index#"_X"),
> !cast<Register>("T"#Index#"_Y"),
> !cast<Register>("T"#Index#"_Z"),
> --
> 1.8.2.1
>
> From 6840d3e3995283e98cd535db36ba24364f690072 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Mon, 6 May 2013 20:05:16 +0200
> Subject: [PATCH 5/5] R600: Improve texture handling
>
> ---
> lib/Target/R600/AMDGPU.h | 1 +
> lib/Target/R600/AMDGPUISelLowering.h | 1 +
> lib/Target/R600/AMDGPUTargetMachine.cpp | 2 +
> lib/Target/R600/CMakeLists.txt | 1 +
> lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 45 ++++
> lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 2 +
> lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 95 ++-----
> lib/Target/R600/R600ISelLowering.cpp | 260 +++++++++++++++++--
> lib/Target/R600/R600Instructions.td | 196 +++++++-------
> lib/Target/R600/R600Intrinsics.td | 130 ++++++++++
> lib/Target/R600/R600TextureIntrinsicsReplacer.cpp | 286 +++++++++++++++++++++
> test/CodeGen/R600/llvm.AMDGPU.tex.ll | 32 +--
> 12 files changed, 834 insertions(+), 217 deletions(-)
> create mode 100644 lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
>
> diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
> index 9792bd8..f9d70c9 100644
> --- a/lib/Target/R600/AMDGPU.h
> +++ b/lib/Target/R600/AMDGPU.h
> @@ -21,6 +21,7 @@ class FunctionPass;
> class AMDGPUTargetMachine;
>
> // R600 Passes
> +FunctionPass* createR600TextureIntrinsicsReplacer();
> FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
> FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
> FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
> diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
> index c2a79ea..f108fbc 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.h
> +++ b/lib/Target/R600/AMDGPUISelLowering.h
> @@ -126,6 +126,7 @@ enum {
> SMIN,
> UMIN,
> URECIP,
> + TEXTURE_FETCH,
> EXPORT,
> CONST_ADDRESS,
> REGISTER_LOAD,
> diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
> index 0ec67ce..9e0edfb 100644
> --- a/lib/Target/R600/AMDGPUTargetMachine.cpp
> +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
> @@ -110,6 +110,8 @@ AMDGPUPassConfig::addPreISel() {
> if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
> addPass(createAMDGPUStructurizeCFGPass());
> addPass(createSIAnnotateControlFlowPass());
> + } else {
> + addPass(createR600TextureIntrinsicsReplacer());
> }
> return false;
> }
> diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
> index 2ad2047..a4f281a 100644
> --- a/lib/Target/R600/CMakeLists.txt
> +++ b/lib/Target/R600/CMakeLists.txt
> @@ -44,6 +44,7 @@ add_llvm_target(R600CodeGen
> R600MachineScheduler.cpp
> R600Packetizer.cpp
> R600RegisterInfo.cpp
> + R600TextureIntrinsicsReplacer.cpp
> SIAnnotateControlFlow.cpp
> SIInsertWaits.cpp
> SIInstrInfo.cpp
> diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> index bbc6cc5..8de644b 100644
> --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> @@ -198,6 +198,51 @@ void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
> return;
> }
>
> +void AMDGPUInstPrinter::printRSel(const MCInst *MI, unsigned OpNo,
> + raw_ostream &O) {
> + unsigned Sel = MI->getOperand(OpNo).getImm();
> + switch (Sel) {
> + case 0:
> + O << "X";
> + break;
> + case 1:
> + O << "Y";
> + break;
> + case 2:
> + O << "Z";
> + break;
> + case 3:
> + O << "W";
> + break;
> + case 4:
> + O << "0";
> + break;
> + case 5:
> + O << "1";
> + break;
> + case 7:
> + O << "_";
> + break;
> + default:
> + break;
> + }
> +}
> +
> +void AMDGPUInstPrinter::printCT(const MCInst *MI, unsigned OpNo,
> + raw_ostream &O) {
> + unsigned CT = MI->getOperand(OpNo).getImm();
> + switch (CT) {
> + case 0:
> + O << "U";
> + break;
> + case 1:
> + O << "N";
> + break;
> + default:
> + break;
> + }
> +}
> +
> void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
> raw_ostream &O) {
> int KCacheMode = MI->getOperand(OpNo).getImm();
> diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> index c6fd053..4c1dfa6 100644
> --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> @@ -49,6 +49,8 @@ private:
> void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
> void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
> void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O);
> + void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
> + void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O);
> void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
> };
>
> diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
> index 271a974..7bc200a 100644
> --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
> +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
> @@ -82,21 +82,6 @@ enum FCInstr {
> FC_CONTINUE
> };
>
> -enum TextureTypes {
> - TEXTURE_1D = 1,
> - TEXTURE_2D,
> - TEXTURE_3D,
> - TEXTURE_CUBE,
> - TEXTURE_RECT,
> - TEXTURE_SHADOW1D,
> - TEXTURE_SHADOW2D,
> - TEXTURE_SHADOWRECT,
> - TEXTURE_1D_ARRAY,
> - TEXTURE_2D_ARRAY,
> - TEXTURE_SHADOW1D_ARRAY,
> - TEXTURE_SHADOW2D_ARRAY
> -};
> -
> MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
> const MCRegisterInfo &MRI,
> const MCSubtargetInfo &STI,
> @@ -122,63 +107,29 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
> Emit(InstWord2, OS);
> Emit((u_int32_t) 0, OS);
> } else if (IS_TEX(Desc)) {
> - unsigned Opcode = MI.getOpcode();
> - bool HasOffsets = (Opcode == AMDGPU::TEX_LD);
> - unsigned OpOffset = HasOffsets ? 3 : 0;
> - int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
> - int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
> -
> - uint32_t SrcSelect[4] = {0, 1, 2, 3};
> - uint32_t Offsets[3] = {0, 0, 0};
> - uint64_t CoordType[4] = {1, 1, 1, 1};
> -
> - if (HasOffsets)
> - for (unsigned i = 0; i < 3; i++) {
> - int SignedOffset = MI.getOperand(i + 2).getImm();
> - Offsets[i] = (SignedOffset & 0x1F);
> - }
> -
> - if (TextureType == TEXTURE_RECT ||
> - TextureType == TEXTURE_SHADOWRECT) {
> - CoordType[ELEMENT_X] = 0;
> - CoordType[ELEMENT_Y] = 0;
> - }
> -
> - if (TextureType == TEXTURE_1D_ARRAY ||
> - TextureType == TEXTURE_SHADOW1D_ARRAY) {
> - if (Opcode == AMDGPU::TEX_SAMPLE_C_L ||
> - Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
> - CoordType[ELEMENT_Y] = 0;
> - } else {
> - CoordType[ELEMENT_Z] = 0;
> - SrcSelect[ELEMENT_Z] = ELEMENT_Y;
> - }
> - } else if (TextureType == TEXTURE_2D_ARRAY ||
> - TextureType == TEXTURE_SHADOW2D_ARRAY) {
> - CoordType[ELEMENT_Z] = 0;
> - }
> -
> -
> - if ((TextureType == TEXTURE_SHADOW1D ||
> - TextureType == TEXTURE_SHADOW2D ||
> - TextureType == TEXTURE_SHADOWRECT ||
> - TextureType == TEXTURE_SHADOW1D_ARRAY) &&
> - Opcode != AMDGPU::TEX_SAMPLE_C_L &&
> - Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
> - SrcSelect[ELEMENT_W] = ELEMENT_Z;
> - }
> -
> - uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) |
> - CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 |
> - CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63;
> - uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
> - SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
> - SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
> - Offsets[2] << 10;
> -
> - Emit(Word01, OS);
> - Emit(Word2, OS);
> - Emit((u_int32_t) 0, OS);
> + int64_t Sampler = MI.getOperand(14).getImm();
> +
> + uint32_t SrcSelect[4] = {
> + MI.getOperand(2).getImm(),
> + MI.getOperand(3).getImm(),
> + MI.getOperand(4).getImm(),
> + MI.getOperand(5).getImm()
> + };
> + uint32_t Offsets[3] = {
> + MI.getOperand(6).getImm() & 0x1F,
> + MI.getOperand(7).getImm() & 0x1F,
> + MI.getOperand(8).getImm() & 0x1F
> + };
> +
> + uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups);
> + uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
> + SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
> + SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
> + Offsets[2] << 10;
> +
> + Emit(Word01, OS);
> + Emit(Word2, OS);
> + Emit((u_int32_t) 0, OS);
> } else {
> uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
> Emit(Inst, OS);
> diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> index c6e2136..d232022 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -179,23 +179,99 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
> case AMDGPU::TXD: {
> unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
> unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
> -
> + MachineOperand &RID = MI->getOperand(4);
> + MachineOperand &SID = MI->getOperand(5);
> + unsigned TextureId = MI->getOperand(6).getImm();
> + unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
> + unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
> +
> + switch (TextureId) {
> + case 5: // Rect
> + CTX = CTY = 0;
> + break;
> + case 6: // Shadow1D
> + SrcW = SrcZ;
> + break;
> + case 7: // Shadow2D
> + SrcW = SrcZ;
> + break;
> + case 8: // ShadowRect
> + CTX = CTY = 0;
> + SrcW = SrcZ;
> + break;
> + case 9: // 1DArray
> + SrcZ = SrcY;
> + CTZ = 0;
> + break;
> + case 10: // 2DArray
> + CTZ = 0;
> + break;
> + case 11: // Shadow1DArray
> + SrcZ = SrcY;
> + CTZ = 0;
> + break;
> + case 12: // Shadow2DArray
> + CTZ = 0;
> + break;
> + }
> BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
> .addOperand(MI->getOperand(3))
> - .addOperand(MI->getOperand(4))
> - .addOperand(MI->getOperand(5))
> - .addOperand(MI->getOperand(6));
> + .addImm(SrcX)
> + .addImm(SrcY)
> + .addImm(SrcZ)
> + .addImm(SrcW)
> + .addImm(0)
> + .addImm(0)
> + .addImm(0)
> + .addImm(0)
> + .addImm(1)
> + .addImm(2)
> + .addImm(3)
> + .addOperand(RID)
> + .addOperand(SID)
> + .addImm(CTX)
> + .addImm(CTY)
> + .addImm(CTZ)
> + .addImm(CTW);
> BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
> .addOperand(MI->getOperand(2))
> - .addOperand(MI->getOperand(4))
> - .addOperand(MI->getOperand(5))
> - .addOperand(MI->getOperand(6));
> + .addImm(SrcX)
> + .addImm(SrcY)
> + .addImm(SrcZ)
> + .addImm(SrcW)
> + .addImm(0)
> + .addImm(0)
> + .addImm(0)
> + .addImm(0)
> + .addImm(1)
> + .addImm(2)
> + .addImm(3)
> + .addOperand(RID)
> + .addOperand(SID)
> + .addImm(CTX)
> + .addImm(CTY)
> + .addImm(CTZ)
> + .addImm(CTW);
> BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
> .addOperand(MI->getOperand(0))
> .addOperand(MI->getOperand(1))
> - .addOperand(MI->getOperand(4))
> - .addOperand(MI->getOperand(5))
> - .addOperand(MI->getOperand(6))
> + .addImm(SrcX)
> + .addImm(SrcY)
> + .addImm(SrcZ)
> + .addImm(SrcW)
> + .addImm(0)
> + .addImm(0)
> + .addImm(0)
> + .addImm(0)
> + .addImm(1)
> + .addImm(2)
> + .addImm(3)
> + .addOperand(RID)
> + .addOperand(SID)
> + .addImm(CTX)
> + .addImm(CTY)
> + .addImm(CTZ)
> + .addImm(CTW)
> .addReg(T0, RegState::Implicit)
> .addReg(T1, RegState::Implicit);
> break;
> @@ -204,23 +280,100 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
> case AMDGPU::TXD_SHADOW: {
> unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
> unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
> + MachineOperand &RID = MI->getOperand(4);
> + MachineOperand &SID = MI->getOperand(5);
> + unsigned TextureId = MI->getOperand(6).getImm();
> + unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
> + unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
> +
> + switch (TextureId) {
> + case 5: // Rect
> + CTX = CTY = 0;
> + break;
> + case 6: // Shadow1D
> + SrcW = SrcZ;
> + break;
> + case 7: // Shadow2D
> + SrcW = SrcZ;
> + break;
> + case 8: // ShadowRect
> + CTX = CTY = 0;
> + SrcW = SrcZ;
> + break;
> + case 9: // 1DArray
> + SrcZ = SrcY;
> + CTZ = 0;
> + break;
> + case 10: // 2DArray
> + CTZ = 0;
> + break;
> + case 11: // Shadow1DArray
> + SrcZ = SrcY;
> + CTZ = 0;
> + break;
> + case 12: // Shadow2DArray
> + CTZ = 0;
> + break;
> + }
>
> BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
> .addOperand(MI->getOperand(3))
> - .addOperand(MI->getOperand(4))
> - .addOperand(MI->getOperand(5))
> - .addOperand(MI->getOperand(6));
> + .addImm(SrcX)
> + .addImm(SrcY)
> + .addImm(SrcZ)
> + .addImm(SrcW)
> + .addImm(0)
> + .addImm(0)
> + .addImm(0)
> + .addImm(0)
> + .addImm(1)
> + .addImm(2)
> + .addImm(3)
> + .addOperand(RID)
> + .addOperand(SID)
> + .addImm(CTX)
> + .addImm(CTY)
> + .addImm(CTZ)
> + .addImm(CTW);
> BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
> .addOperand(MI->getOperand(2))
> - .addOperand(MI->getOperand(4))
> - .addOperand(MI->getOperand(5))
> - .addOperand(MI->getOperand(6));
> + .addImm(SrcX)
> + .addImm(SrcY)
> + .addImm(SrcZ)
> + .addImm(SrcW)
> + .addImm(0)
> + .addImm(0)
> + .addImm(0)
> + .addImm(0)
> + .addImm(1)
> + .addImm(2)
> + .addImm(3)
> + .addOperand(RID)
> + .addOperand(SID)
> + .addImm(CTX)
> + .addImm(CTY)
> + .addImm(CTZ)
> + .addImm(CTW);
> BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
> .addOperand(MI->getOperand(0))
> .addOperand(MI->getOperand(1))
> - .addOperand(MI->getOperand(4))
> - .addOperand(MI->getOperand(5))
> - .addOperand(MI->getOperand(6))
> + .addImm(SrcX)
> + .addImm(SrcY)
> + .addImm(SrcZ)
> + .addImm(SrcW)
> + .addImm(0)
> + .addImm(0)
> + .addImm(0)
> + .addImm(0)
> + .addImm(1)
> + .addImm(2)
> + .addImm(3)
> + .addOperand(RID)
> + .addOperand(SID)
> + .addImm(CTX)
> + .addImm(CTY)
> + .addImm(CTZ)
> + .addImm(CTW)
> .addReg(T0, RegState::Implicit)
> .addReg(T1, RegState::Implicit);
> break;
> @@ -400,6 +553,75 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
>
> return SDValue(interp, slot % 2);
> }
> + case AMDGPUIntrinsic::R600_tex:
> + case AMDGPUIntrinsic::R600_texc:
> + case AMDGPUIntrinsic::R600_txl:
> + case AMDGPUIntrinsic::R600_txlc:
> + case AMDGPUIntrinsic::R600_txb:
> + case AMDGPUIntrinsic::R600_txbc:
> + case AMDGPUIntrinsic::R600_txf:
> + case AMDGPUIntrinsic::R600_txq:
> + case AMDGPUIntrinsic::R600_ddx:
> + case AMDGPUIntrinsic::R600_ddy: {
> + unsigned TextureOp;
> + switch (IntrinsicID) {
> + case AMDGPUIntrinsic::R600_tex:
> + TextureOp = 0;
> + break;
> + case AMDGPUIntrinsic::R600_texc:
> + TextureOp = 1;
> + break;
> + case AMDGPUIntrinsic::R600_txl:
> + TextureOp = 2;
> + break;
> + case AMDGPUIntrinsic::R600_txlc:
> + TextureOp = 3;
> + break;
> + case AMDGPUIntrinsic::R600_txb:
> + TextureOp = 4;
> + break;
> + case AMDGPUIntrinsic::R600_txbc:
> + TextureOp = 5;
> + break;
> + case AMDGPUIntrinsic::R600_txf:
> + TextureOp = 6;
> + break;
> + case AMDGPUIntrinsic::R600_txq:
> + TextureOp = 7;
> + break;
> + case AMDGPUIntrinsic::R600_ddx:
> + TextureOp = 8;
> + break;
> + case AMDGPUIntrinsic::R600_ddy:
> + TextureOp = 9;
> + break;
> + default:
> + llvm_unreachable("Unknow Texture Operation");
> + }
> +
> + SDValue TexArgs[19] = {
> + DAG.getConstant(TextureOp, MVT::i32),
> + Op.getOperand(1),
> + DAG.getConstant(0, MVT::i32),
> + DAG.getConstant(1, MVT::i32),
> + DAG.getConstant(2, MVT::i32),
> + DAG.getConstant(3, MVT::i32),
> + Op.getOperand(2),
> + Op.getOperand(3),
> + Op.getOperand(4),
> + DAG.getConstant(0, MVT::i32),
> + DAG.getConstant(1, MVT::i32),
> + DAG.getConstant(2, MVT::i32),
> + DAG.getConstant(3, MVT::i32),
> + Op.getOperand(5),
> + Op.getOperand(6),
> + Op.getOperand(7),
> + Op.getOperand(8),
> + Op.getOperand(9),
> + Op.getOperand(10)
> + };
> + return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
> + }
>
> case r600_read_ngroups_x:
> return LowerImplicitParameter(DAG, VT, DL, 0);
> diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> index abaa94b..83bbab1 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -96,6 +96,12 @@ def UP : InstFlag <"printUpdatePred">;
> // Once we start using the packetizer in this backend we should have this
> // default to 0.
> def LAST : InstFlag<"printLast", 1>;
> +def RSel : Operand<i32> {
> + let PrintMethod = "printRSel";
> +}
> +def CT: Operand<i32> {
> + let PrintMethod = "printCT";
> +}
>
> def FRAMEri : Operand<iPTR> {
> let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
> @@ -463,38 +469,7 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
> pattern,
> itin>;
>
> -class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
> - InstrItinClass itin = AnyALU> :
> - InstR600 <(outs R600_Reg128:$DST_GPR),
> - (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget),
> - !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"),
> - pattern,
> - itin>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
> - let Inst{31-0} = Word0;
> - let Inst{63-32} = Word1;
> -
> - let TEX_INST = inst{4-0};
> - let SRC_REL = 0;
> - let DST_REL = 0;
> - let DST_SEL_X = 0;
> - let DST_SEL_Y = 1;
> - let DST_SEL_Z = 2;
> - let DST_SEL_W = 3;
> - let LOD_BIAS = 0;
> -
> - let INST_MOD = 0;
> - let FETCH_WHOLE_QUAD = 0;
> - let ALT_CONST = 0;
> - let SAMPLER_INDEX_MODE = 0;
> - let RESOURCE_INDEX_MODE = 0;
> -
> - let COORD_TYPE_X = 0;
> - let COORD_TYPE_Y = 0;
> - let COORD_TYPE_Z = 0;
> - let COORD_TYPE_W = 0;
> -
> - let TEXInst = 1;
> - }
> +
>
> } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
>
> @@ -618,6 +593,29 @@ def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
> [SDNPVariadic]
> >;
>
> +def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>;
> +
> +def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>;
> +
> +multiclass TexPattern<bits<32> TextureOp, Instruction inst, ValueType vt = v4f32> {
> +def : Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR,
> + (i32 imm:$srcx), (i32 imm:$srcy), (i32 imm:$srcz), (i32 imm:$srcw),
> + (i32 imm:$offsetx), (i32 imm:$offsety), (i32 imm:$offsetz),
> + (i32 imm:$DST_SEL_X), (i32 imm:$DST_SEL_Y), (i32 imm:$DST_SEL_Z),
> + (i32 imm:$DST_SEL_W),
> + (i32 imm:$RESOURCE_ID), (i32 imm:$SAMPLER_ID),
> + (i32 imm:$COORD_TYPE_X), (i32 imm:$COORD_TYPE_Y), (i32 imm:$COORD_TYPE_Z),
> + (i32 imm:$COORD_TYPE_W)),
> + (inst R600_Reg128:$SRC_GPR,
> + imm:$srcx, imm:$srcy, imm:$srcz, imm:$srcw,
> + imm:$offsetx, imm:$offsety, imm:$offsetz,
> + imm:$DST_SEL_X, imm:$DST_SEL_Y, imm:$DST_SEL_Z,
> + imm:$DST_SEL_W,
> + imm:$RESOURCE_ID, imm:$SAMPLER_ID,
> + imm:$COORD_TYPE_X, imm:$COORD_TYPE_Y, imm:$COORD_TYPE_Z,
> + imm:$COORD_TYPE_W)>;
> +}
> +
> //===----------------------------------------------------------------------===//
> // Interpolation Instructions
> //===----------------------------------------------------------------------===//
> @@ -1132,92 +1130,70 @@ def CNDGT_INT : R600_3OP <
> // Texture instructions
> //===----------------------------------------------------------------------===//
>
> -def TEX_LD : R600_TEX <
> - 0x03, "TEX_LD",
> - [(set v4f32:$DST_GPR, (int_AMDGPU_txf v4f32:$SRC_GPR,
> - imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID,
> - imm:$SAMPLER_ID, imm:$textureTarget))]
> -> {
> -let AsmString = "TEX_LD $DST_GPR, $SRC_GPR, $OFFSET_X, $OFFSET_Y, $OFFSET_Z,"
> - "$RESOURCE_ID, $SAMPLER_ID, $textureTarget";
> -let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X,
> - i32imm:$OFFSET_Y, i32imm:$OFFSET_Z, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
> - i32imm:$textureTarget);
> -}
> -
> -def TEX_GET_TEXTURE_RESINFO : R600_TEX <
> - 0x04, "TEX_GET_TEXTURE_RESINFO",
> - [(set v4f32:$DST_GPR, (int_AMDGPU_txq v4f32:$SRC_GPR,
> - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
> ->;
> -
> -def TEX_GET_GRADIENTS_H : R600_TEX <
> - 0x07, "TEX_GET_GRADIENTS_H",
> - [(set v4f32:$DST_GPR, (int_AMDGPU_ddx v4f32:$SRC_GPR,
> - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
> ->;
> -
> -def TEX_GET_GRADIENTS_V : R600_TEX <
> - 0x08, "TEX_GET_GRADIENTS_V",
> - [(set v4f32:$DST_GPR, (int_AMDGPU_ddy v4f32:$SRC_GPR,
> - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
> ->;
> -
> -def TEX_SET_GRADIENTS_H : R600_TEX <
> - 0x0B, "TEX_SET_GRADIENTS_H",
> - []
> ->;
> -
> -def TEX_SET_GRADIENTS_V : R600_TEX <
> - 0x0C, "TEX_SET_GRADIENTS_V",
> - []
> ->;
> +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
>
> -def TEX_SAMPLE : R600_TEX <
> - 0x10, "TEX_SAMPLE",
> - [(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR,
> - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
> ->;
> +class R600_TEX <bits<11> inst, string opName> :
> + InstR600 <(outs R600_Reg128:$DST_GPR),
> + (ins R600_Reg128:$SRC_GPR,
> + RSel:$srcx, RSel:$srcy, RSel:$srcz, RSel:$srcw,
> + i32imm:$offsetx, i32imm:$offsety, i32imm:$offsetz,
> + RSel:$DST_SEL_X, RSel:$DST_SEL_Y, RSel:$DST_SEL_Z, RSel:$DST_SEL_W,
> + i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
> + CT:$COORD_TYPE_X, CT:$COORD_TYPE_Y, CT:$COORD_TYPE_Z,
> + CT:$COORD_TYPE_W),
> + !strconcat(opName,
> + " $DST_GPR.$DST_SEL_X$DST_SEL_Y$DST_SEL_Z$DST_SEL_W, "
> + "$SRC_GPR.$srcx$srcy$srcz$srcw "
> + "RID:$RESOURCE_ID SID:$SAMPLER_ID "
> + "CT:$COORD_TYPE_X$COORD_TYPE_Y$COORD_TYPE_Z$COORD_TYPE_W"),
> + [],
> + NullALU>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
> + let Inst{31-0} = Word0;
> + let Inst{63-32} = Word1;
>
> -def TEX_SAMPLE_C : R600_TEX <
> - 0x18, "TEX_SAMPLE_C",
> - [(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR,
> - imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
> ->;
> + let TEX_INST = inst{4-0};
> + let SRC_REL = 0;
> + let DST_REL = 0;
> + let LOD_BIAS = 0;
>
> -def TEX_SAMPLE_L : R600_TEX <
> - 0x11, "TEX_SAMPLE_L",
> - [(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR,
> - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
> ->;
> + let INST_MOD = 0;
> + let FETCH_WHOLE_QUAD = 0;
> + let ALT_CONST = 0;
> + let SAMPLER_INDEX_MODE = 0;
> + let RESOURCE_INDEX_MODE = 0;
>
> -def TEX_SAMPLE_C_L : R600_TEX <
> - 0x19, "TEX_SAMPLE_C_L",
> - [(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR,
> - imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
> ->;
> + let TEXInst = 1;
> +}
>
> -def TEX_SAMPLE_LB : R600_TEX <
> - 0x12, "TEX_SAMPLE_LB",
> - [(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR,
> - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
> ->;
> +} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
>
> -def TEX_SAMPLE_C_LB : R600_TEX <
> - 0x1A, "TEX_SAMPLE_C_LB",
> - [(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR,
> - imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
> ->;
>
> -def TEX_SAMPLE_G : R600_TEX <
> - 0x14, "TEX_SAMPLE_G",
> - []
> ->;
>
> -def TEX_SAMPLE_C_G : R600_TEX <
> - 0x1C, "TEX_SAMPLE_C_G",
> - []
> ->;
> +def TEX_SAMPLE : R600_TEX <0x10, "TEX_SAMPLE">;
> +def TEX_SAMPLE_C : R600_TEX <0x18, "TEX_SAMPLE_C">;
> +def TEX_SAMPLE_L : R600_TEX <0x11, "TEX_SAMPLE_L">;
> +def TEX_SAMPLE_C_L : R600_TEX <0x19, "TEX_SAMPLE_C_L">;
> +def TEX_SAMPLE_LB : R600_TEX <0x12, "TEX_SAMPLE_LB">;
> +def TEX_SAMPLE_C_LB : R600_TEX <0x1A, "TEX_SAMPLE_C_LB">;
> +def TEX_LD : R600_TEX <0x03, "TEX_LD">;
> +def TEX_GET_TEXTURE_RESINFO : R600_TEX <0x04, "TEX_GET_TEXTURE_RESINFO">;
> +def TEX_GET_GRADIENTS_H : R600_TEX <0x07, "TEX_GET_GRADIENTS_H">;
> +def TEX_GET_GRADIENTS_V : R600_TEX <0x08, "TEX_GET_GRADIENTS_V">;
> +def TEX_SET_GRADIENTS_H : R600_TEX <0x0B, "TEX_SET_GRADIENTS_H">;
> +def TEX_SET_GRADIENTS_V : R600_TEX <0x0C, "TEX_SET_GRADIENTS_V">;
> +def TEX_SAMPLE_G : R600_TEX <0x14, "TEX_SAMPLE_G">;
> +def TEX_SAMPLE_C_G : R600_TEX <0x1C, "TEX_SAMPLE_C_G">;
> +
> +defm : TexPattern<0, TEX_SAMPLE>;
> +defm : TexPattern<1, TEX_SAMPLE_C>;
> +defm : TexPattern<2, TEX_SAMPLE_L>;
> +defm : TexPattern<3, TEX_SAMPLE_C_L>;
> +defm : TexPattern<4, TEX_SAMPLE_LB>;
> +defm : TexPattern<5, TEX_SAMPLE_C_LB>;
> +defm : TexPattern<6, TEX_LD, v4i32>;
> +defm : TexPattern<7, TEX_GET_TEXTURE_RESINFO, v4i32>;
> +defm : TexPattern<8, TEX_GET_GRADIENTS_H>;
> +defm : TexPattern<9, TEX_GET_GRADIENTS_V>;
>
> //===----------------------------------------------------------------------===//
> // Helper classes for common instructions
> diff --git a/lib/Target/R600/R600Intrinsics.td b/lib/Target/R600/R600Intrinsics.td
> index dc8980a..7c434bc 100644
> --- a/lib/Target/R600/R600Intrinsics.td
> +++ b/lib/Target/R600/R600Intrinsics.td
> @@ -18,6 +18,136 @@ let TargetPrefix = "R600", isTarget = 1 in {
> Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> def int_R600_load_texbuf :
> Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> + def int_R600_tex :
> + Intrinsic<[llvm_v4f32_ty], [
> + llvm_v4f32_ty, // Coord
> + llvm_i32_ty, // offset_x
> + llvm_i32_ty, // offset_y,
> + llvm_i32_ty, // offset_z,
> + llvm_i32_ty, // resource_id
> + llvm_i32_ty, // samplerid
> + llvm_i32_ty, // coord_type_x
> + llvm_i32_ty, // coord_type_y
> + llvm_i32_ty, // coord_type_z
> + llvm_i32_ty // coord_type_w
> + ], [IntrNoMem]>;
> + def int_R600_texc :
> + Intrinsic<[llvm_v4f32_ty], [
> + llvm_v4f32_ty, // Coord
> + llvm_i32_ty, // offset_x
> + llvm_i32_ty, // offset_y,
> + llvm_i32_ty, // offset_z,
> + llvm_i32_ty, // resource_id
> + llvm_i32_ty, // samplerid
> + llvm_i32_ty, // coord_type_x
> + llvm_i32_ty, // coord_type_y
> + llvm_i32_ty, // coord_type_z
> + llvm_i32_ty // coord_type_w
> + ], [IntrNoMem]>;
> + def int_R600_txl :
> + Intrinsic<[llvm_v4f32_ty], [
> + llvm_v4f32_ty, // Coord
> + llvm_i32_ty, // offset_x
> + llvm_i32_ty, // offset_y,
> + llvm_i32_ty, // offset_z,
> + llvm_i32_ty, // resource_id
> + llvm_i32_ty, // samplerid
> + llvm_i32_ty, // coord_type_x
> + llvm_i32_ty, // coord_type_y
> + llvm_i32_ty, // coord_type_z
> + llvm_i32_ty // coord_type_w
> + ], [IntrNoMem]>;
> + def int_R600_txlc :
> + Intrinsic<[llvm_v4f32_ty], [
> + llvm_v4f32_ty, // Coord
> + llvm_i32_ty, // offset_x
> + llvm_i32_ty, // offset_y,
> + llvm_i32_ty, // offset_z,
> + llvm_i32_ty, // resource_id
> + llvm_i32_ty, // samplerid
> + llvm_i32_ty, // coord_type_x
> + llvm_i32_ty, // coord_type_y
> + llvm_i32_ty, // coord_type_z
> + llvm_i32_ty // coord_type_w
> + ], [IntrNoMem]>;
> + def int_R600_txb :
> + Intrinsic<[llvm_v4f32_ty], [
> + llvm_v4f32_ty, // Coord
> + llvm_i32_ty, // offset_x
> + llvm_i32_ty, // offset_y,
> + llvm_i32_ty, // offset_z,
> + llvm_i32_ty, // resource_id
> + llvm_i32_ty, // samplerid
> + llvm_i32_ty, // coord_type_x
> + llvm_i32_ty, // coord_type_y
> + llvm_i32_ty, // coord_type_z
> + llvm_i32_ty // coord_type_w
> + ], [IntrNoMem]>;
> + def int_R600_txbc :
> + Intrinsic<[llvm_v4f32_ty], [
> + llvm_v4f32_ty, // Coord
> + llvm_i32_ty, // offset_x
> + llvm_i32_ty, // offset_y,
> + llvm_i32_ty, // offset_z,
> + llvm_i32_ty, // resource_id
> + llvm_i32_ty, // samplerid
> + llvm_i32_ty, // coord_type_x
> + llvm_i32_ty, // coord_type_y
> + llvm_i32_ty, // coord_type_z
> + llvm_i32_ty // coord_type_w
> + ], [IntrNoMem]>;
> + def int_R600_txf :
> + Intrinsic<[llvm_v4i32_ty], [
> + llvm_v4f32_ty, // Coord
> + llvm_i32_ty, // offset_x
> + llvm_i32_ty, // offset_y,
> + llvm_i32_ty, // offset_z,
> + llvm_i32_ty, // resource_id
> + llvm_i32_ty, // samplerid
> + llvm_i32_ty, // coord_type_x
> + llvm_i32_ty, // coord_type_y
> + llvm_i32_ty, // coord_type_z
> + llvm_i32_ty // coord_type_w
> + ], [IntrNoMem]>;
> + def int_R600_txq :
> + Intrinsic<[llvm_v4i32_ty], [
> + llvm_v4f32_ty, // Coord
> + llvm_i32_ty, // offset_x
> + llvm_i32_ty, // offset_y,
> + llvm_i32_ty, // offset_z,
> + llvm_i32_ty, // resource_id
> + llvm_i32_ty, // samplerid
> + llvm_i32_ty, // coord_type_x
> + llvm_i32_ty, // coord_type_y
> + llvm_i32_ty, // coord_type_z
> + llvm_i32_ty // coord_type_w
> + ], [IntrNoMem]>;
> + def int_R600_ddx :
> + Intrinsic<[llvm_v4f32_ty], [
> + llvm_v4f32_ty, // Coord
> + llvm_i32_ty, // offset_x
> + llvm_i32_ty, // offset_y,
> + llvm_i32_ty, // offset_z,
> + llvm_i32_ty, // resource_id
> + llvm_i32_ty, // samplerid
> + llvm_i32_ty, // coord_type_x
> + llvm_i32_ty, // coord_type_y
> + llvm_i32_ty, // coord_type_z
> + llvm_i32_ty // coord_type_w
> + ], [IntrNoMem]>;
> + def int_R600_ddy :
> + Intrinsic<[llvm_v4f32_ty], [
> + llvm_v4f32_ty, // Coord
> + llvm_i32_ty, // offset_x
> + llvm_i32_ty, // offset_y,
> + llvm_i32_ty, // offset_z,
> + llvm_i32_ty, // resource_id
> + llvm_i32_ty, // samplerid
> + llvm_i32_ty, // coord_type_x
> + llvm_i32_ty, // coord_type_y
> + llvm_i32_ty, // coord_type_z
> + llvm_i32_ty // coord_type_w
> + ], [IntrNoMem]>;
You could define a common intrinsic class for these texture intrinsics
(they all take the same argument list) and factor out most of this
duplicated code.
> def int_R600_store_swizzle :
> Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
> def int_R600_store_stream_output :
> diff --git a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
> new file mode 100644
> index 0000000..938bd51
> --- /dev/null
> +++ b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
> @@ -0,0 +1,286 @@
> +//===-- R600TextureIntrinsicsReplacer.cpp ---------------------------------===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// This pass translates tgsi-like texture intrinsics into R600 texture
> +/// closer to hardware intrinsics.
> +//===----------------------------------------------------------------------===//
> +
> +#include "AMDGPU.h"
> +#include "llvm/Analysis/Passes.h"
> +#include "llvm/ADT/Statistic.h"
> +#include "llvm/IR/Function.h"
> +#include "llvm/InstVisitor.h"
> +#include "llvm/IR/IRBuilder.h"
> +#include "llvm/IR/GlobalValue.h"
> +
> +using namespace llvm;
> +
> +namespace {
> +class R600TextureIntrinsicsReplacer :
> + public FunctionPass, public InstVisitor<R600TextureIntrinsicsReplacer> {
> + static char ID;
> +
> + Module *Mod;
> + Type *FloatType;
> + Type *Int32Type;
> + Type *V4f32Type;
> + Type *V4i32Type;
> + FunctionType *TexSign;
> + FunctionType *TexQSign;
> +
> + void getAdjustementFromTextureTarget(unsigned TextureType, bool hasLOD,
> + unsigned SrcSelect[4], unsigned CT[4],
> + bool &useShadowVariant) {
> + enum TextureTypes {
> + TEXTURE_1D = 1,
> + TEXTURE_2D,
> + TEXTURE_3D,
> + TEXTURE_CUBE,
> + TEXTURE_RECT,
> + TEXTURE_SHADOW1D,
> + TEXTURE_SHADOW2D,
> + TEXTURE_SHADOWRECT,
> + TEXTURE_1D_ARRAY,
> + TEXTURE_2D_ARRAY,
> + TEXTURE_SHADOW1D_ARRAY,
> + TEXTURE_SHADOW2D_ARRAY,
> + TEXTURE_SHADOWCUBE,
> + TEXTURE_2D_MSAA,
> + TEXTURE_2D_ARRAY_MSAA,
> + TEXTURE_CUBE_ARRAY,
> + TEXTURE_SHADOWCUBE_ARRAY
> + };
> +
> + switch (TextureType) {
> + case 0:
> + return;
> + case TEXTURE_RECT:
> + case TEXTURE_1D:
> + case TEXTURE_2D:
> + case TEXTURE_3D:
> + case TEXTURE_CUBE:
> + case TEXTURE_1D_ARRAY:
> + case TEXTURE_2D_ARRAY:
> + case TEXTURE_CUBE_ARRAY:
> + case TEXTURE_2D_MSAA:
> + case TEXTURE_2D_ARRAY_MSAA:
> + useShadowVariant = false;
> + break;
> + case TEXTURE_SHADOW1D:
> + case TEXTURE_SHADOW2D:
> + case TEXTURE_SHADOWRECT:
> + case TEXTURE_SHADOW1D_ARRAY:
> + case TEXTURE_SHADOW2D_ARRAY:
> + case TEXTURE_SHADOWCUBE:
> + case TEXTURE_SHADOWCUBE_ARRAY:
> + useShadowVariant = true;
> + break;
> + default:
> + llvm_unreachable("Unknow Texture Type");
> + }
> +
> + if (TextureType == TEXTURE_RECT ||
> + TextureType == TEXTURE_SHADOWRECT) {
> + CT[0] = 0;
> + CT[1] = 0;
> + }
> +
> + if (TextureType == TEXTURE_CUBE_ARRAY ||
> + TextureType == TEXTURE_SHADOWCUBE_ARRAY) {
> + CT[2] = 0;
> + }
> +
> + if (TextureType == TEXTURE_1D_ARRAY ||
> + TextureType == TEXTURE_SHADOW1D_ARRAY) {
> + if (hasLOD && useShadowVariant) {
> + CT[1] = 0;
> + } else {
> + CT[2] = 0;
> + SrcSelect[2] = 1;
> + }
> + } else if (TextureType == TEXTURE_2D_ARRAY ||
> + TextureType == TEXTURE_SHADOW2D_ARRAY) {
> + CT[2] = 0;
> + }
> +
> + if ((TextureType == TEXTURE_SHADOW1D ||
> + TextureType == TEXTURE_SHADOW2D ||
> + TextureType == TEXTURE_SHADOWRECT ||
> + TextureType == TEXTURE_SHADOW1D_ARRAY) &&
> + !(hasLOD && useShadowVariant)) {
> + SrcSelect[3] = 2;
> + }
> + }
> +
> + void ReplaceCallInst(CallInst &I, FunctionType *FT, const char *Name,
> + unsigned SrcSelect[4], Value *Offset[3], Value *Resource,
> + Value *Sampler, unsigned CT[4], Value *Coord) {
> + IRBuilder<> Builder(&I);
> + Constant *Mask[] = {
> + ConstantInt::get(Int32Type, SrcSelect[0]),
> + ConstantInt::get(Int32Type, SrcSelect[1]),
> + ConstantInt::get(Int32Type, SrcSelect[2]),
> + ConstantInt::get(Int32Type, SrcSelect[3])
> + };
> + Value *SwizzleMask = ConstantVector::get(Mask);
> + Value *SwizzledCoord =
> + Builder.CreateShuffleVector(Coord, Coord, SwizzleMask);
> +
> + Value *Args[] = {
> + SwizzledCoord,
> + Offset[0],
> + Offset[1],
> + Offset[2],
> + Resource,
> + Sampler,
> + ConstantInt::get(Int32Type, CT[0]),
> + ConstantInt::get(Int32Type, CT[1]),
> + ConstantInt::get(Int32Type, CT[2]),
> + ConstantInt::get(Int32Type, CT[3])
> + };
> +
> + Function *F = Mod->getFunction(Name);
> + if (!F) {
> + F = Function::Create(FT, GlobalValue::ExternalLinkage, Name, Mod);
> + F->addFnAttr(Attribute::ReadNone);
> + }
> + I.replaceAllUsesWith(Builder.CreateCall(F, Args));
> + I.eraseFromParent();
> + }
> +
> + void ReplaceTexIntrinsic(CallInst &I, bool hasLOD, FunctionType *FT,
> + const char *VanillaInt,
> + const char *ShadowInt) {
> + Value *Coord = I.getArgOperand(0);
> + Value *ResourceId = I.getArgOperand(1);
> + Value *SamplerId = I.getArgOperand(2);
> +
> + unsigned TextureType =
> + dyn_cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
> +
> + unsigned SrcSelect[4] = { 0, 1, 2, 3 };
> + unsigned CT[4] = {1, 1, 1, 1};
> + Value *Offset[3] = {
> + ConstantInt::get(Int32Type, 0),
> + ConstantInt::get(Int32Type, 0),
> + ConstantInt::get(Int32Type, 0)
> + };
> + bool useShadowVariant;
> +
> + getAdjustementFromTextureTarget(TextureType, hasLOD, SrcSelect, CT,
> + useShadowVariant);
> +
> + ReplaceCallInst(I, FT, useShadowVariant?ShadowInt:VanillaInt, SrcSelect,
> + Offset, ResourceId, SamplerId, CT, Coord);
> + }
> +
> + void ReplaceTXF(CallInst &I) {
> + Value *Coord = I.getArgOperand(0);
> + Value *ResourceId = I.getArgOperand(4);
> + Value *SamplerId = I.getArgOperand(5);
> +
> + unsigned TextureType =
> + dyn_cast<ConstantInt>(I.getArgOperand(6))->getZExtValue();
> +
> + unsigned SrcSelect[4] = { 0, 1, 2, 3 };
> + unsigned CT[4] = {1, 1, 1, 1};
> + Value *Offset[3] = {
> + I.getArgOperand(1),
> + I.getArgOperand(2),
> + I.getArgOperand(3),
> + };
> + bool useShadowVariant;
> +
> + getAdjustementFromTextureTarget(TextureType, false, SrcSelect, CT,
> + useShadowVariant);
> +
> + ReplaceCallInst(I, TexQSign, "llvm.R600.txf", SrcSelect,
> + Offset, ResourceId, SamplerId, CT, Coord);
> + }
> +
> +public:
> + R600TextureIntrinsicsReplacer():
> + FunctionPass(ID) {
> + }
> +
> + virtual bool doInitialization(Module &M) {
> + LLVMContext &Ctx = M.getContext();
> + Mod = &M;
> + FloatType = Type::getFloatTy(Ctx);
> + Int32Type = Type::getInt32Ty(Ctx);
> + V4f32Type = VectorType::get(FloatType, 4);
> + V4i32Type = VectorType::get(Int32Type, 4);
> + Type *ArgsType[] = {
> + V4f32Type,
> + Int32Type,
> + Int32Type,
> + Int32Type,
> + Int32Type,
> + Int32Type,
> + Int32Type,
> + Int32Type,
> + Int32Type,
> + Int32Type,
> + };
> + TexSign = FunctionType::get(V4f32Type, ArgsType);
> + Type *ArgsQType[] = {
> + V4i32Type,
> + Int32Type,
> + Int32Type,
> + Int32Type,
> + Int32Type,
> + Int32Type,
> + Int32Type,
> + Int32Type,
> + Int32Type,
> + Int32Type,
> + };
> + TexQSign = FunctionType::get(V4f32Type, ArgsQType);
> + return false;
> + }
> +
> + virtual bool runOnFunction(Function &F) {
> + visit(F);
> + return false;
> + }
> +
> + virtual const char *getPassName() const {
> + return "R600 Texture Intrinsics Replacer";
> + }
> +
> + void getAnalysisUsage(AnalysisUsage &AU) const {
> + }
> +
> + void visitCallInst(CallInst &I) {
> + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.tex")
> + ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.tex", "llvm.R600.texc");
> + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.txl")
> + ReplaceTexIntrinsic(I, true, TexSign, "llvm.R600.txl", "llvm.R600.txlc");
> + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.txb")
> + ReplaceTexIntrinsic(I, true, TexSign, "llvm.R600.txb", "llvm.R600.txbc");
> + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.txf")
> + ReplaceTXF(I);
> + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.txq")
> + ReplaceTexIntrinsic(I, false, TexQSign, "llvm.R600.txq", "llvm.R600.txq");
> + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.ddx")
> + ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.ddx", "llvm.R600.ddx");
> + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.ddy")
> + ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.ddy", "llvm.R600.ddy");
> + }
> +
> +};
> +
> +char R600TextureIntrinsicsReplacer::ID = 0;
> +
> +}
> +
> +FunctionPass *llvm::createR600TextureIntrinsicsReplacer() {
> + return new R600TextureIntrinsicsReplacer();
> +}
> diff --git a/test/CodeGen/R600/llvm.AMDGPU.tex.ll b/test/CodeGen/R600/llvm.AMDGPU.tex.ll
> index 74331fa..4ea82bb 100644
> --- a/test/CodeGen/R600/llvm.AMDGPU.tex.ll
> +++ b/test/CodeGen/R600/llvm.AMDGPU.tex.ll
> @@ -1,21 +1,21 @@
> ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>
> -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 1
> -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 2
> -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 3
> -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 4
> -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 5
> -;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 6
> -;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 7
> -;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 8
> -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 9
> -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 10
> -;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 11
> -;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 12
> -;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 13
> -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 14
> -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 15
> -;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 16
> +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
> +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
> +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
> +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
> +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:UUNN
> +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
> +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
> +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:UUNN
> +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
> +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
> +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
> +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
> +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
> +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
> +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
> +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
>
> define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
> %addr = load <4 x float> addrspace(1)* %in
> --
> 1.8.2.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list