[Mesa-dev] [PATCH] R600: Control Flow support for pre EG gen
Vincent Lejeune
vljn at ovi.com
Sun Apr 7 12:43:43 PDT 2013
---
lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 30 ++--
lib/Target/R600/R600ControlFlowFinalizer.cpp | 84 +++++++--
lib/Target/R600/R600Instructions.td | 198 +++++++++++++++------
3 files changed, 240 insertions(+), 72 deletions(-)
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index 927bcbd..469a8ad 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -266,17 +266,27 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
Emit(Inst, OS);
break;
}
- case AMDGPU::CF_TC:
- case AMDGPU::CF_VC:
- case AMDGPU::CF_CALL_FS:
+ case AMDGPU::CF_TC_EG:
+ case AMDGPU::CF_VC_EG:
+ case AMDGPU::CF_CALL_FS_EG:
+ case AMDGPU::CF_TC_R600:
+ case AMDGPU::CF_VC_R600:
+ case AMDGPU::CF_CALL_FS_R600:
return;
- case AMDGPU::WHILE_LOOP:
- case AMDGPU::END_LOOP:
- case AMDGPU::LOOP_BREAK:
- case AMDGPU::CF_CONTINUE:
- case AMDGPU::CF_JUMP:
- case AMDGPU::CF_ELSE:
- case AMDGPU::POP: {
+ case AMDGPU::WHILE_LOOP_EG:
+ case AMDGPU::END_LOOP_EG:
+ case AMDGPU::LOOP_BREAK_EG:
+ case AMDGPU::CF_CONTINUE_EG:
+ case AMDGPU::CF_JUMP_EG:
+ case AMDGPU::CF_ELSE_EG:
+ case AMDGPU::POP_EG:
+ case AMDGPU::WHILE_LOOP_R600:
+ case AMDGPU::END_LOOP_R600:
+ case AMDGPU::LOOP_BREAK_R600:
+ case AMDGPU::CF_CONTINUE_R600:
+ case AMDGPU::CF_JUMP_R600:
+ case AMDGPU::CF_ELSE_R600:
+ case AMDGPU::POP_R600: {
uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
EmitByte(INSTR_NATIVE, OS);
Emit(Inst, OS);
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index 3a6c7ea..cfaa36e 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -30,9 +30,22 @@ namespace llvm {
class R600ControlFlowFinalizer : public MachineFunctionPass {
private:
+ enum ControlFlowInstruction { // generation-neutral keys resolved by getHWInstrDesc()
+ CF_TC, // resolves to CF_TC_R600 / CF_TC_EG (printed as "TEX")
+ CF_CALL_FS, // resolves to CF_CALL_FS_R600 / CF_CALL_FS_EG
+ CF_WHILE_LOOP, // resolves to WHILE_LOOP_R600 / WHILE_LOOP_EG
+ CF_END_LOOP, // resolves to END_LOOP_R600 / END_LOOP_EG
+ CF_LOOP_BREAK, // resolves to LOOP_BREAK_R600 / LOOP_BREAK_EG
+ CF_LOOP_CONTINUE, // resolves to CF_CONTINUE_R600 / CF_CONTINUE_EG
+ CF_JUMP, // resolves to CF_JUMP_R600 / CF_JUMP_EG
+ CF_ELSE, // resolves to CF_ELSE_R600 / CF_ELSE_EG
+ CF_POP // resolves to POP_R600 / POP_EG
+ }; // NOTE(review): the CF_VC_R600 / CF_VC_EG opcodes have no enumerator here
+
static char ID;
const R600InstrInfo *TII;
unsigned MaxFetchInst;
+ const AMDGPUSubtarget &ST;
bool isFetch(const MachineInstr *MI) const {
switch (MI->getOpcode()) {
@@ -70,6 +83,52 @@ private:
}
}
+ /// Translate a generation-neutral control-flow kind into the instruction
+ /// description of the hardware opcode used by the current subtarget.
+ ///
+ /// Devices whose generation compares <= AMDGPUDeviceInfo::HD4XXX get the
+ /// *_R600 opcodes; all other devices get the *_EG opcodes.
+ ///
+ /// \param CFI the control-flow instruction the caller is about to build.
+ /// \returns the MCInstrDesc that TII reports for the selected opcode.
+ const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
+   // Read the generation once; each case below only chooses between the two
+   // encodings of a single instruction.
+   const bool UseR600Encoding =
+       ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX;
+   switch (CFI) {
+   case CF_TC:
+     return TII->get(UseR600Encoding ? AMDGPU::CF_TC_R600
+                                     : AMDGPU::CF_TC_EG);
+   case CF_CALL_FS:
+     return TII->get(UseR600Encoding ? AMDGPU::CF_CALL_FS_R600
+                                     : AMDGPU::CF_CALL_FS_EG);
+   case CF_WHILE_LOOP:
+     return TII->get(UseR600Encoding ? AMDGPU::WHILE_LOOP_R600
+                                     : AMDGPU::WHILE_LOOP_EG);
+   case CF_END_LOOP:
+     return TII->get(UseR600Encoding ? AMDGPU::END_LOOP_R600
+                                     : AMDGPU::END_LOOP_EG);
+   case CF_LOOP_BREAK:
+     return TII->get(UseR600Encoding ? AMDGPU::LOOP_BREAK_R600
+                                     : AMDGPU::LOOP_BREAK_EG);
+   case CF_LOOP_CONTINUE:
+     return TII->get(UseR600Encoding ? AMDGPU::CF_CONTINUE_R600
+                                     : AMDGPU::CF_CONTINUE_EG);
+   case CF_JUMP:
+     return TII->get(UseR600Encoding ? AMDGPU::CF_JUMP_R600
+                                     : AMDGPU::CF_JUMP_EG);
+   case CF_ELSE:
+     return TII->get(UseR600Encoding ? AMDGPU::CF_ELSE_R600
+                                     : AMDGPU::CF_ELSE_EG);
+   case CF_POP:
+     return TII->get(UseR600Encoding ? AMDGPU::POP_R600
+                                     : AMDGPU::POP_EG);
+   }
+   // NOTE(review): every enumerator returns above, but nothing is returned
+   // after the switch, so a CFI value outside the enum falls off the end.
+ }
+
MachineBasicBlock::iterator
MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned CfAddress) const {
@@ -85,7 +144,7 @@ private:
break;
}
BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
- TII->get(AMDGPU::CF_TC))
+ getHWInstrDesc(CF_TC))
.addImm(CfAddress) // ADDR
.addImm(AluInstCount); // COUNT
return I;
@@ -104,7 +163,8 @@ private:
public:
R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
- TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) {
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
+ ST(tm.getSubtarget<AMDGPUSubtarget>()) {
const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
MaxFetchInst = 8;
@@ -124,7 +184,7 @@ public:
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
if (MFI->ShaderType == 1) {
BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
- TII->get(AMDGPU::CF_CALL_FS));
+ getHWInstrDesc(CF_CALL_FS));
CfCount++;
}
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
@@ -154,7 +214,7 @@ public:
CurrentStack++;
MaxStack = std::max(MaxStack, CurrentStack);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- TII->get(AMDGPU::WHILE_LOOP))
+ getHWInstrDesc(CF_WHILE_LOOP))
.addImm(2);
std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
std::set<MachineInstr *>());
@@ -170,7 +230,7 @@ public:
LoopStack.back();
LoopStack.pop_back();
CounterPropagateAddr(Pair.second, CfCount);
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP))
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
.addImm(Pair.first + 1);
MI->eraseFromParent();
CfCount++;
@@ -178,7 +238,7 @@ public:
}
case AMDGPU::IF_PREDICATE_SET: {
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- TII->get(AMDGPU::CF_JUMP))
+ getHWInstrDesc(CF_JUMP))
.addImm(0)
.addImm(0);
IfThenElseStack.push_back(MIb);
@@ -192,7 +252,7 @@ public:
IfThenElseStack.pop_back();
CounterPropagateAddr(JumpInst, CfCount);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- TII->get(AMDGPU::CF_ELSE))
+ getHWInstrDesc(CF_ELSE))
.addImm(0)
.addImm(1);
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
@@ -207,7 +267,7 @@ public:
IfThenElseStack.pop_back();
CounterPropagateAddr(IfOrElseInst, CfCount + 1);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- TII->get(AMDGPU::POP))
+ getHWInstrDesc(CF_POP))
.addImm(CfCount + 1)
.addImm(1);
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
@@ -218,13 +278,13 @@ public:
case AMDGPU::PREDICATED_BREAK: {
CurrentStack--;
CfCount += 3;
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP))
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP))
.addImm(CfCount)
.addImm(1);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- TII->get(AMDGPU::LOOP_BREAK))
+ getHWInstrDesc(CF_LOOP_BREAK))
.addImm(0);
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP))
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP))
.addImm(CfCount)
.addImm(1);
LoopStack.back().second.insert(MIb);
@@ -233,7 +293,7 @@ public:
}
case AMDGPU::CONTINUE: {
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- TII->get(AMDGPU::CF_CONTINUE))
+ getHWInstrDesc(CF_LOOP_CONTINUE))
.addImm(0);
LoopStack.back().second.insert(MIb);
MI->eraseFromParent();
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 663b41a..b4c45e1 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -823,97 +823,103 @@ i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT),
let Inst{63-32} = Word1;
}
-class CF_WORD0 {
+class CF_WORD0_R600 {
field bits<32> Word0;
- bits<24> ADDR;
- bits<3> JUMPTABLE_SEL;
+ bits<32> ADDR;
- let Word0{23-0} = ADDR;
- let Word0{26-24} = JUMPTABLE_SEL;
+ let Word0 = ADDR;
}
-class CF_WORD1 {
+class CF_WORD1_R600 {
field bits<32> Word1;
bits<3> POP_COUNT;
bits<5> CF_CONST;
bits<2> COND;
- bits<6> COUNT;
+ bits<3> COUNT;
+ bits<6> CALL_COUNT;
+ bits<1> COUNT_3;
+ bits<1> END_OF_PROGRAM;
bits<1> VALID_PIXEL_MODE;
- bits<8> CF_INST;
+ bits<7> CF_INST;
+ bits<1> WHOLE_QUAD_MODE;
bits<1> BARRIER;
let Word1{2-0} = POP_COUNT;
let Word1{7-3} = CF_CONST;
let Word1{9-8} = COND;
- let Word1{15-10} = COUNT;
- let Word1{20} = VALID_PIXEL_MODE;
- let Word1{29-22} = CF_INST;
+ let Word1{12-10} = COUNT;
+ let Word1{18-13} = CALL_COUNT;
+ let Word1{19} = COUNT_3;
+ let Word1{21} = END_OF_PROGRAM;
+ let Word1{22} = VALID_PIXEL_MODE;
+ let Word1{29-23} = CF_INST;
+ let Word1{30} = WHOLE_QUAD_MODE;
let Word1{31} = BARRIER;
}
-class CF_CLAUSE <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
-ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 {
+class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
+ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
field bits<64> Inst;
let CF_INST = inst;
let BARRIER = 1;
- let JUMPTABLE_SEL = 0;
let CF_CONST = 0;
let VALID_PIXEL_MODE = 0;
let COND = 0;
+ let CALL_COUNT = 0;
+ let COUNT_3 = 0;
+ let END_OF_PROGRAM = 0;
+ let WHOLE_QUAD_MODE = 0;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
}
-def CF_TC : CF_CLAUSE<1, (ins i32imm:$ADDR, i32imm:$COUNT),
-"TEX $COUNT @$ADDR"> {
- let POP_COUNT = 0;
-}
-
-def CF_VC : CF_CLAUSE<2, (ins i32imm:$ADDR, i32imm:$COUNT),
-"VTX $COUNT @$ADDR"> {
- let POP_COUNT = 0;
-}
+class CF_WORD0_EG {
+ field bits<32> Word0;
-def WHILE_LOOP : CF_CLAUSE<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> {
- let POP_COUNT = 0;
- let COUNT = 0;
-}
+ bits<24> ADDR;
+ bits<3> JUMPTABLE_SEL;
-def END_LOOP : CF_CLAUSE<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
- let POP_COUNT = 0;
- let COUNT = 0;
+ let Word0{23-0} = ADDR;
+ let Word0{26-24} = JUMPTABLE_SEL;
}
-def LOOP_BREAK : CF_CLAUSE<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> {
- let POP_COUNT = 0;
- let COUNT = 0;
-}
+class CF_WORD1_EG {
+ field bits<32> Word1;
-def CF_CONTINUE : CF_CLAUSE<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> {
- let POP_COUNT = 0;
- let COUNT = 0;
-}
+ bits<3> POP_COUNT;
+ bits<5> CF_CONST;
+ bits<2> COND;
+ bits<6> COUNT;
+ bits<1> VALID_PIXEL_MODE;
+ bits<8> CF_INST;
+ bits<1> BARRIER;
-def CF_JUMP : CF_CLAUSE<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> {
- let COUNT = 0;
+ let Word1{2-0} = POP_COUNT;
+ let Word1{7-3} = CF_CONST;
+ let Word1{9-8} = COND;
+ let Word1{15-10} = COUNT;
+ let Word1{20} = VALID_PIXEL_MODE;
+ let Word1{29-22} = CF_INST;
+ let Word1{31} = BARRIER;
}
-def CF_ELSE : CF_CLAUSE<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> {
- let COUNT = 0;
-}
+class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
+ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
+ field bits<64> Inst;
-def CF_CALL_FS : CF_CLAUSE<19, (ins), "CALL_FS"> {
- let ADDR = 0;
- let COUNT = 0;
- let POP_COUNT = 0;
-}
+ let CF_INST = inst;
+ let BARRIER = 1;
+ let JUMPTABLE_SEL = 0;
+ let CF_CONST = 0;
+ let VALID_PIXEL_MODE = 0;
+ let COND = 0;
-def POP : CF_CLAUSE<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> {
- let COUNT = 0;
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
}
def CF_ALU : ALU_CLAUSE<8, "ALU">;
@@ -1433,6 +1439,52 @@ let Predicates = [isR600] in {
let Word1{31} = 1; // BARRIER
}
defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
+
+ def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT), // CF_INST = 1
+ "TEX $COUNT @$ADDR"> {
+ let POP_COUNT = 0; // COUNT is a bits<3> field in CF_WORD1_R600
+ }
+ def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT), // CF_INST = 2
+ "VTX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+ }
+ def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR), // CF_INST = 6
+ "LOOP_START_DX10 @$ADDR"> {
+ let POP_COUNT = 0; // only $ADDR is an operand; the other fields are pinned to 0
+ let COUNT = 0;
+ }
+ def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { // CF_INST = 5
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR), // CF_INST = 9
+ "LOOP_BREAK @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR), // CF_INST = 8
+ "CONTINUE @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), // CF_INST = 10
+ "JUMP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0; // POP_COUNT is an operand here, so only COUNT is pinned
+ }
+ def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), // CF_INST = 13
+ "ELSE @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+ }
+ def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> { // CF_INST = 19, no operands
+ let ADDR = 0; // with an empty operand list every field is a constant
+ let COUNT = 0;
+ let POP_COUNT = 0;
+ }
+ def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), // CF_INST = 14
+ "POP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+ }
+
}
// Helper pattern for normalizing inputs to triginomic instructions for R700+
@@ -1589,6 +1641,52 @@ let hasSideEffects = 1 in {
}
defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
+ def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT), // CF_INST = 1
+ "TEX $COUNT @$ADDR"> {
+ let POP_COUNT = 0; // COUNT is a bits<6> field in CF_WORD1_EG
+ }
+ def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT), // CF_INST = 2
+ "VTX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+ }
+ def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR), // CF_INST = 6
+ "LOOP_START_DX10 @$ADDR"> {
+ let POP_COUNT = 0; // only $ADDR is an operand; the other fields are pinned to 0
+ let COUNT = 0;
+ }
+ def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { // CF_INST = 5
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR), // CF_INST = 9
+ "LOOP_BREAK @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR), // CF_INST = 8
+ "CONTINUE @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), // CF_INST = 10
+ "JUMP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0; // POP_COUNT is an operand here, so only COUNT is pinned
+ }
+ def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), // CF_INST = 13
+ "ELSE @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+ }
+ def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> { // CF_INST = 19, no operands
+ let ADDR = 0; // with an empty operand list every field is a constant
+ let COUNT = 0;
+ let POP_COUNT = 0;
+ }
+ def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), // CF_INST = 14
+ "POP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+ }
+
+
//===----------------------------------------------------------------------===//
// Memory read/write instructions
//===----------------------------------------------------------------------===//
--
1.8.1.4
More information about the mesa-dev
mailing list