[Mesa-dev] [PATCH] R600: Add support for native control flow
Vincent Lejeune
vljn at ovi.com
Mon Apr 1 08:42:43 PDT 2013
---
lib/Target/R600/AMDGPU.h | 1 +
lib/Target/R600/AMDGPUTargetMachine.cpp | 1 +
lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 21 +-
lib/Target/R600/R600ControlFlowFinalizer.cpp | 264 +++++++++++++++++++++
lib/Target/R600/R600Instructions.td | 100 ++++++++
.../CodeGen/R600/disconnected-predset-break-bug.ll | 5 +-
test/CodeGen/R600/predicates.ll | 12 +-
7 files changed, 397 insertions(+), 7 deletions(-)
create mode 100644 lib/Target/R600/R600ControlFlowFinalizer.cpp
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 3cd792a..0b01433 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -24,6 +24,7 @@ class AMDGPUTargetMachine;
FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
+FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
// SI Passes
FunctionPass *createSIAnnotateControlFlowPass();
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 45b1be0..e7ea876 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -153,6 +153,7 @@ bool AMDGPUPassConfig::addPreEmitPass() {
addPass(createAMDGPUCFGStructurizerPass(*TM));
addPass(createR600EmitClauseMarkers(*TM));
addPass(createR600ExpandSpecialInstrsPass(*TM));
+ addPass(createR600ControlFlowFinalizer(*TM));
addPass(&FinalizeMachineBundlesID);
} else {
addPass(createSILowerControlFlowPass(*TM));
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index 1bf87fc..6ef4d40 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -147,6 +147,10 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
return;
} else {
switch(MI.getOpcode()) {
+ case AMDGPU::STACK_SIZE: {
+ EmitByte(MI.getOperand(0).getImm(), OS);
+ break;
+ }
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
@@ -259,7 +263,22 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
Emit(Inst, OS);
break;
}
-
+ case AMDGPU::CF_TC:
+ case AMDGPU::CF_VC:
+ case AMDGPU::CF_CALL_FS:
+ return;
+ case AMDGPU::WHILE_LOOP:
+ case AMDGPU::END_LOOP:
+ case AMDGPU::LOOP_BREAK:
+ case AMDGPU::CF_CONTINUE:
+ case AMDGPU::CF_JUMP:
+ case AMDGPU::CF_ELSE:
+ case AMDGPU::POP: {
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_NATIVE, OS);
+ Emit(Inst, OS);
+ break;
+ }
default:
EmitALUInstr(MI, Fixups, OS);
break;
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
new file mode 100644
index 0000000..bd87d74
--- /dev/null
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -0,0 +1,264 @@
+//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass compute turns all control flow pseudo instructions into native one
+/// computing their address on the fly ; it also sets STACK_SIZE info.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+
+class R600ControlFlowFinalizer : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+ unsigned MaxFetchInst;
+
+ bool isFetch(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::TEX_VTX_CONSTBUF:
+ case AMDGPU::TEX_VTX_TEXBUF:
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TXD:
+ case AMDGPU::TXD_SHADOW:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool IsTrivialInst(MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::KILL:
+ case AMDGPU::RETURN:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ MachineBasicBlock::iterator
+ MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned CfAddress) const {
+ MachineBasicBlock::iterator ClauseHead = I;
+ unsigned AluInstCount = 0;
+ for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
+ if (IsTrivialInst(I))
+ continue;
+ if (!isFetch(I))
+ break;
+ AluInstCount ++;
+ if (AluInstCount > MaxFetchInst)
+ break;
+ }
+ BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
+ TII->get(AMDGPU::CF_TC))
+ .addImm(CfAddress) // ADDR
+ .addImm(AluInstCount); // COUNT
+ return I;
+ }
+ void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::WHILE_LOOP:
+ MI->getOperand(0).setImm(Addr + 1);
+ break;
+ default:
+ MI->getOperand(0).setImm(Addr);
+ break;
+ }
+ }
+ void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr)
+ const {
+ for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end();
+ It != E; ++It) {
+ MachineInstr *MI = *It;
+ CounterPropagateAddr(MI, Addr);
+ }
+ }
+
+public:
+ R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) {
+ const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
+ MaxFetchInst = 8;
+ else
+ MaxFetchInst = 16;
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ unsigned MaxStack = 0;
+ unsigned CurrentStack = 0;
+ for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
+ ++MB) {
+ MachineBasicBlock &MBB = *MB;
+ unsigned CfCount = 0;
+ std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
+ std::vector<std::pair<unsigned, MachineInstr *> > IfThenElseStack;
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ if (MFI->ShaderType == 1) {
+ BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+ TII->get(AMDGPU::CF_CALL_FS));
+ CfCount++;
+ }
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E;) {
+ if (isFetch(I)) {
+ I = MakeFetchClause(MBB, I, 0);
+ CfCount++;
+ continue;
+ }
+
+ MachineBasicBlock::iterator MI = I;
+ I++;
+ switch (MI->getOpcode()) {
+ case AMDGPU::CF_ALU_PUSH_BEFORE:
+ CurrentStack++;
+ MaxStack = std::max(MaxStack, CurrentStack);
+ case AMDGPU::KILLGT:
+ case AMDGPU::CF_ALU:
+ CfCount++;
+ break;
+ case AMDGPU::WHILELOOP: {
+ CurrentStack++;
+ MaxStack = std::max(MaxStack, CurrentStack);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::WHILE_LOOP))
+ .addImm(0);
+ std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
+ std::set<MachineInstr *>());
+ Pair.second.insert(MIb);
+ LoopStack.push_back(Pair);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ENDLOOP: {
+ CurrentStack--;
+ std::pair<unsigned, std::set<MachineInstr *> > Pair =
+ LoopStack.back();
+ LoopStack.pop_back();
+ CounterPropagateAddr(Pair.second, CfCount);
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP))
+ .addImm(Pair.first + 1);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::IF_PREDICATE_SET: {
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::CF_JUMP))
+ .addImm(0)
+ .addImm(0);
+ std::pair<unsigned, MachineInstr *> Pair(CfCount, MIb);
+ IfThenElseStack.push_back(Pair);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ELSE: {
+ std::pair<unsigned, MachineInstr *> Pair = IfThenElseStack.back();
+ IfThenElseStack.pop_back();
+ CounterPropagateAddr(Pair.second, CfCount);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::CF_ELSE))
+ .addImm(0)
+ .addImm(1);
+ std::pair<unsigned, MachineInstr *> NewPair(CfCount, MIb);
+ IfThenElseStack.push_back(NewPair);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ENDIF: {
+ CurrentStack--;
+ std::pair<unsigned, MachineInstr *> Pair = IfThenElseStack.back();
+ IfThenElseStack.pop_back();
+ CounterPropagateAddr(Pair.second, CfCount + 1);
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP))
+ .addImm(CfCount + 1)
+ .addImm(1);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::PREDICATED_BREAK: {
+ CurrentStack--;
+ CfCount += 3;
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP))
+ .addImm(CfCount)
+ .addImm(1);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::LOOP_BREAK))
+ .addImm(0);
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP))
+ .addImm(CfCount)
+ .addImm(1);
+ LoopStack.back().second.insert(MIb);
+ MI->eraseFromParent();
+ break;
+ }
+ case AMDGPU::CONTINUE: {
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::CF_CONTINUE))
+ .addImm(CfCount);
+ LoopStack.back().second.insert(MIb);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+ TII->get(AMDGPU::STACK_SIZE))
+ .addImm(MaxStack);
+ }
+
+ return false;
+ }
+
+ const char *getPassName() const {
+ return "R600 Control Flow Finalizer Pass";
+ }
+};
+
+char R600ControlFlowFinalizer::ID = 0;
+
+}
+
+
+llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
+ return new R600ControlFlowFinalizer(TM);
+}
+
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 7cfde78..663b41a 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -823,9 +823,109 @@ i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT),
let Inst{63-32} = Word1;
}
+class CF_WORD0 {
+ field bits<32> Word0;
+
+ bits<24> ADDR;
+ bits<3> JUMPTABLE_SEL;
+
+ let Word0{23-0} = ADDR;
+ let Word0{26-24} = JUMPTABLE_SEL;
+}
+
+class CF_WORD1 {
+ field bits<32> Word1;
+
+ bits<3> POP_COUNT;
+ bits<5> CF_CONST;
+ bits<2> COND;
+ bits<6> COUNT;
+ bits<1> VALID_PIXEL_MODE;
+ bits<8> CF_INST;
+ bits<1> BARRIER;
+
+ let Word1{2-0} = POP_COUNT;
+ let Word1{7-3} = CF_CONST;
+ let Word1{9-8} = COND;
+ let Word1{15-10} = COUNT;
+ let Word1{20} = VALID_PIXEL_MODE;
+ let Word1{29-22} = CF_INST;
+ let Word1{31} = BARRIER;
+}
+
+class CF_CLAUSE <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
+ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 {
+ field bits<64> Inst;
+
+ let CF_INST = inst;
+ let BARRIER = 1;
+ let JUMPTABLE_SEL = 0;
+ let CF_CONST = 0;
+ let VALID_PIXEL_MODE = 0;
+ let COND = 0;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+def CF_TC : CF_CLAUSE<1, (ins i32imm:$ADDR, i32imm:$COUNT),
+"TEX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+}
+
+def CF_VC : CF_CLAUSE<2, (ins i32imm:$ADDR, i32imm:$COUNT),
+"VTX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+}
+
+def WHILE_LOOP : CF_CLAUSE<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def END_LOOP : CF_CLAUSE<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def LOOP_BREAK : CF_CLAUSE<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def CF_CONTINUE : CF_CLAUSE<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def CF_JUMP : CF_CLAUSE<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+
+def CF_ELSE : CF_CLAUSE<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+
+def CF_CALL_FS : CF_CLAUSE<19, (ins), "CALL_FS"> {
+ let ADDR = 0;
+ let COUNT = 0;
+ let POP_COUNT = 0;
+}
+
+def POP : CF_CLAUSE<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+
def CF_ALU : ALU_CLAUSE<8, "ALU">;
def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
+def STACK_SIZE : AMDGPUInst <(outs),
+(ins i32imm:$num), "nstack $num", [] > {
+ field bits<8> Inst;
+ bits<8> num;
+ let Inst = num;
+}
+
let Predicates = [isR600toCayman] in {
//===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/R600/disconnected-predset-break-bug.ll b/test/CodeGen/R600/disconnected-predset-break-bug.ll
index a586742..09baee7 100644
--- a/test/CodeGen/R600/disconnected-predset-break-bug.ll
+++ b/test/CodeGen/R600/disconnected-predset-break-bug.ll
@@ -5,9 +5,10 @@
; and the PREDICATE_BREAK in this loop.
; CHECK: @loop_ge
-; CHECK: WHILE
+; CHECK: LOOP_START_DX10
; CHECK: PRED_SET
-; CHECK-NEXT: PREDICATED_BREAK
+; CHECK-NEXT: JUMP
+; CHECK-NEXT: LOOP_BREAK
define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) nounwind {
entry:
%cmp5 = icmp sgt i32 %iterations, 0
diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll
index 18895a4..eb8b052 100644
--- a/test/CodeGen/R600/predicates.ll
+++ b/test/CodeGen/R600/predicates.ll
@@ -45,10 +45,12 @@ ENDIF:
}
; CHECK: @nested_if
-; CHECK: IF_PREDICATE_SET
+; CHECK: ALU_PUSH_BEFORE
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec
+; CHECK: JUMP
; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
-; CHECK: ENDIF
+; CHECK: POP
define void @nested_if(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = icmp sgt i32 %in, 0
@@ -70,11 +72,13 @@ ENDIF:
}
; CHECK: @nested_if_else
-; CHECK: IF_PREDICATE_SET
+; CHECK: ALU_PUSH_BEFORE
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec
+; CHECK: JUMP
; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
-; CHECK: ENDIF
+; CHECK: POP
define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = icmp sgt i32 %in, 0
--
1.8.1.4
More information about the mesa-dev
mailing list