[Mesa-dev] [PATCH] R600: Control Flow support for pre EG gen
Tom Stellard
tom at stellard.net
Sun Apr 7 13:02:25 PDT 2013
On Sun, Apr 07, 2013 at 09:43:43PM +0200, Vincent Lejeune wrote:
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
> ---
> lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 30 ++--
> lib/Target/R600/R600ControlFlowFinalizer.cpp | 84 +++++++--
> lib/Target/R600/R600Instructions.td | 198 +++++++++++++++------
> 3 files changed, 240 insertions(+), 72 deletions(-)
>
> diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
> index 927bcbd..469a8ad 100644
> --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
> +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
> @@ -266,17 +266,27 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
> Emit(Inst, OS);
> break;
> }
> - case AMDGPU::CF_TC:
> - case AMDGPU::CF_VC:
> - case AMDGPU::CF_CALL_FS:
> + case AMDGPU::CF_TC_EG:
> + case AMDGPU::CF_VC_EG:
> + case AMDGPU::CF_CALL_FS_EG:
> + case AMDGPU::CF_TC_R600:
> + case AMDGPU::CF_VC_R600:
> + case AMDGPU::CF_CALL_FS_R600:
> return;
> - case AMDGPU::WHILE_LOOP:
> - case AMDGPU::END_LOOP:
> - case AMDGPU::LOOP_BREAK:
> - case AMDGPU::CF_CONTINUE:
> - case AMDGPU::CF_JUMP:
> - case AMDGPU::CF_ELSE:
> - case AMDGPU::POP: {
> + case AMDGPU::WHILE_LOOP_EG:
> + case AMDGPU::END_LOOP_EG:
> + case AMDGPU::LOOP_BREAK_EG:
> + case AMDGPU::CF_CONTINUE_EG:
> + case AMDGPU::CF_JUMP_EG:
> + case AMDGPU::CF_ELSE_EG:
> + case AMDGPU::POP_EG:
> + case AMDGPU::WHILE_LOOP_R600:
> + case AMDGPU::END_LOOP_R600:
> + case AMDGPU::LOOP_BREAK_R600:
> + case AMDGPU::CF_CONTINUE_R600:
> + case AMDGPU::CF_JUMP_R600:
> + case AMDGPU::CF_ELSE_R600:
> + case AMDGPU::POP_R600: {
> uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
> EmitByte(INSTR_NATIVE, OS);
> Emit(Inst, OS);
> diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
> index 3a6c7ea..cfaa36e 100644
> --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
> +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
> @@ -30,9 +30,22 @@ namespace llvm {
> class R600ControlFlowFinalizer : public MachineFunctionPass {
>
> private:
> + enum ControlFlowInstruction {
> + CF_TC,
> + CF_CALL_FS,
> + CF_WHILE_LOOP,
> + CF_END_LOOP,
> + CF_LOOP_BREAK,
> + CF_LOOP_CONTINUE,
> + CF_JUMP,
> + CF_ELSE,
> + CF_POP
> + };
> +
> static char ID;
> const R600InstrInfo *TII;
> unsigned MaxFetchInst;
> + const AMDGPUSubtarget &ST;
>
> bool isFetch(const MachineInstr *MI) const {
> switch (MI->getOpcode()) {
> @@ -70,6 +83,52 @@ private:
> }
> }
>
> + const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
> + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) {
> + switch (CFI) {
> + case CF_TC:
> + return TII->get(AMDGPU::CF_TC_R600);
> + case CF_CALL_FS:
> + return TII->get(AMDGPU::CF_CALL_FS_R600);
> + case CF_WHILE_LOOP:
> + return TII->get(AMDGPU::WHILE_LOOP_R600);
> + case CF_END_LOOP:
> + return TII->get(AMDGPU::END_LOOP_R600);
> + case CF_LOOP_BREAK:
> + return TII->get(AMDGPU::LOOP_BREAK_R600);
> + case CF_LOOP_CONTINUE:
> + return TII->get(AMDGPU::CF_CONTINUE_R600);
> + case CF_JUMP:
> + return TII->get(AMDGPU::CF_JUMP_R600);
> + case CF_ELSE:
> + return TII->get(AMDGPU::CF_ELSE_R600);
> + case CF_POP:
> + return TII->get(AMDGPU::POP_R600);
> + }
> + } else {
> + switch (CFI) {
> + case CF_TC:
> + return TII->get(AMDGPU::CF_TC_EG);
> + case CF_CALL_FS:
> + return TII->get(AMDGPU::CF_CALL_FS_EG);
> + case CF_WHILE_LOOP:
> + return TII->get(AMDGPU::WHILE_LOOP_EG);
> + case CF_END_LOOP:
> + return TII->get(AMDGPU::END_LOOP_EG);
> + case CF_LOOP_BREAK:
> + return TII->get(AMDGPU::LOOP_BREAK_EG);
> + case CF_LOOP_CONTINUE:
> + return TII->get(AMDGPU::CF_CONTINUE_EG);
> + case CF_JUMP:
> + return TII->get(AMDGPU::CF_JUMP_EG);
> + case CF_ELSE:
> + return TII->get(AMDGPU::CF_ELSE_EG);
> + case CF_POP:
> + return TII->get(AMDGPU::POP_EG);
> + }
> + }
> + }
> +
> MachineBasicBlock::iterator
> MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
> unsigned CfAddress) const {
> @@ -85,7 +144,7 @@ private:
> break;
> }
> BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
> - TII->get(AMDGPU::CF_TC))
> + getHWInstrDesc(CF_TC))
> .addImm(CfAddress) // ADDR
> .addImm(AluInstCount); // COUNT
> return I;
> @@ -104,7 +163,8 @@ private:
>
> public:
> R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
> - TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) {
> + TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
> + ST(tm.getSubtarget<AMDGPUSubtarget>()) {
> const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
> if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
> MaxFetchInst = 8;
> @@ -124,7 +184,7 @@ public:
> R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
> if (MFI->ShaderType == 1) {
> BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
> - TII->get(AMDGPU::CF_CALL_FS));
> + getHWInstrDesc(CF_CALL_FS));
> CfCount++;
> }
> for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
> @@ -154,7 +214,7 @@ public:
> CurrentStack++;
> MaxStack = std::max(MaxStack, CurrentStack);
> MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
> - TII->get(AMDGPU::WHILE_LOOP))
> + getHWInstrDesc(CF_WHILE_LOOP))
> .addImm(2);
> std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
> std::set<MachineInstr *>());
> @@ -170,7 +230,7 @@ public:
> LoopStack.back();
> LoopStack.pop_back();
> CounterPropagateAddr(Pair.second, CfCount);
> - BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP))
> + BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
> .addImm(Pair.first + 1);
> MI->eraseFromParent();
> CfCount++;
> @@ -178,7 +238,7 @@ public:
> }
> case AMDGPU::IF_PREDICATE_SET: {
> MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
> - TII->get(AMDGPU::CF_JUMP))
> + getHWInstrDesc(CF_JUMP))
> .addImm(0)
> .addImm(0);
> IfThenElseStack.push_back(MIb);
> @@ -192,7 +252,7 @@ public:
> IfThenElseStack.pop_back();
> CounterPropagateAddr(JumpInst, CfCount);
> MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
> - TII->get(AMDGPU::CF_ELSE))
> + getHWInstrDesc(CF_ELSE))
> .addImm(0)
> .addImm(1);
> DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
> @@ -207,7 +267,7 @@ public:
> IfThenElseStack.pop_back();
> CounterPropagateAddr(IfOrElseInst, CfCount + 1);
> MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
> - TII->get(AMDGPU::POP))
> + getHWInstrDesc(CF_POP))
> .addImm(CfCount + 1)
> .addImm(1);
> DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
> @@ -218,13 +278,13 @@ public:
> case AMDGPU::PREDICATED_BREAK: {
> CurrentStack--;
> CfCount += 3;
> - BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP))
> + BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP))
> .addImm(CfCount)
> .addImm(1);
> MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
> - TII->get(AMDGPU::LOOP_BREAK))
> + getHWInstrDesc(CF_LOOP_BREAK))
> .addImm(0);
> - BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP))
> + BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP))
> .addImm(CfCount)
> .addImm(1);
> LoopStack.back().second.insert(MIb);
> @@ -233,7 +293,7 @@ public:
> }
> case AMDGPU::CONTINUE: {
> MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
> - TII->get(AMDGPU::CF_CONTINUE))
> + getHWInstrDesc(CF_LOOP_CONTINUE))
> .addImm(0);
> LoopStack.back().second.insert(MIb);
> MI->eraseFromParent();
> diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> index 663b41a..b4c45e1 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -823,97 +823,103 @@ i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT),
> let Inst{63-32} = Word1;
> }
>
> -class CF_WORD0 {
> +class CF_WORD0_R600 {
> field bits<32> Word0;
>
> - bits<24> ADDR;
> - bits<3> JUMPTABLE_SEL;
> + bits<32> ADDR;
>
> - let Word0{23-0} = ADDR;
> - let Word0{26-24} = JUMPTABLE_SEL;
> + let Word0 = ADDR;
> }
>
> -class CF_WORD1 {
> +class CF_WORD1_R600 {
> field bits<32> Word1;
>
> bits<3> POP_COUNT;
> bits<5> CF_CONST;
> bits<2> COND;
> - bits<6> COUNT;
> + bits<3> COUNT;
> + bits<6> CALL_COUNT;
> + bits<1> COUNT_3;
> + bits<1> END_OF_PROGRAM;
> bits<1> VALID_PIXEL_MODE;
> - bits<8> CF_INST;
> + bits<7> CF_INST;
> + bits<1> WHOLE_QUAD_MODE;
> bits<1> BARRIER;
>
> let Word1{2-0} = POP_COUNT;
> let Word1{7-3} = CF_CONST;
> let Word1{9-8} = COND;
> - let Word1{15-10} = COUNT;
> - let Word1{20} = VALID_PIXEL_MODE;
> - let Word1{29-22} = CF_INST;
> + let Word1{12-10} = COUNT;
> + let Word1{18-13} = CALL_COUNT;
> + let Word1{19} = COUNT_3;
> + let Word1{21} = END_OF_PROGRAM;
> + let Word1{22} = VALID_PIXEL_MODE;
> + let Word1{29-23} = CF_INST;
> + let Word1{30} = WHOLE_QUAD_MODE;
> let Word1{31} = BARRIER;
> }
>
> -class CF_CLAUSE <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
> -ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 {
> +class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
> +ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
> field bits<64> Inst;
>
> let CF_INST = inst;
> let BARRIER = 1;
> - let JUMPTABLE_SEL = 0;
> let CF_CONST = 0;
> let VALID_PIXEL_MODE = 0;
> let COND = 0;
> + let CALL_COUNT = 0;
> + let COUNT_3 = 0;
> + let END_OF_PROGRAM = 0;
> + let WHOLE_QUAD_MODE = 0;
>
> let Inst{31-0} = Word0;
> let Inst{63-32} = Word1;
> }
>
> -def CF_TC : CF_CLAUSE<1, (ins i32imm:$ADDR, i32imm:$COUNT),
> -"TEX $COUNT @$ADDR"> {
> - let POP_COUNT = 0;
> -}
> -
> -def CF_VC : CF_CLAUSE<2, (ins i32imm:$ADDR, i32imm:$COUNT),
> -"VTX $COUNT @$ADDR"> {
> - let POP_COUNT = 0;
> -}
> +class CF_WORD0_EG {
> + field bits<32> Word0;
>
> -def WHILE_LOOP : CF_CLAUSE<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> {
> - let POP_COUNT = 0;
> - let COUNT = 0;
> -}
> + bits<24> ADDR;
> + bits<3> JUMPTABLE_SEL;
>
> -def END_LOOP : CF_CLAUSE<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
> - let POP_COUNT = 0;
> - let COUNT = 0;
> + let Word0{23-0} = ADDR;
> + let Word0{26-24} = JUMPTABLE_SEL;
> }
>
> -def LOOP_BREAK : CF_CLAUSE<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> {
> - let POP_COUNT = 0;
> - let COUNT = 0;
> -}
> +class CF_WORD1_EG {
> + field bits<32> Word1;
>
> -def CF_CONTINUE : CF_CLAUSE<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> {
> - let POP_COUNT = 0;
> - let COUNT = 0;
> -}
> + bits<3> POP_COUNT;
> + bits<5> CF_CONST;
> + bits<2> COND;
> + bits<6> COUNT;
> + bits<1> VALID_PIXEL_MODE;
> + bits<8> CF_INST;
> + bits<1> BARRIER;
>
> -def CF_JUMP : CF_CLAUSE<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> {
> - let COUNT = 0;
> + let Word1{2-0} = POP_COUNT;
> + let Word1{7-3} = CF_CONST;
> + let Word1{9-8} = COND;
> + let Word1{15-10} = COUNT;
> + let Word1{20} = VALID_PIXEL_MODE;
> + let Word1{29-22} = CF_INST;
> + let Word1{31} = BARRIER;
> }
>
> -def CF_ELSE : CF_CLAUSE<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> {
> - let COUNT = 0;
> -}
> +class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
> +ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
> + field bits<64> Inst;
>
> -def CF_CALL_FS : CF_CLAUSE<19, (ins), "CALL_FS"> {
> - let ADDR = 0;
> - let COUNT = 0;
> - let POP_COUNT = 0;
> -}
> + let CF_INST = inst;
> + let BARRIER = 1;
> + let JUMPTABLE_SEL = 0;
> + let CF_CONST = 0;
> + let VALID_PIXEL_MODE = 0;
> + let COND = 0;
>
> -def POP : CF_CLAUSE<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> {
> - let COUNT = 0;
> + let Inst{31-0} = Word0;
> + let Inst{63-32} = Word1;
> }
>
> def CF_ALU : ALU_CLAUSE<8, "ALU">;
> @@ -1433,6 +1439,52 @@ let Predicates = [isR600] in {
> let Word1{31} = 1; // BARRIER
> }
> defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
> +
> + def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT),
> + "TEX $COUNT @$ADDR"> {
> + let POP_COUNT = 0;
> + }
> + def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT),
> + "VTX $COUNT @$ADDR"> {
> + let POP_COUNT = 0;
> + }
> + def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
> + "LOOP_START_DX10 @$ADDR"> {
> + let POP_COUNT = 0;
> + let COUNT = 0;
> + }
> + def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
> + let POP_COUNT = 0;
> + let COUNT = 0;
> + }
> + def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
> + "LOOP_BREAK @$ADDR"> {
> + let POP_COUNT = 0;
> + let COUNT = 0;
> + }
> + def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
> + "CONTINUE @$ADDR"> {
> + let POP_COUNT = 0;
> + let COUNT = 0;
> + }
> + def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
> + "JUMP @$ADDR POP:$POP_COUNT"> {
> + let COUNT = 0;
> + }
> + def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
> + "ELSE @$ADDR POP:$POP_COUNT"> {
> + let COUNT = 0;
> + }
> + def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
> + let ADDR = 0;
> + let COUNT = 0;
> + let POP_COUNT = 0;
> + }
> + def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
> + "POP @$ADDR POP:$POP_COUNT"> {
> + let COUNT = 0;
> + }
> +
> }
>
> // Helper pattern for normalizing inputs to triginomic instructions for R700+
> @@ -1589,6 +1641,52 @@ let hasSideEffects = 1 in {
> }
> defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
>
> + def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT),
> + "TEX $COUNT @$ADDR"> {
> + let POP_COUNT = 0;
> + }
> + def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT),
> + "VTX $COUNT @$ADDR"> {
> + let POP_COUNT = 0;
> + }
> + def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR),
> + "LOOP_START_DX10 @$ADDR"> {
> + let POP_COUNT = 0;
> + let COUNT = 0;
> + }
> + def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
> + let POP_COUNT = 0;
> + let COUNT = 0;
> + }
> + def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR),
> + "LOOP_BREAK @$ADDR"> {
> + let POP_COUNT = 0;
> + let COUNT = 0;
> + }
> + def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR),
> + "CONTINUE @$ADDR"> {
> + let POP_COUNT = 0;
> + let COUNT = 0;
> + }
> + def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
> + "JUMP @$ADDR POP:$POP_COUNT"> {
> + let COUNT = 0;
> + }
> + def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
> + "ELSE @$ADDR POP:$POP_COUNT"> {
> + let COUNT = 0;
> + }
> + def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> {
> + let ADDR = 0;
> + let COUNT = 0;
> + let POP_COUNT = 0;
> + }
> + def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
> + "POP @$ADDR POP:$POP_COUNT"> {
> + let COUNT = 0;
> + }
> +
> +
> //===----------------------------------------------------------------------===//
> // Memory read/write instructions
> //===----------------------------------------------------------------------===//
> --
> 1.8.1.4
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the mesa-dev mailing list