[Mesa-dev] [PATCH 1/2] R600: Optimize and cleanup KILL on SI
Christian König
deathsimple at vodafone.de
Mon Dec 17 06:32:23 PST 2012
We shouldn't insert KILL optimization if we don't have a
kill instruction at all.
Signed-off-by: Christian König <deathsimple at vodafone.de>
---
lib/Target/AMDGPU/SIISelLowering.cpp | 14 ----
lib/Target/AMDGPU/SIISelLowering.h | 2 -
lib/Target/AMDGPU/SIInstructions.td | 24 +++---
lib/Target/AMDGPU/SILowerControlFlow.cpp | 125 ++++++++++++++++++++----------
4 files changed, 94 insertions(+), 71 deletions(-)
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index cd6e0e9..435b0b3 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -131,9 +131,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::SI_INTERP_CONST:
LowerSI_INTERP_CONST(MI, *BB, I, MRI);
break;
- case AMDGPU::SI_KIL:
- LowerSI_KIL(MI, *BB, I, MRI);
- break;
case AMDGPU::SI_WQM:
LowerSI_WQM(MI, *BB, I, MRI);
break;
@@ -211,17 +208,6 @@ void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
MI->eraseFromParent();
}
-void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
- // Clear this pixel from the exec mask if the operand is negative
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32),
- AMDGPU::VCC)
- .addReg(AMDGPU::SREG_LIT_0)
- .addOperand(MI->getOperand(0));
-
- MI->eraseFromParent();
-}
-
void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index c088112..db36eef 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -34,8 +34,6 @@ class SITargetLowering : public AMDGPUTargetLowering {
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
- void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 005be96..cac42da 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -1080,13 +1080,6 @@ def SI_INTERP_CONST : InstSI <
imm:$attr, SReg_32:$params))]
>;
-def SI_KIL : InstSI <
- (outs),
- (ins VReg_32:$src),
- "SI_KIL $src",
- [(int_AMDGPU_kill VReg_32:$src)]
->;
-
def SI_WQM : InstSI <
(outs),
(ins),
@@ -1157,11 +1150,23 @@ def SI_END_CF : InstSI <
[(int_SI_end_cf SReg_64:$saved)]
>;
+def SI_KILL : InstSI <
+ (outs),
+ (ins VReg_32:$src),
+ "SI_KIL $src",
+ [(int_AMDGPU_kill VReg_32:$src)]
+>;
+
} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
// Uses = [EXEC], Defs = [EXEC]
} // end IsCodeGenOnly, isPseudo
+def : Pat <
+ (int_AMDGPU_kilp),
+ (SI_KILL (V_MOV_IMM_I32 0xbf800000))
+>;
+
/* int_SI_vs_load_input */
def : Pat<
(int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
@@ -1315,11 +1320,6 @@ def : Pat<
>;
def : Pat <
- (int_AMDGPU_kilp),
- (SI_KIL (V_MOV_IMM_I32 0xbf800000))
->;
-
-def : Pat <
(int_AMDGPU_cube VReg_128:$src),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
(V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 507cb54..34e5f17 100644
--- a/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -68,7 +68,10 @@ private:
static char ID;
const TargetInstrInfo *TII;
- void Skip(MachineInstr &MI, MachineOperand &To);
+ bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
+
+ void Skip(MachineInstr &From, MachineOperand &To);
+ void SkipIfDead(MachineInstr &MI);
void If(MachineInstr &MI);
void Else(MachineInstr &MI);
@@ -78,6 +81,7 @@ private:
void Loop(MachineInstr &MI);
void EndCf(MachineInstr &MI);
+ void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
public:
@@ -100,23 +104,29 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
return new SILowerControlFlowPass(tm);
}
-void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
+bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
+ MachineBasicBlock *To) {
unsigned NumInstr = 0;
- for (MachineBasicBlock *MBB = *From.getParent()->succ_begin();
- NumInstr < SkipThreshold && MBB != To.getMBB() && !MBB->succ_empty();
+ for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty();
MBB = *MBB->succ_begin()) {
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
NumInstr < SkipThreshold && I != E; ++I) {
if (I->isBundle() || !I->isBundled())
- ++NumInstr;
+ if (++NumInstr >= SkipThreshold)
+ return true;
}
}
- if (NumInstr < SkipThreshold)
+ return false;
+}
+
+void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
+
+ if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
return;
DebugLoc DL = From.getDebugLoc();
@@ -125,6 +135,38 @@ void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
.addReg(AMDGPU::EXEC);
}
+void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ if (!shouldSkip(&MBB, &MBB.getParent()->back()))
+ return;
+
+ MachineBasicBlock::iterator Insert = &MI;
+ ++Insert;
+
+ // If the exec mask is non-zero, skip the next two instructions
+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(3)
+ .addReg(AMDGPU::EXEC);
+
+ // Exec mask is zero: Export to NULL target...
+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
+ .addImm(0)
+ .addImm(0x09) // V_008DFC_SQ_EXP_NULL
+ .addImm(0)
+ .addImm(1)
+ .addImm(1)
+ .addReg(AMDGPU::SREG_LIT_0)
+ .addReg(AMDGPU::SREG_LIT_0)
+ .addReg(AMDGPU::SREG_LIT_0)
+ .addReg(AMDGPU::SREG_LIT_0);
+
+ // ... and terminate wavefront
+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
+}
+
void SILowerControlFlowPass::If(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
@@ -251,9 +293,28 @@ void SILowerControlFlowPass::Branch(MachineInstr &MI) {
assert(0);
}
+void SILowerControlFlowPass::Kill(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ // Kill is only allowed in pixel shaders
+ MachineFunction &MF = *MBB.getParent();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ assert(Info->ShaderType == ShaderType::PIXEL);
+
+ // Clear this pixel from the exec mask if the operand is negative
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
+ .addReg(AMDGPU::SREG_LIT_0)
+ .addOperand(MI.getOperand(0));
+
+ MI.eraseFromParent();
+}
+
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
- bool HaveCf = false;
+ bool HaveKill = false;
+ unsigned Depth = 0;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
@@ -267,6 +328,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
switch (MI.getOpcode()) {
default: break;
case AMDGPU::SI_IF:
+ ++Depth;
If(MI);
break;
@@ -287,14 +349,26 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
break;
case AMDGPU::SI_LOOP:
+ ++Depth;
Loop(MI);
break;
case AMDGPU::SI_END_CF:
- HaveCf = true;
+ if (--Depth == 0 && HaveKill) {
+ SkipIfDead(MI);
+ HaveKill = false;
+ }
EndCf(MI);
break;
+ case AMDGPU::SI_KILL:
+ if (Depth == 0)
+ SkipIfDead(MI);
+ else
+ HaveKill = true;
+ Kill(MI);
+ break;
+
case AMDGPU::S_BRANCH:
Branch(MI);
break;
@@ -302,40 +376,5 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
}
}
- // TODO: What is this good for?
- unsigned ShaderType = MF.getInfo<SIMachineFunctionInfo>()->ShaderType;
- if (HaveCf && ShaderType == ShaderType::PIXEL) {
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
-
- MachineBasicBlock &MBB = *BI;
- if (MBB.succ_empty()) {
-
- MachineInstr &MI = *MBB.getFirstNonPHI();
- DebugLoc DL = MI.getDebugLoc();
-
- // If the exec mask is non-zero, skip the next two instructions
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addImm(3)
- .addReg(AMDGPU::EXEC);
-
- // Exec mask is zero: Export to NULL target...
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::EXP))
- .addImm(0)
- .addImm(0x09) // V_008DFC_SQ_EXP_NULL
- .addImm(0)
- .addImm(1)
- .addImm(1)
- .addReg(AMDGPU::SREG_LIT_0)
- .addReg(AMDGPU::SREG_LIT_0)
- .addReg(AMDGPU::SREG_LIT_0)
- .addReg(AMDGPU::SREG_LIT_0);
-
- // ... and terminate wavefront
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ENDPGM));
- }
- }
- }
-
return true;
}
--
1.7.9.5
More information about the mesa-dev
mailing list