[Mesa-dev] [PATCH 5/6] AMDGPU: Add control flow optimization

Tue Dec 11 09:43:07 PST 2012

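Emit an S_CBRANCH_EXECZ to skip over an If/Else region when the region
contains enough instructions (SkipThreshold, currently 12) to make the branch
worthwhile. Also remove S_BRANCH instructions that merely jump to the next
block, since they have no effect.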

Signed-off-by: Christian König <deathsimple at vodafone.de>
---
 lib/Target/AMDGPU/SILowerControlFlow.cpp |   49 ++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 1abcb88..507cb54 100644
--- a/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -63,9 +63,13 @@ namespace {
 class SILowerControlFlowPass : public MachineFunctionPass {
 
 private:
+  static const unsigned SkipThreshold = 12;
+
   static char ID;
   const TargetInstrInfo *TII;
 
+  void Skip(MachineInstr &MI, MachineOperand &To);
+
   void If(MachineInstr &MI);
   void Else(MachineInstr &MI);
   void Break(MachineInstr &MI);
@@ -74,6 +78,8 @@ private:
   void Loop(MachineInstr &MI);
   void EndCf(MachineInstr &MI);
 
+  void Branch(MachineInstr &MI);
+
 public:
   SILowerControlFlowPass(TargetMachine &tm) :
     MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
@@ -94,6 +100,31 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
   return new SILowerControlFlowPass(tm);
 }
 
+/// Insert an S_CBRANCH_EXECZ targeting \p To in front of \p From, but only if
+/// walking the chain of first successors from From's block collects at least
+/// SkipThreshold instructions; otherwise nothing is inserted.
+void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
+  MachineBasicBlock &Parent = *From.getParent();
+  MachineBasicBlock *Cur = *Parent.succ_begin();
+  unsigned Count = 0;
+
+  // Stop at the target, at a block without successors, or once the
+  // threshold is reached.
+  while (Count < SkipThreshold && Cur != To.getMBB() && !Cur->succ_empty()) {
+    MachineBasicBlock::iterator It = Cur->begin(), End = Cur->end();
+    for (; Count < SkipThreshold && It != End; ++It)
+      if (It->isBundle() || !It->isBundled()) // header or unbundled instr
+        ++Count;
+    Cur = *Cur->succ_begin();
+  }
+
+  if (Count >= SkipThreshold)
+    BuildMI(Parent, &From, From.getDebugLoc(),
+            TII->get(AMDGPU::S_CBRANCH_EXECZ))
+        .addOperand(To)
+        .addReg(AMDGPU::EXEC);
+}
+
 void SILowerControlFlowPass::If(MachineInstr &MI) {
 
   MachineBasicBlock &MBB = *MI.getParent();
@@ -108,6 +139,8 @@ void SILowerControlFlowPass::If(MachineInstr &MI) {
           .addReg(AMDGPU::EXEC)
           .addReg(Reg);
 
+  Skip(MI, MI.getOperand(2));
+
   MI.eraseFromParent();
 }
 
@@ -125,6 +158,8 @@ void SILowerControlFlowPass::Else(MachineInstr &MI) {
           .addReg(AMDGPU::EXEC)
           .addReg(Dst);
 
+  Skip(MI, MI.getOperand(2));
+
   MI.eraseFromParent();
 }
 
@@ -206,6 +241,16 @@ void SILowerControlFlowPass::EndCf(MachineInstr &MI) {
   MI.eraseFromParent();
 }
 
+/// Erase an S_BRANCH whose target is the block that directly follows its own
+/// in the function, since such a branch is redundant. Other targets assert.
+void SILowerControlFlowPass::Branch(MachineInstr &MI) {
+  MachineBasicBlock *Next = MI.getParent()->getNextNode();
+  MachineBasicBlock *Target = MI.getOperand(0).getMBB();
+  assert(Target == Next && "S_BRANCH to a non-fallthrough block not handled");
+  if (Target == Next)
+    MI.eraseFromParent();
+}
+
 bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
 
   bool HaveCf = false;
@@ -249,6 +294,10 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
           HaveCf = true;
           EndCf(MI);
           break;
+
+        case AMDGPU::S_BRANCH:
+          Branch(MI);
+          break;
       }
     }
   }
-- 
1.7.9.5