Mesa (master): radeon/llvm: Lower branch/branch_cond into predicated jump

Wed Aug 15 21:21:25 UTC 2012

Module: Mesa
Branch: master
Commit: 0eca5fd919b0a31ea926b5f5072e5e56f7a55269
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=0eca5fd919b0a31ea926b5f5072e5e56f7a55269

Author: Vincent Lejeune <vljn at ovi.com>
Date:   Wed Aug  1 22:49:42 2012 +0200

radeon/llvm: Lower branch/branch_cond into predicated jump

Signed-off-by: Tom Stellard <thomas.stellard at amd.com>

---

 src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp     |  118 ------------
 src/gallium/drivers/radeon/AMDGPUInstrInfo.h       |   14 --
 .../drivers/radeon/AMDILCFGStructurizer.cpp        |   57 +++++--
 src/gallium/drivers/radeon/AMDILInstrInfo.td       |    2 +-
 src/gallium/drivers/radeon/R600ISelLowering.cpp    |   27 +++
 src/gallium/drivers/radeon/R600InstrInfo.cpp       |  196 ++++++++++++++++++++
 src/gallium/drivers/radeon/R600InstrInfo.h         |    9 +
 7 files changed, 278 insertions(+), 145 deletions(-)

diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp
index 2af0367..03a647e 100644
--- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp
@@ -97,124 +97,6 @@ bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
   return false;
 }
 
-bool AMDGPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
-                                   MachineBasicBlock *&TBB,
-                                   MachineBasicBlock *&FBB,
-                                   SmallVectorImpl<MachineOperand> &Cond,
-                                   bool AllowModify) const {
-  bool retVal = true;
-  return retVal;
-  MachineBasicBlock::iterator iter = MBB.begin();
-  if (!getNextBranchInstr(iter, MBB)) {
-    retVal = false;
-  } else {
-    MachineInstr *firstBranch = iter;
-    if (!getNextBranchInstr(++iter, MBB)) {
-      if (firstBranch->getOpcode() == AMDGPU::BRANCH) {
-        TBB = firstBranch->getOperand(0).getMBB();
-        firstBranch->eraseFromParent();
-        retVal = false;
-      } else {
-        TBB = firstBranch->getOperand(0).getMBB();
-        FBB = *(++MBB.succ_begin());
-        if (FBB == TBB) {
-          FBB = *(MBB.succ_begin());
-        }
-        Cond.push_back(firstBranch->getOperand(1));
-        retVal = false;
-      }
-    } else {
-      MachineInstr *secondBranch = iter;
-      if (!getNextBranchInstr(++iter, MBB)) {
-        if (secondBranch->getOpcode() == AMDGPU::BRANCH) {
-          TBB = firstBranch->getOperand(0).getMBB();
-          Cond.push_back(firstBranch->getOperand(1));
-          FBB = secondBranch->getOperand(0).getMBB();
-          secondBranch->eraseFromParent();
-          retVal = false;
-        } else {
-          assert(0 && "Should not have two consecutive conditional branches");
-        }
-      } else {
-        MBB.getParent()->viewCFG();
-        assert(0 && "Should not have three branch instructions in"
-               " a single basic block");
-        retVal = false;
-      }
-    }
-  }
-  return retVal;
-}
-
-unsigned int AMDGPUInstrInfo::getBranchInstr(const MachineOperand &op) const {
-  const MachineInstr *MI = op.getParent();
-  
-  switch (MI->getDesc().OpInfo->RegClass) {
-  default: // FIXME: fallthrough??
-  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
-  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
-  };
-}
-
-unsigned int
-AMDGPUInstrInfo::InsertBranch(MachineBasicBlock &MBB,
-                             MachineBasicBlock *TBB,
-                             MachineBasicBlock *FBB,
-                             const SmallVectorImpl<MachineOperand> &Cond,
-                             DebugLoc DL) const
-{
-  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
-  for (unsigned int x = 0; x < Cond.size(); ++x) {
-    Cond[x].getParent()->dump();
-  }
-  if (FBB == 0) {
-    if (Cond.empty()) {
-      BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(TBB);
-    } else {
-      BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
-        .addMBB(TBB).addReg(Cond[0].getReg());
-    }
-    return 1;
-  } else {
-    BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
-      .addMBB(TBB).addReg(Cond[0].getReg());
-    BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(FBB);
-  }
-  assert(0 && "Inserting two branches not supported");
-  return 0;
-}
-
-unsigned int AMDGPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin()) {
-    return 0;
-  }
-  --I;
-  switch (I->getOpcode()) {
-  default:
-    return 0;
-    ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
-  case AMDGPU::BRANCH:
-    I->eraseFromParent();
-    break;
-  }
-  I = MBB.end();
-  
-  if (I == MBB.begin()) {
-    return 1;
-  }
-  --I;
-  switch (I->getOpcode()) {
-    // FIXME: only one case??
-  default:
-    return 1;
-    ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
-    I->eraseFromParent();
-    break;
-  }
-  return 2;
-}
-
 MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
   MachineBasicBlock::iterator tmp = MBB->end();
   if (!MBB->size()) {
diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
index 28952cf..31400a7 100644
--- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
+++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
@@ -49,8 +49,6 @@ private:
   TargetMachine &TM;
   bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
                           MachineBasicBlock &MBB) const;
-  unsigned int getBranchInstr(const MachineOperand &op) const;
-
 public:
   explicit AMDGPUInstrInfo(TargetMachine &tm);
 
@@ -77,18 +75,6 @@ public:
                         MachineBasicBlock::iterator &MBBI,
                         LiveVariables *LV) const;
 
-  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
-                     MachineBasicBlock *&FBB,
-                     SmallVectorImpl<MachineOperand> &Cond,
-                     bool AllowModify) const;
-
-  unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-
-  unsigned
-  InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
-               MachineBasicBlock *FBB,
-               const SmallVectorImpl<MachineOperand> &Cond,
-               DebugLoc DL) const;
 
   virtual void copyPhysReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, DebugLoc DL,
diff --git a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
index 95a75ac..b167d62 100644
--- a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
+++ b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
@@ -301,6 +301,7 @@ public:
   bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
 
 private:
+  void reversePredicateSetter(typename BlockT::iterator);
   void   orderBlocks();
   void   printOrderedBlocks(llvm::raw_ostream &OS);
   int patternMatch(BlockT *CurBlock);
@@ -1664,6 +1665,31 @@ void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
 } //mergeLooplandBlock
 
 template<class PassT>
+void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I)
+{
+  while (I--) {
+    if (I->getOpcode() == AMDGPU::PRED_X) {
+      switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
+      case OPCODE_IS_ZERO_INT:
+        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
+        return;
+      case OPCODE_IS_NOT_ZERO_INT:
+        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
+        return;
+      case OPCODE_IS_ZERO:
+        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
+        return;
+      case OPCODE_IS_NOT_ZERO:
+        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
+        return;
+      default:
+        assert(0 && "PRED_X Opcode invalid!");
+      }
+    }
+  }
+}
+
+template<class PassT>
 void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
                                                  BlockT *exitBlk,
                                                  BlockT *exitLandBlk,
@@ -1695,14 +1721,17 @@ void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
 
   if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
     //break_logical
-    int newOpcode =
-    (trueBranch == exitBlk) ? CFGTraits::getBreakNzeroOpcode(oldOpcode)
-                            : CFGTraits::getBreakZeroOpcode(oldOpcode);
+
+    if (trueBranch != exitBlk) {
+      reversePredicateSetter(branchInstrPos);
+    }
+    int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
     CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
   } else {
-    int newOpcode =
-    (trueBranch == exitBlk) ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
-                            : CFGTraits::getBranchZeroOpcode(oldOpcode);
+    if (trueBranch != exitBlk) {
+      reversePredicateSetter(branchInstr);
+    }
+    int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
     CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
     if (exitBlk != exitLandBlk) {
       //splice is insert-before ...
@@ -2765,7 +2794,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
 
   static int getBreakNzeroOpcode(int oldOpcode) {
     switch(oldOpcode) {
-      ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALNZ);
+      case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALNZ_i32;
     default:
       assert(0 && "internal error");
     };
@@ -2774,7 +2803,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
 
   static int getBreakZeroOpcode(int oldOpcode) {
     switch(oldOpcode) {
-      ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALZ);
+      case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALZ_i32;
     default:
       assert(0 && "internal error");
     };
@@ -2783,6 +2812,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
 
   static int getBranchNzeroOpcode(int oldOpcode) {
     switch(oldOpcode) {
+    case AMDGPU::JUMP: return AMDGPU::IF_LOGICALNZ_i32;
       ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ);
       case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ;
     default:
@@ -2793,6 +2823,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
 
   static int getBranchZeroOpcode(int oldOpcode) {
     switch(oldOpcode) {
+    case AMDGPU::JUMP: return AMDGPU::IF_LOGICALZ_i32;
       ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ);
       case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z;
     default:
@@ -2804,7 +2835,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
   static int getContinueNzeroOpcode(int oldOpcode)
   {
     switch(oldOpcode) {
-      ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALNZ);
+      case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
       default:
         assert(0 && "internal error");
     };
@@ -2813,7 +2844,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
 
   static int getContinueZeroOpcode(int oldOpcode) {
     switch(oldOpcode) {
-      ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALZ);
+      case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
     default:
       assert(0 && "internal error");
     };
@@ -2845,6 +2876,8 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
 
   static bool isCondBranch(MachineInstr *instr) {
     switch (instr->getOpcode()) {
+      case AMDGPU::JUMP:
+        return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
       ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
       case AMDGPU::SI_IF_NZ:
       case AMDGPU::SI_IF_Z:
@@ -2857,8 +2890,8 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
 
   static bool isUncondBranch(MachineInstr *instr) {
     switch (instr->getOpcode()) {
-    case AMDGPU::BRANCH:
-      break;
+    case AMDGPU::JUMP:
+      return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
     default:
       return false;
     }
diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.td b/src/gallium/drivers/radeon/AMDILInstrInfo.td
index b2a0541..b683e49 100644
--- a/src/gallium/drivers/radeon/AMDILInstrInfo.td
+++ b/src/gallium/drivers/radeon/AMDILInstrInfo.td
@@ -217,7 +217,7 @@ include "AMDILIntrinsics.td"
 // Custom Inserter for Branches and returns, this eventually will be a
 // seperate pass
 //===---------------------------------------------------------------------===//
-let isTerminator = 1 in {
+let isTerminator = 1, usesCustomInserter = 1 in {
   def BRANCH : ILFormat<(outs), (ins brtarget:$target),
       "; Pseudo unconditional branch instruction",
       [(br bb:$target)]>;
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp
index 26f14fa..1f5f417 100644
--- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
+++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
@@ -210,6 +210,33 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
               .addReg(t1, RegState::Implicit);
       break;
     }
+  case AMDGPU::BRANCH:
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+              .addOperand(MI->getOperand(0))
+              .addReg(0);
+      break;
+  case AMDGPU::BRANCH_COND_f32:
+    MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);
+
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
+            .addReg(AMDGPU::PREDICATE_BIT)
+            .addOperand(MI->getOperand(1))
+            .addImm(OPCODE_IS_ZERO);
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+            .addOperand(MI->getOperand(0))
+            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+    break;
+  case AMDGPU::BRANCH_COND_i32:
+    MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);
+
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
+            .addReg(AMDGPU::PREDICATE_BIT)
+            .addOperand(MI->getOperand(1))
+            .addImm(OPCODE_IS_ZERO_INT);
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+            .addOperand(MI->getOperand(0))
+            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+   break;
 
 
   }
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp
index c807d5c..4a396ef 100644
--- a/src/gallium/drivers/radeon/R600InstrInfo.cpp
+++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp
@@ -17,6 +17,7 @@
 #include "R600RegisterInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "AMDILUtilityFunctions.h"
+#include "AMDGPUUtil.h"
 
 #define GET_INSTRINFO_CTOR
 #include "AMDGPUGenDFAPacketizer.inc"
@@ -94,6 +95,8 @@ unsigned R600InstrInfo::getIEQOpcode() const
 
 bool R600InstrInfo::isMov(unsigned Opcode) const
 {
+
+
   switch(Opcode) {
   default: return false;
   case AMDGPU::MOV:
@@ -188,6 +191,199 @@ DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
   return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
 }
 
+static bool
+isPredicateSetter(unsigned opcode)
+{
+  switch (opcode) {
+  case AMDGPU::PRED_X:
+    return true;
+  default:
+    return false;
+  }
+}
+
+static MachineInstr *
+findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator I)
+{
+  while (I != MBB.begin()) {
+    --I;
+    MachineInstr *MI = I;
+    if (isPredicateSetter(MI->getOpcode()))
+      return MI;
+  }
+
+  return NULL;
+}
+
+bool
+R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+                             MachineBasicBlock *&TBB,
+                             MachineBasicBlock *&FBB,
+                             SmallVectorImpl<MachineOperand> &Cond,
+                             bool AllowModify) const
+{
+  // Most of the following comes from the ARM implementation of AnalyzeBranch
+
+  // If the block has no terminators, it just falls into the block after it.
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin())
+    return false;
+  --I;
+  while (I->isDebugValue()) {
+    if (I == MBB.begin())
+      return false;
+    --I;
+  }
+  if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
+    return false;
+  }
+
+  // Get the last instruction in the block.
+  MachineInstr *LastInst = I;
+
+  // If there is only one terminator instruction, process it.
+  unsigned LastOpc = LastInst->getOpcode();
+  if (I == MBB.begin() ||
+      static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
+    if (LastOpc == AMDGPU::JUMP) {
+      if(!isPredicated(LastInst)) {
+        TBB = LastInst->getOperand(0).getMBB();
+        return false;
+      } else {
+        MachineInstr *predSet = I;
+        while (!isPredicateSetter(predSet->getOpcode())) {
+          predSet = --I;
+        }
+        TBB = LastInst->getOperand(0).getMBB();
+        Cond.push_back(predSet->getOperand(1));
+        Cond.push_back(predSet->getOperand(2));
+        Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+        return false;
+      }
+    }
+    return true;  // Can't handle indirect branch.
+  }
+
+  // Get the instruction before it if it is a terminator.
+  MachineInstr *SecondLastInst = I;
+  unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+  // If the block ends with a B and a Bcc, handle it.
+  if (SecondLastOpc == AMDGPU::JUMP &&
+      isPredicated(SecondLastInst) &&
+      LastOpc == AMDGPU::JUMP &&
+      !isPredicated(LastInst)) {
+    MachineInstr *predSet = --I;
+    while (!isPredicateSetter(predSet->getOpcode())) {
+      predSet = --I;
+    }
+    TBB = SecondLastInst->getOperand(0).getMBB();
+    FBB = LastInst->getOperand(0).getMBB();
+    Cond.push_back(predSet->getOperand(1));
+    Cond.push_back(predSet->getOperand(2));
+    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+    return false;
+  }
+
+  // Otherwise, can't handle this.
+  return true;
+}
+
+int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
+  const MachineInstr *MI = op.getParent();
+
+  switch (MI->getDesc().OpInfo->RegClass) {
+  default: // FIXME: fallthrough??
+  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
+  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
+  };
+}
+
+unsigned
+R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
+                            MachineBasicBlock *TBB,
+                            MachineBasicBlock *FBB,
+                            const SmallVectorImpl<MachineOperand> &Cond,
+                            DebugLoc DL) const
+{
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+  if (FBB == 0) {
+    if (Cond.empty()) {
+      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
+      return 1;
+    } else {
+      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+      assert(PredSet && "No previous predicate !");
+      PredSet->getOperand(1).addTargetFlag(1<<4);
+      PredSet->getOperand(2).setImm(Cond[1].getImm());
+
+      BuildMI(&MBB, DL, get(AMDGPU::JUMP))
+             .addMBB(TBB)
+             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+      return 1;
+    }
+  } else {
+    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+    assert(PredSet && "No previous predicate !");
+    PredSet->getOperand(1).addTargetFlag(1<<4);
+    PredSet->getOperand(2).setImm(Cond[1].getImm());
+    BuildMI(&MBB, DL, get(AMDGPU::JUMP))
+            .addMBB(TBB)
+            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
+    return 2;
+  }
+}
+
+unsigned
+R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
+{
+
+  // Note : we leave PRED* instructions there.
+  // They may be needed when predicating instructions.
+
+  MachineBasicBlock::iterator I = MBB.end();
+
+  if (I == MBB.begin()) {
+    return 0;
+  }
+  --I;
+  switch (I->getOpcode()) {
+  default:
+    return 0;
+  case AMDGPU::JUMP:
+    if (isPredicated(I)) {
+      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+      char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
+      predSet->getOperand(1).setTargetFlags(flag);
+    }
+    I->eraseFromParent();
+    break;
+  }
+  I = MBB.end();
+
+  if (I == MBB.begin()) {
+    return 1;
+  }
+  --I;
+  switch (I->getOpcode()) {
+    // FIXME: only one case??
+  default:
+    return 1;
+  case AMDGPU::JUMP:
+    if (isPredicated(I)) {
+      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+      char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
+      predSet->getOperand(1).setTargetFlags(flag);
+    }
+    I->eraseFromParent();
+    break;
+  }
+  return 2;
+}
+
 bool
 R600InstrInfo::isPredicated(const MachineInstr *MI) const
 {
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h
index 9bdda7a..2819b0b 100644
--- a/src/gallium/drivers/radeon/R600InstrInfo.h
+++ b/src/gallium/drivers/radeon/R600InstrInfo.h
@@ -34,6 +34,8 @@ namespace llvm {
   const R600RegisterInfo RI;
   AMDGPUTargetMachine &TM;
 
+  int getBranchInstr(const MachineOperand &op) const;
+
   public:
   explicit R600InstrInfo(AMDGPUTargetMachine &tm);
 
@@ -62,6 +64,13 @@ namespace llvm {
 
   DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
                                            const ScheduleDAG *DAG) const;
+  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+                     SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
+
+  unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
+
+  unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
   bool isPredicated(const MachineInstr *MI) const;
 
   bool isPredicable(MachineInstr *MI) const;