[Beignet] [PATCH] add selection IR SEL_OP_LABEL and SEL_OP_BRANCH.

xionghu.luo at intel.com xionghu.luo at intel.com
Wed Feb 8 10:58:36 UTC 2017


From: Luo Xionghu <xionghu.luo at intel.com>

 Lower the SEL_OP_LABEL and the forward/backward SEL_OP_BRANCH selection IR in a separate pass that runs after instruction selection.

Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
 backend/src/CMakeLists.txt                         |   1 +
 backend/src/backend/gen_context.cpp                |   2 +
 backend/src/backend/gen_insn_selection.cpp         | 170 +++-----
 backend/src/backend/gen_insn_selection.hpp         |  17 +-
 backend/src/backend/gen_insn_selection.hxx         |   1 +
 .../backend/gen_insn_selection_branch_lowering.cpp | 468 +++++++++++++++++++++
 backend/src/backend/gen_insn_selection_passes.hpp  |  30 ++
 backend/src/sys/intrusive_list.hpp                 |   4 +
 8 files changed, 581 insertions(+), 112 deletions(-)
 create mode 100644 backend/src/backend/gen_insn_selection_branch_lowering.cpp
 create mode 100644 backend/src/backend/gen_insn_selection_passes.hpp

diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index 6ff25e7..d1f3f32 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -104,6 +104,7 @@ set (GBE_SRC
     backend/gen_insn_selection.cpp
     backend/gen_insn_selection.hpp
     backend/gen_insn_selection_optimize.cpp
+    backend/gen_insn_selection_branch_lowering.cpp
     backend/gen_insn_scheduling.cpp
     backend/gen_insn_scheduling.hpp
     backend/gen_insn_selection_output.cpp
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index c8019e3..19550a3 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -27,6 +27,7 @@
 #include "backend/gen_defs.hpp"
 #include "backend/gen_encoder.hpp"
 #include "backend/gen_insn_selection.hpp"
+#include "backend/gen_insn_selection_passes.hpp"
 #include "backend/gen_insn_scheduling.hpp"
 #include "backend/gen_insn_selection_output.hpp"
 #include "backend/gen_reg_allocation.hpp"
@@ -4047,6 +4048,7 @@ namespace gbe
     if (OCL_OUTPUT_SEL_IR)
       outputSelectionIR(*this, this->sel, genKernel->getName());
     schedulePreRegAllocation(*this, *this->sel);
+    lowerBranch(this->sel);
     sel->addID();
     if (UNLIKELY(ra->allocate(*this->sel) == false))
       return false;
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 128c2bc..e7ff970 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -291,6 +291,11 @@ namespace gbe
     insn->parent = this;
   }
 
+  void SelectionBlock::insertAfter(SelectionInstruction *prevInsn, SelectionInstruction *insn) {
+    this->insnList.insert_after(prevInsn, insn); // link insn into this block's instruction list, positioned relative to prevInsn
+    insn->parent = this; // record this block as the owner of the inserted instruction
+  }
+
   void SelectionBlock::append(SelectionVector *vec) {
     this->vectorList.push_back(vec);
   }
@@ -646,7 +651,7 @@ namespace gbe
     /*! Encode a barrier instruction */
     void FENCE(GenRegister dst);
     /*! Encode a label instruction */
-    void LABEL(ir::LabelIndex label);
+    void LABEL(ir::LabelIndex label, ir::LabelIndex jip);
     /*! Jump indexed instruction, return the encoded instruction count according to jump distance. */
     int JMPI(Reg src, ir::LabelIndex target, ir::LabelIndex origin);
     /*! IF indexed instruction */
@@ -661,6 +666,8 @@ namespace gbe
     void BRD(Reg src, ir::LabelIndex jip);
     /*! BRC indexed instruction */
     void BRC(Reg src, ir::LabelIndex jip, ir::LabelIndex uip);
+    /*! BRANCH instruction */
+    void BRANCH(Reg reg, ir::LabelIndex src, ir::LabelIndex dst, uint32_t pred_index, uint32_t jip);
     /*! Compare instructions */
     void CMP(uint32_t conditional, Reg src0, Reg src1, Reg dst = GenRegister::null());
     /*! Select instruction with embedded comparison */
@@ -843,6 +850,12 @@ namespace gbe
       return temps;
     }
 
+    INLINE ir::LabelIndex newAuxLabel()
+    {
+      currAuxLabel++; // bump the counter first, so the label handed out is the incremented value
+      return (ir::LabelIndex)currAuxLabel;
+    }
+
     /*! Use custom allocators */
     GBE_CLASS(Opaque);
     friend class SelectionBlock;
@@ -858,12 +871,6 @@ namespace gbe
     bool bHasSends;
     uint32_t ldMsgOrder;
     bool slowByteGather;
-    INLINE ir::LabelIndex newAuxLabel()
-    {
-      currAuxLabel++;
-      return (ir::LabelIndex)currAuxLabel;
-    }
-
   };
 
   ///////////////////////////////////////////////////////////////////////////
@@ -1244,9 +1251,10 @@ namespace gbe
   /*! Syntactic sugar for method declaration */
   typedef const GenRegister &Reg;
 
-  void Selection::Opaque::LABEL(ir::LabelIndex index) {
+  void Selection::Opaque::LABEL(ir::LabelIndex index, ir::LabelIndex jip) {
     SelectionInstruction *insn = this->appendInsn(SEL_OP_LABEL, 0, 0);
     insn->index = index.value();
+    insn->index1 = jip.value();
   }
 
   void Selection::Opaque::BARRIER(GenRegister src, GenRegister fence, uint32_t barrierType) {
@@ -1294,6 +1302,15 @@ namespace gbe
     insn->index1 = uip.value();
   }
 
+  void Selection::Opaque::BRANCH(Reg reg, ir::LabelIndex src, ir::LabelIndex dst, uint32_t pred_index, uint32_t jip) {
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_BRANCH, 0, 1); // presumably (opcode, dstNum, srcNum) as in Selection::create — TODO confirm
+    insn->src(0) = reg;
+    insn->index = src.value();   // NOTE(review): the OP_BRA call site passes the branch *target* label in this slot, so index holds the target
+    insn->index1 = dst.value();  // NOTE(review): ...and the label of the branching block here; BranchLowering reads index as dst and index1 as src, so the parameter names are swapped relative to usage
+    insn->jip = jip;             // JIP label value, consumed by the branch lowering pass
+    insn->extra.pred_index = pred_index; // predicate register index; the call site passes 0 for an unpredicated branch
+  }
+
   void Selection::Opaque::IF(Reg src, ir::LabelIndex jip, ir::LabelIndex uip) {
     SelectionInstruction *insn = this->appendInsn(SEL_OP_IF, 0, 1);
     insn->src(0) = src;
@@ -1306,7 +1323,7 @@ namespace gbe
     SelectionInstruction *insn = this->appendInsn(SEL_OP_ELSE, 0, 1);
     insn->src(0) = src;
     insn->index = jip.value();
-    this->LABEL(elseLabel);
+    this->LABEL(elseLabel, ir::LabelIndex(0));
   }
 
   void Selection::Opaque::ENDIF(Reg src, ir::LabelIndex jip, ir::LabelIndex endifLabel) {
@@ -1314,7 +1331,7 @@ namespace gbe
       this->block->endifLabel = this->newAuxLabel();
     else
       this->block->endifLabel = endifLabel;
-    this->LABEL(this->block->endifLabel);
+    this->LABEL(this->block->endifLabel, ir::LabelIndex(0));
     SelectionInstruction *insn = this->appendInsn(SEL_OP_ENDIF, 0, 1);
     insn->src(0) = src;
     insn->index = this->block->endifLabel.value();
@@ -2530,7 +2547,7 @@ namespace gbe
     this->block->hasBranch = bb.getLastInstruction()->getOpcode() == OP_BRA ||
                              bb.getLastInstruction()->getOpcode() == OP_RET;
     if (!this->block->hasBranch)
-      this->block->endifOffset = -1;
+      this->block->needJump = false;
 
     // Build the DAG on the fly
     uint32_t insnNum = 0;
@@ -2604,7 +2621,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
     // Bottom up code generation
     bool needEndif = this->block->hasBranch == false && !this->block->hasBarrier;
     needEndif = needEndif && bb.needEndif;
-    this->block->removeSimpleIfEndif = insnNum < 10 && isSimpleBlock(bb, insnNum);
+    this->block->removeSimpleIfEndif = false;//insnNum < 10 && isSimpleBlock(bb, insnNum);
     if (needEndif && !this->block->removeSimpleIfEndif) {
       if(!bb.needIf) // this basic block is the exit of a structure
         this->ENDIF(GenRegister::immd(0), bb.endifLabel, bb.endifLabel);
@@ -2750,7 +2767,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
   ///////////////////////////////////////////////////////////////////////////
   // Code selection public implementation
   ///////////////////////////////////////////////////////////////////////////
-  const GenContext& Selection::getCtx()
+  GenContext& Selection::getCtx()
   {
     return this->opaque->ctx;
   }
@@ -2904,6 +2921,12 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
     return this->opaque->isPartialWrite(reg);
   }
 
+  GenRegister Selection::selReg(ir::Register reg, ir::Type type) const {
+    return this->opaque->selReg(reg, type); // pure forwarder to the Opaque implementation
+  }
+
+  ir::LabelIndex Selection::newAuxLabel() { return this->opaque->newAuxLabel(); } // forwards to Opaque::newAuxLabel()
+
   SelectionInstruction *Selection::create(SelectionOpcode opcode, uint32_t dstNum, uint32_t srcNum) {
     return this->opaque->create(opcode, dstNum, srcNum);
   }
@@ -6602,18 +6625,18 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
     {
       using namespace ir;
       const LabelIndex label = insn.getLabelIndex();
-      const GenRegister src0 = sel.getBlockIP();
-      const GenRegister src1 = sel.getLabelImmReg(label);
-      const uint32_t simdWidth = sel.ctx.getSimdWidth();
       GBE_ASSERTM(label < sel.ctx.getMaxLabel(), "We reached the maximum label number which is reserved for barrier handling");
-      sel.LABEL(label);
 
-      if(!insn.getParent()->needIf)
+      if(!insn.getParent()->needIf) {
+        sel.LABEL(label, LabelIndex(0));
         return true;
+      }
 
       // Do not emit any code for the "returning" block. There is no need for it
-      if (insn.getParent() == &sel.ctx.getFunction().getBottomBlock())
+      if (insn.getParent() == &sel.ctx.getFunction().getBottomBlock()) {
+        sel.LABEL(label, LabelIndex(0));
         return true;
+      }
 
       LabelIndex jip;
       const LabelIndex nextLabel = insn.getParent()->getNextBlock()->getLabelIndex();
@@ -6621,85 +6644,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
         jip = sel.ctx.getLabelIndex(&insn);
       else
         jip = nextLabel;
-
-      // Emit the mask computation at the head of each basic block
-      sel.push();
-        sel.curr.noMask = 1;
-        sel.curr.predicate = GEN_PREDICATE_NONE;
-        sel.curr.flag = 0;
-        sel.curr.subFlag = 1;
-        sel.cmpBlockIP(GEN_CONDITIONAL_LE, src0, src1);
-      sel.pop();
-
-      if (sel.block->hasBarrier) {
-        // If this block has barrier, we don't execute the block until all lanes
-        // are 1s. Set each reached lane to 1, then check all lanes. If there is any
-        // lane not reached, we jump to jip. And no need to issue if/endif for
-        // this block, as it will always excute with all lanes activated.
-        sel.push();
-          sel.curr.predicate = GEN_PREDICATE_NORMAL;
-          sel.curr.flag = 0;
-          sel.curr.subFlag = 1;
-          sel.setBlockIP(src0, sel.ctx.getMaxLabel());
-          sel.curr.predicate = GEN_PREDICATE_NONE;
-          sel.curr.noMask = 1;
-          sel.cmpBlockIP(GEN_CONDITIONAL_EQ, src0, sel.ctx.getMaxLabel());
-          if (simdWidth == 8)
-            sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
-          else if (simdWidth == 16)
-            sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL16H;
-          else
-            NOT_IMPLEMENTED;
-          sel.curr.noMask = 1;
-          sel.curr.execWidth = 1;
-          sel.curr.inversePredicate = 1;
-          sel.JMPI(GenRegister::immd(0), jip, label);
-        sel.pop();
-        // FIXME, if the last BRA is unconditional jump, we don't need to update the label here.
-        sel.push();
-         sel.curr.predicate = GEN_PREDICATE_NORMAL;
-         sel.curr.flag = 0;
-         sel.curr.subFlag = 1;
-         sel.setBlockIP(src0, label.value());
-        sel.pop();
-      }
-      else {
-        if (sel.ctx.hasJIP(&insn) &&
-            // If jump to next label and the endif offset is -1, then
-            // We don't need to add a jmpi here, as the following IF will do the same
-            // thing if all channels are disabled.
-            (jip != nextLabel || sel.block->endifOffset != -1)) {
-          // If it is required, insert a JUMP to bypass the block
-          sel.push();
-            sel.curr.flag = 0;
-            sel.curr.subFlag = 1;
-            if (simdWidth == 8)
-              sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
-            else if (simdWidth == 16)
-              sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
-            else
-              NOT_IMPLEMENTED;
-            sel.curr.noMask = 1;
-            sel.curr.execWidth = 1;
-            sel.curr.inversePredicate = 1;
-            sel.JMPI(GenRegister::immd(0), jip, label);
-          sel.pop();
-        }
-        if(!sel.block->removeSimpleIfEndif){
-          sel.push();
-            sel.curr.flag = 0;
-            sel.curr.subFlag = 1;
-            sel.curr.predicate = GEN_PREDICATE_NORMAL;
-            if(!insn.getParent()->needEndif && insn.getParent()->needIf) {
-              ir::LabelIndex label = insn.getParent()->endifLabel;
-              sel.IF(GenRegister::immd(0), label, label);
-            }
-            else
-              sel.IF(GenRegister::immd(0), sel.block->endifLabel, sel.block->endifLabel);
-          sel.pop();
-        }
-      }
-
+      sel.LABEL(label, jip);
       return true;
     }
     DECL_CTOR(LabelInstruction, 1, 1);
@@ -7259,7 +7204,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
           sel.curr.predicate = GEN_PREDICATE_NONE;
           if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif)
             sel.ENDIF(GenRegister::immd(0), nextLabel);
-          sel.block->endifOffset = -1;
+          sel.block->needJump = false;
         sel.pop();
       } else {
         // Update the PcIPs
@@ -7275,7 +7220,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
           else if(insn.getParent()->needEndif)
             sel.ENDIF(GenRegister::immd(0), nextLabel);
         }
-        sel.block->endifOffset = -1;
+        sel.block->needJump = false;
         if (nextLabel == jip) return;
         // Branch to the jump target
         sel.push();
@@ -7283,7 +7228,8 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
           sel.curr.noMask = 1;
           sel.curr.predicate = GEN_PREDICATE_NONE;
           // Actually, the origin of this JMPI should be the beginning of next BB.
-          sel.block->endifOffset -= sel.JMPI(GenRegister::immd(0), jip, ir::LabelIndex(curr->getLabelIndex().value() + 1));
+          sel.JMPI(GenRegister::immd(0), jip, ir::LabelIndex(curr->getLabelIndex().value() + 1));
+          sel.block->needJump = true;
         sel.pop();
       }
     }
@@ -7317,7 +7263,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
           sel.curr.flagIndex = pred.value();
           sel.curr.predicate = GEN_PREDICATE_NORMAL;
           sel.setBlockIP(ip, dst.value());
-          sel.block->endifOffset = -1;
+          sel.block->needJump = false;
           sel.curr.predicate = GEN_PREDICATE_NONE;
           if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif)
             sel.ENDIF(GenRegister::immd(0), next);
@@ -7327,7 +7273,8 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
           else
             sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
           sel.curr.noMask = 1;
-          sel.block->endifOffset -= sel.JMPI(GenRegister::immd(0), jip, label);
+          sel.JMPI(GenRegister::immd(0), jip, label);
+          sel.block->needJump = true;
         sel.pop();
       } else {
         const LabelIndex next = bb.getNextBlock()->getLabelIndex();
@@ -7336,7 +7283,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
         sel.curr.subFlag = 1;
         if(insn.getParent()->needEndif)
         sel.setBlockIP(ip, dst.value());
-        sel.block->endifOffset = -1;
+        sel.block->needJump = false;
         if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) {
           if(insn.getParent()->needEndif && !insn.getParent()->needIf)
             sel.ENDIF(GenRegister::immd(0), insn.getParent()->endifLabel, insn.getParent()->endifLabel);
@@ -7348,7 +7295,8 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
           sel.curr.execWidth = 1;
           sel.curr.noMask = 1;
           sel.curr.predicate = GEN_PREDICATE_NONE;
-          sel.block->endifOffset -= sel.JMPI(GenRegister::immd(0), jip, label);
+          sel.JMPI(GenRegister::immd(0), jip, label);
+          sel.block->needJump = true;
         sel.pop();
       }
     }
@@ -7362,6 +7310,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
       else if (opcode == OP_BRA) {
         const LabelIndex dst = insn.getLabelIndex();
         const LabelIndex src = insn.getParent()->getLabelIndex();
+        const LabelIndex jip = sel.ctx.getLabelIndex(&insn);
 
         sel.push();
         if (insn.isPredicated() == true) {
@@ -7369,11 +7318,12 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
             sel.curr.externFlag = 1;
         }
 
-        // We handle foward and backward branches differently
-        if (uint32_t(dst) <= uint32_t(src))
-          this->emitBackwardBranch(sel, insn, dst, src);
-        else
-          this->emitForwardBranch(sel, insn, dst, src);
+        if (insn.isPredicated() == true) {
+            const Register pred = insn.getPredicateIndex();
+            sel.BRANCH(GenRegister::immd(0), dst, src, pred.value(), jip.value());
+        } else {
+            sel.BRANCH(GenRegister::immd(0), dst, src, 0, jip.value());
+        }
         sel.pop();
       }
       else if(opcode == OP_IF) {
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 01999a2..fbbb826 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -68,6 +68,8 @@ namespace gbe
   public:
     /*! Owns the instruction */
     SelectionBlock *parent;
+    /*! Get the parent Selection block */
+    SelectionBlock *getParent(void) { return parent; }
     /*! Append an instruction before this one */
     void prepend(SelectionInstruction &insn);
     /*! Append an instruction after this one */
@@ -160,6 +162,7 @@ namespace gbe
         uint16_t printfSize;
       };
       uint32_t workgroupOp;
+      uint32_t pred_index;
     } extra;
     /*! Gen opcode */
     uint8_t opcode;
@@ -171,6 +174,8 @@ namespace gbe
     uint32_t index;
     /*! For BRC/IF to store the UIP */
     uint32_t index1;
+    /*! for BRANCH to store jip */
+    uint32_t jip;
     /*! instruction ID used for vector allocation. */
     uint32_t ID;
     DebugInfo DBGInfo;
@@ -258,8 +263,10 @@ namespace gbe
     void append(SelectionInstruction *insn);
     /*! Append a new selection instruction at the beginning of the block */
     void prepend(SelectionInstruction *insn);
+    /*! insert a new selection instruction after prevInsn */
+    void insertAfter(SelectionInstruction *prevInsn, SelectionInstruction *insn);
     ir::LabelIndex endifLabel;
-    int endifOffset;
+    bool needJump;
     bool hasBarrier;
     bool hasBranch;
     bool removeSimpleIfEndif;
@@ -305,6 +312,10 @@ namespace gbe
     bool isScalarReg(const ir::Register &reg) const;
     /*! is this register a partially written register.*/
     bool isPartialWrite(const ir::Register &reg) const;
+    /*! create GenRegister for ir Register.*/
+    GenRegister selReg(ir::Register reg, ir::Type type) const;
+
+    ir::LabelIndex newAuxLabel();
     /*! Create a new selection instruction */
     SelectionInstruction *create(SelectionOpcode, uint32_t dstNum, uint32_t srcNum);
     /*! List of emitted blocks */
@@ -316,11 +327,13 @@ namespace gbe
 
     /* optimize at selection IR level */
     void optimize(void);
+    /* branch lower at selection IR level */
+    void branchLowering(void);
     uint32_t opt_features;
 
     /* Add insn ID for sel IR */
     void addID(void);
-    const GenContext &getCtx();
+    GenContext &getCtx();
 
     /*! Use custom allocators */
     GBE_CLASS(Selection);
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 5d96e9e..9e4806b 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -90,6 +90,7 @@ DECL_SELECTION_IR(CONVI64_TO_I, UnaryInstruction)
 DECL_SELECTION_IR(CONVI64_TO_F, I64ToFloatInstruction)
 DECL_SELECTION_IR(CONVF_TO_I64, FloatToI64Instruction)
 DECL_SELECTION_IR(I64MADSAT, I64MADSATInstruction)
+DECL_SELECTION_IR(BRANCH, UnaryInstruction)
 DECL_SELECTION_IR(BRC, UnaryInstruction)
 DECL_SELECTION_IR(BRD, UnaryInstruction)
 DECL_SELECTION_IR(IF, UnaryInstruction)
diff --git a/backend/src/backend/gen_insn_selection_branch_lowering.cpp b/backend/src/backend/gen_insn_selection_branch_lowering.cpp
new file mode 100644
index 0000000..92efbdb
--- /dev/null
+++ b/backend/src/backend/gen_insn_selection_branch_lowering.cpp
@@ -0,0 +1,468 @@
+
+#include "backend/gen_insn_selection.hpp"
+#include "backend/gen_insn_selection_passes.hpp"
+#include "backend/gen_context.hpp"
+#include "ir/function.hpp"
+#include "ir/liveness.hpp"
+#include "ir/profile.hpp"
+#include "sys/cvar.hpp"
+#include "sys/vector.hpp"
+#include <algorithm>
+#include <climits>
+#include <map>
+
+namespace gbe
+{
+  /*! Lowers the SEL_OP_BRANCH / SEL_OP_LABEL placeholders of one selection block */
+  class BranchLowering
+  {
+  public:
+    /*! To make function prototypes more readable */
+    typedef const GenRegister &Reg;
+    typedef const GenInstructionState &State;
+    BranchLowering(GenContext &ctx, SelectionBlock &selblock) : ctx(ctx), selBlock(selblock), lowered(false) {}
+    void run();
+    void insnLower();
+    void lowerBRANCH(SelectionInstruction &insn);
+    void lowerForwardBRANCH(SelectionInstruction &insn);
+    void lowerBackwardBRANCH(SelectionInstruction &insn);
+    void lowerLABEL(SelectionInstruction &insn);
+
+    /* Get current block IP register according to label width. */
+    GenRegister getBlockIP() {
+      return ctx.isDWLabel() ? ctx.sel->selReg(ir::ocl::dwblockip, ir::TYPE_U32) : ctx.sel->selReg(ir::ocl::blockip, ir::TYPE_U32);
+    }
+    /*! Instruction builders: the lowering code calls each one as prev_insn = X(..., state, prev_insn) */
+    SelectionInstruction *setBlockIP(GenRegister blockip, uint32_t labelValue,
+                                     State state,
+                                     SelectionInstruction *prevInsn);
+    SelectionInstruction *cmpBlockIP(uint32_t cond, GenRegister blockip,
+                                     GenRegister labelReg, State state,
+                                     SelectionInstruction *prevInsn);
+    SelectionInstruction *cmpBlockIP(uint32_t cond, GenRegister blockip,
+                                     uint32_t labelValue, State state,
+                                     SelectionInstruction *prevInsn);
+    SelectionInstruction *MOV(Reg dst, Reg src, State state,
+                              SelectionInstruction *prevInsn);
+    SelectionInstruction *CMP(uint32_t conditional, Reg src0, Reg src1, Reg dst,
+                              State state, SelectionInstruction *prevInsn);
+    SelectionInstruction *JMPI(Reg src, ir::LabelIndex index,
+                               ir::LabelIndex origin, State state,
+                               SelectionInstruction *prevInsn);
+    SelectionInstruction *IF(Reg src, ir::LabelIndex jip, ir::LabelIndex uip,
+                             State state, SelectionInstruction *prevInsn);
+    SelectionInstruction *ENDIF(Reg src, ir::LabelIndex jip,
+                                ir::LabelIndex endifLabel, State state,
+                                SelectionInstruction *prevInsn);
+    SelectionInstruction *LABEL(ir::LabelIndex index, ir::LabelIndex jip,
+                                State state, SelectionInstruction *prevInsn);
+    SelectionInstruction *generateInsn(SelectionOpcode opcode, uint32_t dstNum,
+                                       uint32_t srcNum, State state,
+                                       SelectionInstruction *prevInsn);
+    ~BranchLowering() {}
+
+  protected:
+    GenContext &ctx;      //in case that we need it
+    SelectionBlock &selBlock; // the block whose LABEL/BRANCH instructions are being lowered
+    bool lowered;         // initialised to false by the constructor so it never holds an indeterminate value
+  };
+
+  void BranchLowering::insnLower()
+  {
+      // Pass 1, back to front: a BRANCH is lowered (writing needJump) before any LABEL ahead of it reads that flag.
+      for (auto iter = selBlock.insnList.end() ; iter != selBlock.insnList.begin(); ) {
+          iter--;
+        SelectionInstruction &insn = *iter;
+        if (insn.opcode == SEL_OP_BRANCH) {
+          lowerBRANCH(insn);
+        }
+        else if (insn.opcode == SEL_OP_LABEL) {
+          lowerLABEL(insn);
+        }
+      }
+      // Pass 2: drop the BRANCH placeholders. erase() is assumed to return the successor (TODO confirm
+      // against sys/intrusive_list.hpp), so advance by hand only when nothing was erased.
+      for (auto iter = selBlock.insnList.begin(); iter != selBlock.insnList.end(); ) {
+        SelectionInstruction &insn = *iter;
+        if (insn.opcode == SEL_OP_BRANCH) iter = selBlock.insnList.erase(&insn);
+        else ++iter;
+      }
+  }
+
+  void BranchLowering::lowerBackwardBRANCH(SelectionInstruction &insn)
+  {
+      // Expand a backward BRANCH (target label <= label of its own block): update
+      // the block IPs, emit the ENDIF when one is needed, then the JMPI to the JIP.
+      using namespace ir;
+      SelectionInstruction *prev_insn = &insn; // handed to each helper as prevInsn and replaced by its result
+      const GenRegister ip = getBlockIP();
+      const BasicBlock *currBB = insn.getParent()->bb;
+      const BasicBlock *nextBB = currBB->getNextBlock();
+      GBE_ASSERT(nextBB != NULL); // checked before nextBB is dereferenced just below
+      const LabelIndex nextLabel = nextBB->getLabelIndex();
+      const uint32_t jip = insn.jip;
+      const LabelIndex label = LabelIndex(insn.index1); // label of the block holding the branch
+      uint32_t dst = insn.index;                        // branch target label
+      uint32_t predIndex = insn.extra.pred_index;       // 0 is what an unpredicated OP_BRA stores
+      const uint32_t simdWidth = ctx.getSimdWidth();
+      if (predIndex != 0) {
+        // Update the PcIPs for all the branches. Just put the IPs of the next
+        // block. Next instruction will properly update the IPs of the lanes
+        // that actually take the branch
+        {
+          GenInstructionState curr = insn.state;
+          prev_insn = setBlockIP(ip, nextLabel.value(), curr, prev_insn);
+        }
+        GBE_ASSERT(jip == dst);
+        GenInstructionState curr;
+        curr.execWidth = simdWidth;
+        curr.physicalFlag = 0;
+        curr.flagIndex = predIndex;
+        curr.predicate = GEN_PREDICATE_NORMAL;
+        prev_insn = setBlockIP(ip, dst, curr, prev_insn);
+        curr.predicate = GEN_PREDICATE_NONE;
+        if (!selBlock.hasBarrier)
+          prev_insn = ENDIF(GenRegister::immd(0), nextLabel, LabelIndex(0), curr, prev_insn);
+        curr.execWidth = 1;
+        if (simdWidth == 16)
+          curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
+        else
+          curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
+        curr.noMask = 1;
+        prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jip), LabelIndex(label), curr, prev_insn);
+        selBlock.needJump = true;
+      } else {
+        // Update the PcIPs
+        GenInstructionState curr = insn.state;
+        curr.flag = 0;
+        curr.subFlag = 1;
+        if(insn.getParent()->bb->needEndif)
+          prev_insn = setBlockIP(ip, dst, curr, prev_insn);
+        if (!selBlock.hasBarrier) {
+          if(insn.getParent()->bb->needEndif && !insn.getParent()->bb->needIf) // use the IR block's endifLabel, as the forward path does
+            prev_insn = ENDIF(GenRegister::immd(0), insn.getParent()->bb->endifLabel, insn.getParent()->bb->endifLabel, curr, prev_insn);
+          else if(insn.getParent()->bb->needEndif)
+            prev_insn = ENDIF(GenRegister::immd(0), nextLabel, LabelIndex(0), curr, prev_insn);
+        }
+        // Branch to the jump target
+        {
+          GenInstructionState curr;
+          // execWidth 1, no mask, no predicate: an unconditional jump
+          curr.execWidth = 1;
+          curr.noMask = 1;
+          curr.predicate = GEN_PREDICATE_NONE;
+          prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jip), LabelIndex(label), curr, prev_insn);
+          selBlock.needJump = true;
+        }
+      }
+  }
+
+  void BranchLowering::lowerForwardBRANCH(SelectionInstruction &insn)
+  {
+    // Expand a forward BRANCH: set the block IP to the target, emit the ENDIF when needed and, if unpredicated, a JMPI unless the JIP is the next block.
+    using namespace ir;
+    SelectionInstruction *prev_insn = &insn;
+    const uint32_t simdWidth = ctx.getSimdWidth();
+    uint32_t dst = insn.index;
+    uint32_t predIndex = insn.extra.pred_index;
+    const uint32_t jipValue = insn.jip;
+    const GenRegister ip = getBlockIP();
+    const BasicBlock *currBB = insn.getParent()->bb;
+    const BasicBlock *nextBB = currBB->getNextBlock();
+    GBE_ASSERT(nextBB != NULL); // same guard as the backward path, placed before the dereference
+    const LabelIndex nextLabel = nextBB->getLabelIndex();
+    if (predIndex != 0) {
+      GenInstructionState curr = insn.state;
+      // we don't need to set next label to the pcip
+      // as if there is no backward jump latter, then obviously everything will work fine.
+      // If there is backward jump latter, then all the pcip will be updated correctly there.
+      curr.execWidth = simdWidth;
+      curr.physicalFlag = 0;
+      curr.flagIndex = predIndex;
+      curr.predicate = GEN_PREDICATE_NORMAL;
+      prev_insn = setBlockIP(ip, dst, curr, prev_insn);
+      curr.predicate = GEN_PREDICATE_NONE;
+      if (!selBlock.hasBarrier)
+        prev_insn = ENDIF(GenRegister::immd(0), nextLabel, LabelIndex(0), curr, prev_insn);
+      selBlock.needJump = false;
+    } else {
+        // Update the PcIPs
+        GenInstructionState curr = insn.state;
+        curr.flag = 0;
+        curr.subFlag = 1;
+        if(insn.getParent()->bb->needEndif)
+          prev_insn = setBlockIP(ip, dst, curr, prev_insn);
+        if (!selBlock.hasBarrier) {
+          if(insn.getParent()->bb->needEndif && !insn.getParent()->bb->needIf)
+            prev_insn = ENDIF(GenRegister::immd(0), insn.getParent()->bb->endifLabel, insn.getParent()->bb->endifLabel, curr, prev_insn);
+          else if(insn.getParent()->bb->needEndif)
+            prev_insn = ENDIF(GenRegister::immd(0), nextLabel, LabelIndex(0), curr, prev_insn);
+        }
+        selBlock.needJump = false;
+        // We will not emit any jump if we must go the next block anyway
+        if (nextLabel == jipValue) return;
+        // Branch to the jump target
+        {
+          GenInstructionState curr;
+          curr.execWidth = 1;
+          curr.noMask = 1;
+          curr.predicate = GEN_PREDICATE_NONE;
+          // Actually, the origin of this JMPI should be the beginning of next
+          // BB.
+          prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jipValue),
+               ir::LabelIndex(currBB->getLabelIndex().value() + 1), curr,
+               prev_insn);
+          selBlock.needJump = true;
+        }
+    }
+  }
+
+  void BranchLowering::lowerBRANCH(SelectionInstruction& insn)
+  {
+    // index carries the branch target label and index1 the label of the block
+    // that contains the branch; a target at or before that block is a backward jump.
+    const bool isBackward = insn.index <= insn.index1;
+    if (isBackward)
+      lowerBackwardBRANCH(insn);
+    else
+      lowerForwardBRANCH(insn);
+  }
+
+  void BranchLowering::lowerLABEL(SelectionInstruction& insn)
+  {
+    // Lower one LABEL (insn.index = label value, insn.index1 = JIP): emit a block-IP compare, then a JMPI and/or an IF after it.
+      using namespace ir;
+      uint32_t labelValue = insn.index;
+      const GenRegister src0 = getBlockIP();
+      const GenRegister src1 = ctx.isDWLabel() ? GenRegister::immud(labelValue) : GenRegister::immuw(labelValue);
+      const uint32_t simdWidth = ctx.getSimdWidth();
+      // Nothing is generated for the bottom block of the function.
+      if (insn.getParent()->bb == &ctx.getFunction().getBottomBlock()) {
+        return;
+      }
+      // A JIP of 0 also generates nothing (the LABELs created by ENDIF() carry a JIP of 0).
+      const LabelIndex nextLabel = insn.getParent()->bb->getNextBlock()->getLabelIndex();
+      uint32_t jipValue = insn.index1;
+      if(jipValue == 0)
+          return;
+      // Unpredicated, no-mask compare "block IP <= label" using flag 0 / subFlag 1; inserted right after the label.
+      SelectionInstruction* prev_insn = &insn;  // every new instruction is inserted after the previous one
+      GenInstructionState curr;
+      curr.execWidth = simdWidth;
+      curr.noMask = 1;
+      curr.predicate = GEN_PREDICATE_NONE;
+      curr.flag = 0;
+      curr.subFlag = 1;
+      SelectionInstruction *insn_cmp = cmpBlockIP(GEN_CONDITIONAL_LE, src0, src1, curr, prev_insn);
+      prev_insn = insn_cmp;
+      // Blocks with a barrier get a JMPI-only sequence (no IF); other blocks get an optional JMPI followed by an IF.
+      if (selBlock.hasBarrier) {
+        GenInstructionState curr;  // fresh state; shadows the outer curr
+        curr.execWidth = simdWidth;
+        curr.predicate = GEN_PREDICATE_NORMAL;
+        curr.flag = 0;
+        curr.subFlag = 1;
+        prev_insn = setBlockIP(src0, ctx.getMaxLabel(), curr, prev_insn);  // predicated MOV: block IP = max label
+        curr.predicate = GEN_PREDICATE_NONE;
+        curr.noMask = 1;
+        prev_insn = cmpBlockIP(GEN_CONDITIONAL_EQ, src0, ctx.getMaxLabel(), curr, prev_insn);
+        // No-mask JMPI to JIP with execWidth 1, predicated on the inverse of ALL8H/ALL16H.
+        if (simdWidth == 8)
+          curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
+        else if (simdWidth == 16)
+          curr.predicate = GEN_PREDICATE_ALIGN1_ALL16H;
+        else
+          NOT_IMPLEMENTED;
+        curr.noMask = 1;
+        curr.execWidth = 1;
+        curr.inversePredicate = 1;
+        prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jipValue),
+                         LabelIndex(labelValue), curr, prev_insn);
+        {
+          GenInstructionState curr;  // another fresh state for the final MOV
+          curr.execWidth = simdWidth;
+          curr.predicate = GEN_PREDICATE_NORMAL;
+          curr.flag = 0;
+          curr.subFlag = 1;
+          prev_insn = setBlockIP(src0, labelValue, curr, prev_insn);  // predicated MOV: block IP = this label
+        }
+      } else {
+        if (
+            // The JMPI is left out only when JIP is the next label and needJump
+            // is false: per the original note, the IF emitted below already does
+            // the same thing if all channels are disabled. (That note used to
+            // mention "endif offset is -1"; the code tests needJump.)
+            (jipValue != nextLabel.value() || selBlock.needJump != false)) {
+          // Insert the JMPI that bypasses the block: execWidth 1, no mask, inverse of ANY8H/ANY16H.
+            GenInstructionState curr;
+            curr.noMask = 1;
+            curr.execWidth = 1;
+            curr.inversePredicate = 1;
+            curr.flag = 0;
+            curr.subFlag = 1;
+            if (simdWidth == 8)
+                curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
+            else if (simdWidth == 16)
+                curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
+            else
+                NOT_IMPLEMENTED;
+            prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jipValue),
+                             LabelIndex(labelValue), curr, prev_insn);
+        }
+        GenInstructionState curr;  // state of the IF: predicated on flag 0 / subFlag 1
+        curr.execWidth = simdWidth;
+        curr.predicate = GEN_PREDICATE_NORMAL;
+        curr.flag = 0;
+        curr.subFlag = 1;
+        if(!insn.getParent()->bb->needEndif && insn.getParent()->bb->needIf) {  // IF target taken from bb->endifLabel
+            ir::LabelIndex label = insn.getParent()->bb->endifLabel;
+            prev_insn = IF(GenRegister::immd(0), label, label, curr, prev_insn);
+        }
+        else {  // otherwise the IF targets selBlock.endifLabel (used as both jip and uip)
+            prev_insn = IF(GenRegister::immd(0), selBlock.endifLabel, selBlock.endifLabel, curr, prev_insn);
+        }
+      }
+  }
+  /* Set the block IP register to a label value: emits a MOV right after
+   * prevInsn. The register is retyped to UD and the value built with immud()
+   * when the context uses DW labels; UW and immuw() are used otherwise. */
+  SelectionInstruction *
+  BranchLowering::setBlockIP(GenRegister blockip, uint32_t labelValue,
+                             State state, SelectionInstruction *prevInsn) {
+    if (ctx.isDWLabel())
+      return MOV(GenRegister::retype(blockip, GEN_TYPE_UD),
+                 GenRegister::immud(labelValue), state, prevInsn);
+    return MOV(GenRegister::retype(blockip, GEN_TYPE_UW), GenRegister::immuw(labelValue), state, prevInsn);
+  }
+
+  /* Compare the block IP register against labelReg with condition `cond`. The
+   * destination is the null register, retyped like the block IP (UD for DW
+   * labels, UW otherwise). Returns the CMP inserted after prevInsn. */
+  SelectionInstruction *
+  BranchLowering::cmpBlockIP(uint32_t cond, GenRegister blockip,
+                             GenRegister labelReg, State state,
+                             SelectionInstruction *prevInsn) {
+    const auto ipType = ctx.isDWLabel() ? GEN_TYPE_UD : GEN_TYPE_UW;
+    return CMP(cond, GenRegister::retype(blockip, ipType), labelReg,
+               GenRegister::retype(GenRegister::null(), ipType), state, prevInsn);
+  }
+
+  /* Compare the block IP register against an immediate label value using
+   * condition `cond`; returns the CMP inserted after prevInsn.
+   *
+   * The immediate has to match the width the block IP is retyped to: immud()
+   * when the context uses DW labels, immuw() otherwise. (The DW path used to
+   * build an immuw() immediate against a UD block IP.) */
+  SelectionInstruction *
+  BranchLowering::cmpBlockIP(uint32_t cond, GenRegister blockip,
+                             uint32_t labelValue, State state,
+                             SelectionInstruction *prevInsn) {
+    const GenRegister labelReg = ctx.isDWLabel() ? GenRegister::immud(labelValue)
+                                                 : GenRegister::immuw(labelValue);
+    // The register overload retypes the block IP and the null destination.
+    return cmpBlockIP(cond, blockip, labelReg, state, prevInsn);
+  }
+
+  /* Insert "MOV dst, src" (1 dst, 1 src) with `state` right after prevInsn. */
+  SelectionInstruction *BranchLowering::MOV(Reg dst, Reg src, State state, SelectionInstruction *prevInsn) {
+    SelectionInstruction *const mov = generateInsn(SEL_OP_MOV, 1, 1, state, prevInsn);
+    mov->dst(0) = dst;
+    mov->src(0) = src;
+    return mov;
+  }
+
+  /* Insert a CMP (1 dst, 2 srcs) after prevInsn; `conditional` goes to extra.function. */
+  SelectionInstruction *BranchLowering::CMP(uint32_t conditional, Reg src0, Reg src1, Reg dst,
+                                            State state, SelectionInstruction *prevInsn) {
+    SelectionInstruction *const cmp = generateInsn(SEL_OP_CMP, 1, 2, state, prevInsn);
+    cmp->extra.function = conditional;
+    cmp->dst(0) = dst;
+    cmp->src(0) = src0;
+    cmp->src(1) = src1;
+    return cmp;
+  }
+
+  /* Insert a JMPI to label `index` after prevInsn. `origin` is the label the
+   * jump is issued from; it is only used to measure the jump span and decide
+   * whether the instruction is flagged as a long jump. */
+  SelectionInstruction *BranchLowering::JMPI(Reg src, ir::LabelIndex index,
+                                             ir::LabelIndex origin, State state,
+                                             SelectionInstruction *prevInsn) {
+    SelectionInstruction *const jmp = generateInsn(SEL_OP_JMPI, 0, 1, state, prevInsn);
+    jmp->src(0) = src;
+    jmp->index = index.value();
+
+    // A forward jump lands at the beginning of the target block, so that block
+    // is excluded from the span; any other jump spans [index, origin].
+    const bool forward = origin.value() < index.value();
+    const ir::LabelIndex start = forward ? origin : index;
+    const ir::LabelIndex end = forward ? ir::LabelIndex(index.value() - 1) : origin;
+
+    // FIXME, this longjmp check is too hacky. We need to support instruction
+    // insertion at code emission stage in the future.
+    jmp->extra.longjmp = ctx.getFunction().getDistance(start, end) > 3000;
+    return jmp;
+  }
+
+  /* Insert an IF (1 src) after prevInsn with index = jip and index1 = uip. */
+  SelectionInstruction *BranchLowering::IF(Reg src, ir::LabelIndex jip, ir::LabelIndex uip,
+                                           State state, SelectionInstruction *prevInsn) {
+    SelectionInstruction *const ifInsn = generateInsn(SEL_OP_IF, 0, 1, state, prevInsn);
+    ifInsn->src(0) = src;
+    ifInsn->index1 = uip.value();
+    ifInsn->index = jip.value();
+    return ifInsn;
+  }
+
+  /* Insert "LABEL endifLabel" followed by an ENDIF after prevInsn and record
+   * the label in selBlock.endifLabel. An endifLabel of 0 requests a fresh
+   * auxiliary label from the selection. Note that `jip` is not used here. */
+  SelectionInstruction *BranchLowering::ENDIF(Reg src, ir::LabelIndex jip, ir::LabelIndex endifLabel,
+                                              State state, SelectionInstruction *prevInsn) {
+    if (endifLabel == 0) selBlock.endifLabel = ctx.sel->newAuxLabel();
+    else selBlock.endifLabel = endifLabel;
+    SelectionInstruction *const labelInsn = LABEL(selBlock.endifLabel, ir::LabelIndex(0), state, prevInsn);
+    SelectionInstruction *const endifInsn = generateInsn(SEL_OP_ENDIF, 0, 1, state, labelInsn);
+    endifInsn->src(0) = src;
+    endifInsn->index = selBlock.endifLabel.value();
+    return endifInsn;
+  }
+
+  /* Insert a LABEL (no registers) after prevInsn: index = label, index1 = jip. */
+  SelectionInstruction *BranchLowering::LABEL(ir::LabelIndex index, ir::LabelIndex jip,
+                                              State state, SelectionInstruction *prevInsn) {
+    SelectionInstruction *const labelInsn = generateInsn(SEL_OP_LABEL, 0, 0, state, prevInsn);
+    labelInsn->index1 = jip.value();
+    labelInsn->index = index.value();
+    return labelInsn;
+  }
+
+  /* Create an instruction with the given opcode and register counts, insert
+   * it into selBlock right after prevInsn, then give it `state`. */
+  SelectionInstruction *
+  BranchLowering::generateInsn(SelectionOpcode opcode, uint32_t dstNum, uint32_t srcNum,
+                               State state, SelectionInstruction *prevInsn) {
+    GBE_ASSERT(dstNum <= SelectionInstruction::MAX_DST_NUM && srcNum <= SelectionInstruction::MAX_SRC_NUM);
+    SelectionInstruction *const created = ctx.sel->create(opcode, dstNum, srcNum);
+    selBlock.insertAfter(prevInsn, created);
+    created->state = state;
+    return created;
+  }
+
+  void BranchLowering::run() {  // entry point used by lowerBranch() for one block
+    lowered = false;  // clear the flag first; presumably insnLower() updates it -- confirm
+    insnLower();  // all the work is delegated to insnLower()
+  }
+
+  /* Pass entry point: run a BranchLowering over every selection block of
+   * `sel`, one instance per block, sharing the selection's context. */
+  void lowerBranch(Selection *sel)
+  {
+    for (SelectionBlock &selBlock : *sel->blockList) {
+      BranchLowering lowering(sel->getCtx(), selBlock);
+      lowering.run();
+    }
+  }
+} /* namespace gbe */
+
+
diff --git a/backend/src/backend/gen_insn_selection_passes.hpp b/backend/src/backend/gen_insn_selection_passes.hpp
new file mode 100644
index 0000000..19d3f4c
--- /dev/null
+++ b/backend/src/backend/gen_insn_selection_passes.hpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __GEN_INSN_SELECTION_PASSES_HPP__
+#define __GEN_INSN_SELECTION_PASSES_HPP__
+
+#include "backend/gen_insn_selection.hpp"
+
+namespace gbe
+{
+    /* !optimize functions after instruction selection (none declared here yet) */
+    // ---
+    /* !lower functions after instruction selection: runs BranchLowering on each block in sel->blockList */
+    void lowerBranch(Selection* sel);
+}
+
+#endif
diff --git a/backend/src/sys/intrusive_list.hpp b/backend/src/sys/intrusive_list.hpp
index 2e2f2a9..6677c77 100644
--- a/backend/src/sys/intrusive_list.hpp
+++ b/backend/src/sys/intrusive_list.hpp
@@ -147,6 +147,10 @@ namespace gbe
       link(v, pos.node());
       return iterator(v);
     }
+    iterator insert_after(iterator pos, value_type* v) {  // like the insertion helper above, but calls append() instead of link()
+      append(v, pos.node());  // presumably places v right after pos -- confirm against append()
+      return iterator(v);  // iterator designating the element just inserted
+    }
     iterator erase(iterator it) {
       iterator itErase(it);
       ++it;
-- 
2.5.0



More information about the Beignet mailing list