[Beignet] [PATCH] GBE: Fix register liveness issue under simd mode.

Ruiling Song ruiling.song at intel.com
Sun Mar 23 18:46:21 PDT 2014


As we run in SIMD mode with prediction mask to indicate active lanes,
If a vreg is defined in a loop, and there are som uses of the vreg out of the loop,
the define point may be run several times under *different* prediction mask.
For these kinds of vreg, we must extend the vreg liveness into the whole loop.
If we don't do this, it's liveness is killed before the def point inside loop.
If the vreg's corresponding physical reg is assigned to other vreg during the
killed period, and the instructions before kill point were re-executed with different prediction,
the inactive lanes of vreg maybe over-written. Then the out-of-loop use will got wrong data.

This patch fixes the HaarFixture case in opencv.

Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
 backend/src/ir/context.cpp            |    7 +--
 backend/src/ir/function.cpp           |   16 ++++++
 backend/src/ir/function.hpp           |   15 ++++++
 backend/src/ir/liveness.cpp           |   96 +++++++++++++--------------------
 backend/src/ir/liveness.hpp           |    2 +-
 backend/src/llvm/llvm_gen_backend.cpp |   80 ++++++++++++++++++++++++++-
 6 files changed, 151 insertions(+), 65 deletions(-)

diff --git a/backend/src/ir/context.cpp b/backend/src/ir/context.cpp
index c89286e..bc14fd6 100644
--- a/backend/src/ir/context.cpp
+++ b/backend/src/ir/context.cpp
@@ -76,13 +76,14 @@ namespace ir {
     // function
     lowerReturn(unit, fn->getName());
 
+    // Properly order labels and compute the CFG, it's needed by FunctionArgumentLower
+    fn->sortLabels();
+    fn->computeCFG();
+
     // Spill function argument to the stack if required and identify which
     // function arguments can use constant push
     lowerFunctionArguments(unit, fn->getName());
 
-    // Properly order labels and compute the CFG
-    fn->sortLabels();
-    fn->computeCFG();
     const StackElem elem = fnStack.back();
     fnStack.pop_back();
     fn = elem.fn;
diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp
index 71dcc1f..a6aecb5 100644
--- a/backend/src/ir/function.cpp
+++ b/backend/src/ir/function.cpp
@@ -52,6 +52,7 @@ namespace ir {
 
   Function::~Function(void) {
     for (auto block : blocks) GBE_DELETE(block);
+    for (auto loop : loops) GBE_DELETE(loop);
     for (auto arg : args) GBE_DELETE(arg);
   }
 
@@ -59,6 +60,10 @@ namespace ir {
     return unit.getPointerFamily();
   }
 
+  void Function::addLoop(const vector<LabelIndex> &bbs, const vector<std::pair<LabelIndex, LabelIndex>> &exits) {
+    loops.push_back(GBE_NEW(Loop, bbs, exits));
+  }
+
   void Function::sortLabels(void) {
     uint32_t last = 0;
 
@@ -96,6 +101,17 @@ namespace ir {
       }
     });
 
+    // fix labels for loops
+    for (auto &x : loops) {
+      for (auto &y : x->bbs)
+        y = labelMap[y];
+
+      for (auto &z : x->exits) {
+        z.first = labelMap[z.first];
+        z.second = labelMap[z.second];
+      }
+    }
+
     // Reset the label to block mapping
     this->labels.resize(last);
     foreachBlock([&](BasicBlock &bb) {
diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
index 8a42529..303c9ba 100644
--- a/backend/src/ir/function.hpp
+++ b/backend/src/ir/function.hpp
@@ -134,6 +134,17 @@ namespace ir {
     return arg0.offset < arg1.offset;
   }
 
+  /*! CFG loops */
+  struct Loop : public NonCopyable
+  {
+  public:
+    Loop(const vector<LabelIndex> &in, const vector<std::pair<LabelIndex, LabelIndex>> &exit) :
+    bbs(in), exits(exit) {}
+    vector<LabelIndex> bbs;
+    vector<std::pair<LabelIndex, LabelIndex>> exits;
+    GBE_STRUCT(Loop);
+  };
+
   /*! A function is :
    *  - a register file
    *  - a set of basic block layout into a CGF
@@ -318,6 +329,9 @@ namespace ir {
     INLINE const uint32_t getStackSize(void) const { return this->stackSize; }
     /*! Push stack size. */
     INLINE void pushStackSize(uint32_t step) { this->stackSize += step; }
+    /*! add the loop info for later liveness analysis */
+    void addLoop(const vector<LabelIndex> &bbs, const vector<std::pair<LabelIndex, LabelIndex>> &exits);
+    INLINE const vector<Loop * > &getLoops() { return loops; }
   private:
     friend class Context;           //!< Can freely modify a function
     std::string name;               //!< Function name
@@ -327,6 +341,7 @@ namespace ir {
     vector<BasicBlock*> labels;     //!< Each label points to a basic block
     vector<Immediate> immediates;   //!< All immediate values in the function
     vector<BasicBlock*> blocks;     //!< All chained basic blocks
+    vector<Loop *> loops;           //!< Loops info of the function
     RegisterFile file;              //!< RegisterDatas used by the instructions
     Profile profile;                //!< Current function profile
     PushMap pushMap;                //!< Pushed function arguments (reg->loc)
diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp
index 724d5c3..afab02b 100644
--- a/backend/src/ir/liveness.cpp
+++ b/backend/src/ir/liveness.cpp
@@ -38,19 +38,11 @@ namespace ir {
       if (op == OP_RET) {
         workSet.insert(info);
         info->liveOut.insert(ocl::retVal);
-      } else if (op == OP_BRA) {
-        // If this is a backward jump, put it to the extra work list.
-        if (((BranchInstruction*)lastInsn)->getLabelIndex() < bb.getLabelIndex())
-          extraWorkSet.insert(info);
       }
     });
     // Now with iterative analysis, we compute liveout and livein sets
     this->computeLiveInOut();
-    for (auto it : extraWorkSet) {
-      for (auto reg : it->liveOut) {
-        it->extraLiveIn.insert(reg);
-      }
-    }
+    // extend register (def in loop, use out-of-loop) liveness to the whole loop
     this->computeExtraLiveInOut();
   }
 
@@ -128,60 +120,46 @@ namespace ir {
    }
 
 /*
-  Consider the following scenario, %100's normal liveness will start from Ln-1's
-  position. In normal analysis, the Ln-1 is not Ln's predecessor, thus the liveness
-  of %100 will be passed to Ln and then will not be passed to L0.
-
-  But considering we are running on a multilane with predication's vector machine.
-  The unconditional BR in Ln-1 may be removed and it will enter Ln with a subset of
-  the revert set of Ln-1's predication. For example when running Ln-1, the active lane
-  is 0-7, then at Ln the active lane is 8-15. Then at the end of Ln, a subset of 8-15
-  will jump to L0. If a register %10 is allocated the same GRF as %100, given the fact
-  that their normal liveness doesn't overlapped, the a subset of 8-15 lanes will be
-  modified. If the %10 and %100 are the same vector data type, then we are fine. But if
-  %100 is a float vector, and the %10 is a bool or short vector, then we hit a bug here.
-
-L0:
-  ...
-  %10 = 5
-  ...
-Ln-1:
-  %100 = 2
-  BR Ln+1
-
-Ln:
-  ...
-  BR(%xxx) L0
-
-Ln+1:
-  %101 = %100 + 2;
-  ...
-
-  The solution to fix this issue is to build another liveness data. We will start with
-  those BBs with backward jump. Then pass all the liveOut register as extra liveIn
-  of current BB and then forward this extra liveIn to all the blocks. This is very similar
-  to the normal liveness analysis just with reverse direction.
+  As we run in SIMD mode with prediction mask to indicate active lanes.
+  If a vreg is defined in a loop, and there are som uses of the vreg out of the loop,
+  the define point may be run several times under *different* prediction mask.
+  For these kinds of vreg, we must extend the vreg liveness into the whole loop.
+  If we don't do this, it's liveness is killed before the def point inside loop.
+  If the vreg's corresponding physical reg is assigned to other vreg during the
+  killed period, and the instructions before kill point were re-executed with different prediction,
+  the inactive lanes of vreg maybe over-written. Then the out-of-loop use will got wrong data.
 */
+
   void Liveness::computeExtraLiveInOut(void) {
-    while(!extraWorkSet.empty()) {
-      struct BlockInfo *currInfo = *extraWorkSet.begin();
-      extraWorkSet.erase(currInfo);
-      for (auto currInVar : currInfo->extraLiveIn)
-        currInfo->extraLiveOut.insert(currInVar);
-      bool isChanged = false;
-      for (auto succ : currInfo->bb.getSuccessorSet()) {
-        BlockInfo *succInfo = liveness[succ];
-        for (auto currOutVar : currInfo->extraLiveOut) {
-          bool changed = false;
-          if (!succInfo->upwardUsed.contains(currOutVar)) {
-            auto it  = succInfo->extraLiveIn.insert(currOutVar);
-            changed = it.second;
+    const vector<Loop *> &loops = fn.getLoops();
+    if(loops.size() == 0) return;
+
+    for (auto l : loops) {
+      for (auto x : l->exits) {
+        const BasicBlock &a = fn.getBlock(x.first);
+        const BasicBlock &b = fn.getBlock(x.second);
+        BlockInfo * exiting = liveness[&a];
+        BlockInfo * exit = liveness[&b];
+        std::vector<Register> toExtend;
+
+        if(b.getPredecessorSet().size() > 1) {
+          for (auto p : exit->upwardUsed)
+            toExtend.push_back(p);
+        } else {
+          std::set_intersection(exiting->liveOut.begin(), exiting->liveOut.end(), exit->upwardUsed.begin(), exit->upwardUsed.end(), std::back_inserter(toExtend));
+        }
+        if (toExtend.size() == 0) continue;
+
+        for (auto bb : l->bbs) {
+          BlockInfo * bI = liveness[&fn.getBlock(bb)];
+          for(auto r : toExtend) {
+            if(!bI->upwardUsed.contains(r))
+              bI->extraLiveIn.insert(r);
+            bI->extraLiveOut.insert(r);
           }
-          if (changed) isChanged = true;
         }
-        if (isChanged)
-          extraWorkSet.insert(succInfo);}
-    };
+      }
+    }
 #if 0
     fn.foreachBlock([this](const BasicBlock &bb){
       printf("label %d:\n", bb.getLabelIndex());
diff --git a/backend/src/ir/liveness.hpp b/backend/src/ir/liveness.hpp
index 9198eae..2545053 100644
--- a/backend/src/ir/liveness.hpp
+++ b/backend/src/ir/liveness.hpp
@@ -143,7 +143,7 @@ namespace ir {
     void computeExtraLiveInOut(void);
     /*! Set of work list block which has exit(return) instruction */
     typedef set <struct BlockInfo*> WorkSet;
-    WorkSet workSet, extraWorkSet;
+    WorkSet workSet;
 
     /*! Use custom allocators */
     GBE_CLASS(Liveness);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 49fbc7b..80cdb37 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -489,7 +489,6 @@ namespace gbe
       if(!bKernel) return false;
 
       LI = &getAnalysis<LoopInfo>();
-
       emitFunction(F);
       return false;
     }
@@ -497,6 +496,8 @@ namespace gbe
     virtual bool doFinalization(Module &M) { return false; }
     /*! handle global variable register allocation (local, constant space) */
     void allocateGlobalVariableRegister(Function &F);
+    /*! gather all the loops in the function and add them to ir::Function */
+    void gatherLoopInfo(ir::Function &fn);
     /*! Emit the complete function code and declaration */
     void emitFunction(Function &F);
     /*! Handle input and output function parameters */
@@ -1447,6 +1448,78 @@ namespace gbe
     }
 
   }
+  static INLINE void findAllLoops(LoopInfo * LI, std::vector<std::pair<Loop*, int>> &lp)
+  {
+      for (Loop::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) {
+        lp.push_back(std::make_pair(*I, -1));
+      }
+      if (lp.size() == 0) return;
+
+      uint32_t i = 0;
+      do {
+        const std::vector<Loop*> subLoops = lp[i].first->getSubLoops();
+        for(auto sub : subLoops)
+          lp.push_back(std::make_pair(sub, i));
+        i++;
+      } while(i < lp.size());
+  }
+
+  void GenWriter::gatherLoopInfo(ir::Function &fn) {
+    vector<ir::LabelIndex> loopBBs;
+    vector<std::pair<ir::LabelIndex, ir::LabelIndex>> loopExits;
+    std::vector<std::pair<Loop*, int>> lp;
+
+    findAllLoops(LI, lp);
+#if GBE_DEBUG
+    // check two loops' interference
+    for(int i = 0; i < lp.size(); i++) {
+        SmallVector<Loop::Edge, 8> exitBBs;
+        lp[i].first->getExitEdges(exitBBs);
+
+      const std::vector<BasicBlock*> &inBBs = lp[i].first->getBlocks();
+      std::vector<ir::LabelIndex> bbs1;
+      for(auto x : inBBs) {
+        bbs1.push_back(labelMap[x]);
+      }
+      std::sort(bbs1.begin(), bbs1.end());
+      for(int j = i+1; j < lp.size(); j++) {
+        if(! lp[i].first->contains(lp[j].first)) {
+          const std::vector<BasicBlock*> &inBBs2 = lp[j].first->getBlocks();
+          std::vector<ir::LabelIndex> bbs2;
+          std::vector<ir::LabelIndex> bbs3;
+
+          for(auto x : inBBs2) {
+            bbs2.push_back(labelMap[x]);
+          }
+
+          std::sort(bbs2.begin(), bbs2.end());
+          std::set_intersection(bbs1.begin(), bbs1.end(), bbs2.begin(), bbs2.end(), std::back_inserter(bbs3));
+          GBE_ASSERT(bbs3.size() < 1);
+        }
+      }
+    }
+#endif
+
+    for (auto loop : lp) {
+      loopBBs.clear();
+      loopExits.clear();
+
+      const std::vector<BasicBlock*> &inBBs = loop.first->getBlocks();
+      for (auto b : inBBs) {
+        GBE_ASSERT(labelMap.find(b) != labelMap.end());
+        loopBBs.push_back(labelMap[b]);
+      }
+
+      SmallVector<Loop::Edge, 8> exitBBs;
+      loop.first->getExitEdges(exitBBs);
+      for(auto b : exitBBs){
+        GBE_ASSERT(labelMap.find(b.first) != labelMap.end());
+        GBE_ASSERT(labelMap.find(b.second) != labelMap.end());
+        loopExits.push_back(std::make_pair(labelMap[b.first], labelMap[b.second]));
+      }
+      fn.addLoop(loopBBs, loopExits);
+    }
+  }
 
   void GenWriter::emitFunction(Function &F)
   {
@@ -1463,6 +1536,7 @@ namespace gbe
     }
 
     ctx.startFunction(F.getName());
+    ir::Function &fn = ctx.getFunction();
     this->regTranslator.clear();
     this->labelMap.clear();
     this->emitFunctionPrototype(F);
@@ -1483,11 +1557,13 @@ namespace gbe
     for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
       this->simplifyTerminator(BB);
 
+    // gather loop info, which is useful for liveness analysis
+    gatherLoopInfo(fn);
+
     // ... then, emit the instructions for all basic blocks
     pass = PASS_EMIT_INSTRUCTIONS;
     for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
       emitBasicBlock(BB);
-    ir::Function &fn = ctx.getFunction();
     ctx.endFunction();
 
     // Liveness can be shared when we optimized the immediates and the MOVs
-- 
1.7.9.5



More information about the Beignet mailing list