[Beignet] [PATCH] GBE: Fix register liveness issue under simd mode.

Zhigang Gong zhigang.gong at linux.intel.com
Mon Mar 24 22:30:19 PDT 2014


This patch LGTM. Pushed with slightly change to remove the extraLive
structure and merge it into the normal livein/out set. Thanks.

On Mon, Mar 24, 2014 at 09:46:21AM +0800, Ruiling Song wrote:
> As we run in SIMD mode with prediction mask to indicate active lanes,
> If a vreg is defined in a loop, and there are som uses of the vreg out of the loop,
> the define point may be run several times under *different* prediction mask.
> For these kinds of vreg, we must extend the vreg liveness into the whole loop.
> If we don't do this, it's liveness is killed before the def point inside loop.
> If the vreg's corresponding physical reg is assigned to other vreg during the
> killed period, and the instructions before kill point were re-executed with different prediction,
> the inactive lanes of vreg maybe over-written. Then the out-of-loop use will got wrong data.
> 
> This patch fixes the HaarFixture case in opencv.
> 
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
>  backend/src/ir/context.cpp            |    7 +--
>  backend/src/ir/function.cpp           |   16 ++++++
>  backend/src/ir/function.hpp           |   15 ++++++
>  backend/src/ir/liveness.cpp           |   96 +++++++++++++--------------------
>  backend/src/ir/liveness.hpp           |    2 +-
>  backend/src/llvm/llvm_gen_backend.cpp |   80 ++++++++++++++++++++++++++-
>  6 files changed, 151 insertions(+), 65 deletions(-)
> 
> diff --git a/backend/src/ir/context.cpp b/backend/src/ir/context.cpp
> index c89286e..bc14fd6 100644
> --- a/backend/src/ir/context.cpp
> +++ b/backend/src/ir/context.cpp
> @@ -76,13 +76,14 @@ namespace ir {
>      // function
>      lowerReturn(unit, fn->getName());
>  
> +    // Properly order labels and compute the CFG, it's needed by FunctionArgumentLower
> +    fn->sortLabels();
> +    fn->computeCFG();
> +
>      // Spill function argument to the stack if required and identify which
>      // function arguments can use constant push
>      lowerFunctionArguments(unit, fn->getName());
>  
> -    // Properly order labels and compute the CFG
> -    fn->sortLabels();
> -    fn->computeCFG();
>      const StackElem elem = fnStack.back();
>      fnStack.pop_back();
>      fn = elem.fn;
> diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp
> index 71dcc1f..a6aecb5 100644
> --- a/backend/src/ir/function.cpp
> +++ b/backend/src/ir/function.cpp
> @@ -52,6 +52,7 @@ namespace ir {
>  
>    Function::~Function(void) {
>      for (auto block : blocks) GBE_DELETE(block);
> +    for (auto loop : loops) GBE_DELETE(loop);
>      for (auto arg : args) GBE_DELETE(arg);
>    }
>  
> @@ -59,6 +60,10 @@ namespace ir {
>      return unit.getPointerFamily();
>    }
>  
> +  void Function::addLoop(const vector<LabelIndex> &bbs, const vector<std::pair<LabelIndex, LabelIndex>> &exits) {
> +    loops.push_back(GBE_NEW(Loop, bbs, exits));
> +  }
> +
>    void Function::sortLabels(void) {
>      uint32_t last = 0;
>  
> @@ -96,6 +101,17 @@ namespace ir {
>        }
>      });
>  
> +    // fix labels for loops
> +    for (auto &x : loops) {
> +      for (auto &y : x->bbs)
> +        y = labelMap[y];
> +
> +      for (auto &z : x->exits) {
> +        z.first = labelMap[z.first];
> +        z.second = labelMap[z.second];
> +      }
> +    }
> +
>      // Reset the label to block mapping
>      this->labels.resize(last);
>      foreachBlock([&](BasicBlock &bb) {
> diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
> index 8a42529..303c9ba 100644
> --- a/backend/src/ir/function.hpp
> +++ b/backend/src/ir/function.hpp
> @@ -134,6 +134,17 @@ namespace ir {
>      return arg0.offset < arg1.offset;
>    }
>  
> +  /*! CFG loops */
> +  struct Loop : public NonCopyable
> +  {
> +  public:
> +    Loop(const vector<LabelIndex> &in, const vector<std::pair<LabelIndex, LabelIndex>> &exit) :
> +    bbs(in), exits(exit) {}
> +    vector<LabelIndex> bbs;
> +    vector<std::pair<LabelIndex, LabelIndex>> exits;
> +    GBE_STRUCT(Loop);
> +  };
> +
>    /*! A function is :
>     *  - a register file
>     *  - a set of basic block layout into a CGF
> @@ -318,6 +329,9 @@ namespace ir {
>      INLINE const uint32_t getStackSize(void) const { return this->stackSize; }
>      /*! Push stack size. */
>      INLINE void pushStackSize(uint32_t step) { this->stackSize += step; }
> +    /*! add the loop info for later liveness analysis */
> +    void addLoop(const vector<LabelIndex> &bbs, const vector<std::pair<LabelIndex, LabelIndex>> &exits);
> +    INLINE const vector<Loop * > &getLoops() { return loops; }
>    private:
>      friend class Context;           //!< Can freely modify a function
>      std::string name;               //!< Function name
> @@ -327,6 +341,7 @@ namespace ir {
>      vector<BasicBlock*> labels;     //!< Each label points to a basic block
>      vector<Immediate> immediates;   //!< All immediate values in the function
>      vector<BasicBlock*> blocks;     //!< All chained basic blocks
> +    vector<Loop *> loops;           //!< Loops info of the function
>      RegisterFile file;              //!< RegisterDatas used by the instructions
>      Profile profile;                //!< Current function profile
>      PushMap pushMap;                //!< Pushed function arguments (reg->loc)
> diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp
> index 724d5c3..afab02b 100644
> --- a/backend/src/ir/liveness.cpp
> +++ b/backend/src/ir/liveness.cpp
> @@ -38,19 +38,11 @@ namespace ir {
>        if (op == OP_RET) {
>          workSet.insert(info);
>          info->liveOut.insert(ocl::retVal);
> -      } else if (op == OP_BRA) {
> -        // If this is a backward jump, put it to the extra work list.
> -        if (((BranchInstruction*)lastInsn)->getLabelIndex() < bb.getLabelIndex())
> -          extraWorkSet.insert(info);
>        }
>      });
>      // Now with iterative analysis, we compute liveout and livein sets
>      this->computeLiveInOut();
> -    for (auto it : extraWorkSet) {
> -      for (auto reg : it->liveOut) {
> -        it->extraLiveIn.insert(reg);
> -      }
> -    }
> +    // extend register (def in loop, use out-of-loop) liveness to the whole loop
>      this->computeExtraLiveInOut();
>    }
>  
> @@ -128,60 +120,46 @@ namespace ir {
>     }
>  
>  /*
> -  Consider the following scenario, %100's normal liveness will start from Ln-1's
> -  position. In normal analysis, the Ln-1 is not Ln's predecessor, thus the liveness
> -  of %100 will be passed to Ln and then will not be passed to L0.
> -
> -  But considering we are running on a multilane with predication's vector machine.
> -  The unconditional BR in Ln-1 may be removed and it will enter Ln with a subset of
> -  the revert set of Ln-1's predication. For example when running Ln-1, the active lane
> -  is 0-7, then at Ln the active lane is 8-15. Then at the end of Ln, a subset of 8-15
> -  will jump to L0. If a register %10 is allocated the same GRF as %100, given the fact
> -  that their normal liveness doesn't overlapped, the a subset of 8-15 lanes will be
> -  modified. If the %10 and %100 are the same vector data type, then we are fine. But if
> -  %100 is a float vector, and the %10 is a bool or short vector, then we hit a bug here.
> -
> -L0:
> -  ...
> -  %10 = 5
> -  ...
> -Ln-1:
> -  %100 = 2
> -  BR Ln+1
> -
> -Ln:
> -  ...
> -  BR(%xxx) L0
> -
> -Ln+1:
> -  %101 = %100 + 2;
> -  ...
> -
> -  The solution to fix this issue is to build another liveness data. We will start with
> -  those BBs with backward jump. Then pass all the liveOut register as extra liveIn
> -  of current BB and then forward this extra liveIn to all the blocks. This is very similar
> -  to the normal liveness analysis just with reverse direction.
> +  As we run in SIMD mode with prediction mask to indicate active lanes.
> +  If a vreg is defined in a loop, and there are som uses of the vreg out of the loop,
> +  the define point may be run several times under *different* prediction mask.
> +  For these kinds of vreg, we must extend the vreg liveness into the whole loop.
> +  If we don't do this, it's liveness is killed before the def point inside loop.
> +  If the vreg's corresponding physical reg is assigned to other vreg during the
> +  killed period, and the instructions before kill point were re-executed with different prediction,
> +  the inactive lanes of vreg maybe over-written. Then the out-of-loop use will got wrong data.
>  */
> +
>    void Liveness::computeExtraLiveInOut(void) {
> -    while(!extraWorkSet.empty()) {
> -      struct BlockInfo *currInfo = *extraWorkSet.begin();
> -      extraWorkSet.erase(currInfo);
> -      for (auto currInVar : currInfo->extraLiveIn)
> -        currInfo->extraLiveOut.insert(currInVar);
> -      bool isChanged = false;
> -      for (auto succ : currInfo->bb.getSuccessorSet()) {
> -        BlockInfo *succInfo = liveness[succ];
> -        for (auto currOutVar : currInfo->extraLiveOut) {
> -          bool changed = false;
> -          if (!succInfo->upwardUsed.contains(currOutVar)) {
> -            auto it  = succInfo->extraLiveIn.insert(currOutVar);
> -            changed = it.second;
> +    const vector<Loop *> &loops = fn.getLoops();
> +    if(loops.size() == 0) return;
> +
> +    for (auto l : loops) {
> +      for (auto x : l->exits) {
> +        const BasicBlock &a = fn.getBlock(x.first);
> +        const BasicBlock &b = fn.getBlock(x.second);
> +        BlockInfo * exiting = liveness[&a];
> +        BlockInfo * exit = liveness[&b];
> +        std::vector<Register> toExtend;
> +
> +        if(b.getPredecessorSet().size() > 1) {
> +          for (auto p : exit->upwardUsed)
> +            toExtend.push_back(p);
> +        } else {
> +          std::set_intersection(exiting->liveOut.begin(), exiting->liveOut.end(), exit->upwardUsed.begin(), exit->upwardUsed.end(), std::back_inserter(toExtend));
> +        }
> +        if (toExtend.size() == 0) continue;
> +
> +        for (auto bb : l->bbs) {
> +          BlockInfo * bI = liveness[&fn.getBlock(bb)];
> +          for(auto r : toExtend) {
> +            if(!bI->upwardUsed.contains(r))
> +              bI->extraLiveIn.insert(r);
> +            bI->extraLiveOut.insert(r);
>            }
> -          if (changed) isChanged = true;
>          }
> -        if (isChanged)
> -          extraWorkSet.insert(succInfo);}
> -    };
> +      }
> +    }
>  #if 0
>      fn.foreachBlock([this](const BasicBlock &bb){
>        printf("label %d:\n", bb.getLabelIndex());
> diff --git a/backend/src/ir/liveness.hpp b/backend/src/ir/liveness.hpp
> index 9198eae..2545053 100644
> --- a/backend/src/ir/liveness.hpp
> +++ b/backend/src/ir/liveness.hpp
> @@ -143,7 +143,7 @@ namespace ir {
>      void computeExtraLiveInOut(void);
>      /*! Set of work list block which has exit(return) instruction */
>      typedef set <struct BlockInfo*> WorkSet;
> -    WorkSet workSet, extraWorkSet;
> +    WorkSet workSet;
>  
>      /*! Use custom allocators */
>      GBE_CLASS(Liveness);
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 49fbc7b..80cdb37 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -489,7 +489,6 @@ namespace gbe
>        if(!bKernel) return false;
>  
>        LI = &getAnalysis<LoopInfo>();
> -
>        emitFunction(F);
>        return false;
>      }
> @@ -497,6 +496,8 @@ namespace gbe
>      virtual bool doFinalization(Module &M) { return false; }
>      /*! handle global variable register allocation (local, constant space) */
>      void allocateGlobalVariableRegister(Function &F);
> +    /*! gather all the loops in the function and add them to ir::Function */
> +    void gatherLoopInfo(ir::Function &fn);
>      /*! Emit the complete function code and declaration */
>      void emitFunction(Function &F);
>      /*! Handle input and output function parameters */
> @@ -1447,6 +1448,78 @@ namespace gbe
>      }
>  
>    }
> +  static INLINE void findAllLoops(LoopInfo * LI, std::vector<std::pair<Loop*, int>> &lp)
> +  {
> +      for (Loop::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) {
> +        lp.push_back(std::make_pair(*I, -1));
> +      }
> +      if (lp.size() == 0) return;
> +
> +      uint32_t i = 0;
> +      do {
> +        const std::vector<Loop*> subLoops = lp[i].first->getSubLoops();
> +        for(auto sub : subLoops)
> +          lp.push_back(std::make_pair(sub, i));
> +        i++;
> +      } while(i < lp.size());
> +  }
> +
> +  void GenWriter::gatherLoopInfo(ir::Function &fn) {
> +    vector<ir::LabelIndex> loopBBs;
> +    vector<std::pair<ir::LabelIndex, ir::LabelIndex>> loopExits;
> +    std::vector<std::pair<Loop*, int>> lp;
> +
> +    findAllLoops(LI, lp);
> +#if GBE_DEBUG
> +    // check two loops' interference
> +    for(int i = 0; i < lp.size(); i++) {
> +        SmallVector<Loop::Edge, 8> exitBBs;
> +        lp[i].first->getExitEdges(exitBBs);
> +
> +      const std::vector<BasicBlock*> &inBBs = lp[i].first->getBlocks();
> +      std::vector<ir::LabelIndex> bbs1;
> +      for(auto x : inBBs) {
> +        bbs1.push_back(labelMap[x]);
> +      }
> +      std::sort(bbs1.begin(), bbs1.end());
> +      for(int j = i+1; j < lp.size(); j++) {
> +        if(! lp[i].first->contains(lp[j].first)) {
> +          const std::vector<BasicBlock*> &inBBs2 = lp[j].first->getBlocks();
> +          std::vector<ir::LabelIndex> bbs2;
> +          std::vector<ir::LabelIndex> bbs3;
> +
> +          for(auto x : inBBs2) {
> +            bbs2.push_back(labelMap[x]);
> +          }
> +
> +          std::sort(bbs2.begin(), bbs2.end());
> +          std::set_intersection(bbs1.begin(), bbs1.end(), bbs2.begin(), bbs2.end(), std::back_inserter(bbs3));
> +          GBE_ASSERT(bbs3.size() < 1);
> +        }
> +      }
> +    }
> +#endif
> +
> +    for (auto loop : lp) {
> +      loopBBs.clear();
> +      loopExits.clear();
> +
> +      const std::vector<BasicBlock*> &inBBs = loop.first->getBlocks();
> +      for (auto b : inBBs) {
> +        GBE_ASSERT(labelMap.find(b) != labelMap.end());
> +        loopBBs.push_back(labelMap[b]);
> +      }
> +
> +      SmallVector<Loop::Edge, 8> exitBBs;
> +      loop.first->getExitEdges(exitBBs);
> +      for(auto b : exitBBs){
> +        GBE_ASSERT(labelMap.find(b.first) != labelMap.end());
> +        GBE_ASSERT(labelMap.find(b.second) != labelMap.end());
> +        loopExits.push_back(std::make_pair(labelMap[b.first], labelMap[b.second]));
> +      }
> +      fn.addLoop(loopBBs, loopExits);
> +    }
> +  }
>  
>    void GenWriter::emitFunction(Function &F)
>    {
> @@ -1463,6 +1536,7 @@ namespace gbe
>      }
>  
>      ctx.startFunction(F.getName());
> +    ir::Function &fn = ctx.getFunction();
>      this->regTranslator.clear();
>      this->labelMap.clear();
>      this->emitFunctionPrototype(F);
> @@ -1483,11 +1557,13 @@ namespace gbe
>      for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
>        this->simplifyTerminator(BB);
>  
> +    // gather loop info, which is useful for liveness analysis
> +    gatherLoopInfo(fn);
> +
>      // ... then, emit the instructions for all basic blocks
>      pass = PASS_EMIT_INSTRUCTIONS;
>      for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
>        emitBasicBlock(BB);
> -    ir::Function &fn = ctx.getFunction();
>      ctx.endFunction();
>  
>      // Liveness can be shared when we optimized the immediates and the MOVs
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list