[Beignet] [PATCH] GBE: Fix register liveness issue under simd mode.
Zhigang Gong
zhigang.gong at linux.intel.com
Mon Mar 24 22:30:19 PDT 2014
This patch LGTM. Pushed with slightly change to remove the extraLive
structure and merge it into the normal livein/out set. Thanks.
On Mon, Mar 24, 2014 at 09:46:21AM +0800, Ruiling Song wrote:
> As we run in SIMD mode with prediction mask to indicate active lanes,
> If a vreg is defined in a loop, and there are som uses of the vreg out of the loop,
> the define point may be run several times under *different* prediction mask.
> For these kinds of vreg, we must extend the vreg liveness into the whole loop.
> If we don't do this, it's liveness is killed before the def point inside loop.
> If the vreg's corresponding physical reg is assigned to other vreg during the
> killed period, and the instructions before kill point were re-executed with different prediction,
> the inactive lanes of vreg maybe over-written. Then the out-of-loop use will got wrong data.
>
> This patch fixes the HaarFixture case in opencv.
>
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
> backend/src/ir/context.cpp | 7 +--
> backend/src/ir/function.cpp | 16 ++++++
> backend/src/ir/function.hpp | 15 ++++++
> backend/src/ir/liveness.cpp | 96 +++++++++++++--------------------
> backend/src/ir/liveness.hpp | 2 +-
> backend/src/llvm/llvm_gen_backend.cpp | 80 ++++++++++++++++++++++++++-
> 6 files changed, 151 insertions(+), 65 deletions(-)
>
> diff --git a/backend/src/ir/context.cpp b/backend/src/ir/context.cpp
> index c89286e..bc14fd6 100644
> --- a/backend/src/ir/context.cpp
> +++ b/backend/src/ir/context.cpp
> @@ -76,13 +76,14 @@ namespace ir {
> // function
> lowerReturn(unit, fn->getName());
>
> + // Properly order labels and compute the CFG, it's needed by FunctionArgumentLower
> + fn->sortLabels();
> + fn->computeCFG();
> +
> // Spill function argument to the stack if required and identify which
> // function arguments can use constant push
> lowerFunctionArguments(unit, fn->getName());
>
> - // Properly order labels and compute the CFG
> - fn->sortLabels();
> - fn->computeCFG();
> const StackElem elem = fnStack.back();
> fnStack.pop_back();
> fn = elem.fn;
> diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp
> index 71dcc1f..a6aecb5 100644
> --- a/backend/src/ir/function.cpp
> +++ b/backend/src/ir/function.cpp
> @@ -52,6 +52,7 @@ namespace ir {
>
> Function::~Function(void) {
> for (auto block : blocks) GBE_DELETE(block);
> + for (auto loop : loops) GBE_DELETE(loop);
> for (auto arg : args) GBE_DELETE(arg);
> }
>
> @@ -59,6 +60,10 @@ namespace ir {
> return unit.getPointerFamily();
> }
>
> + void Function::addLoop(const vector<LabelIndex> &bbs, const vector<std::pair<LabelIndex, LabelIndex>> &exits) {
> + loops.push_back(GBE_NEW(Loop, bbs, exits));
> + }
> +
> void Function::sortLabels(void) {
> uint32_t last = 0;
>
> @@ -96,6 +101,17 @@ namespace ir {
> }
> });
>
> + // fix labels for loops
> + for (auto &x : loops) {
> + for (auto &y : x->bbs)
> + y = labelMap[y];
> +
> + for (auto &z : x->exits) {
> + z.first = labelMap[z.first];
> + z.second = labelMap[z.second];
> + }
> + }
> +
> // Reset the label to block mapping
> this->labels.resize(last);
> foreachBlock([&](BasicBlock &bb) {
> diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
> index 8a42529..303c9ba 100644
> --- a/backend/src/ir/function.hpp
> +++ b/backend/src/ir/function.hpp
> @@ -134,6 +134,17 @@ namespace ir {
> return arg0.offset < arg1.offset;
> }
>
> + /*! CFG loops */
> + struct Loop : public NonCopyable
> + {
> + public:
> + Loop(const vector<LabelIndex> &in, const vector<std::pair<LabelIndex, LabelIndex>> &exit) :
> + bbs(in), exits(exit) {}
> + vector<LabelIndex> bbs;
> + vector<std::pair<LabelIndex, LabelIndex>> exits;
> + GBE_STRUCT(Loop);
> + };
> +
> /*! A function is :
> * - a register file
> * - a set of basic block layout into a CGF
> @@ -318,6 +329,9 @@ namespace ir {
> INLINE const uint32_t getStackSize(void) const { return this->stackSize; }
> /*! Push stack size. */
> INLINE void pushStackSize(uint32_t step) { this->stackSize += step; }
> + /*! add the loop info for later liveness analysis */
> + void addLoop(const vector<LabelIndex> &bbs, const vector<std::pair<LabelIndex, LabelIndex>> &exits);
> + INLINE const vector<Loop * > &getLoops() { return loops; }
> private:
> friend class Context; //!< Can freely modify a function
> std::string name; //!< Function name
> @@ -327,6 +341,7 @@ namespace ir {
> vector<BasicBlock*> labels; //!< Each label points to a basic block
> vector<Immediate> immediates; //!< All immediate values in the function
> vector<BasicBlock*> blocks; //!< All chained basic blocks
> + vector<Loop *> loops; //!< Loops info of the function
> RegisterFile file; //!< RegisterDatas used by the instructions
> Profile profile; //!< Current function profile
> PushMap pushMap; //!< Pushed function arguments (reg->loc)
> diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp
> index 724d5c3..afab02b 100644
> --- a/backend/src/ir/liveness.cpp
> +++ b/backend/src/ir/liveness.cpp
> @@ -38,19 +38,11 @@ namespace ir {
> if (op == OP_RET) {
> workSet.insert(info);
> info->liveOut.insert(ocl::retVal);
> - } else if (op == OP_BRA) {
> - // If this is a backward jump, put it to the extra work list.
> - if (((BranchInstruction*)lastInsn)->getLabelIndex() < bb.getLabelIndex())
> - extraWorkSet.insert(info);
> }
> });
> // Now with iterative analysis, we compute liveout and livein sets
> this->computeLiveInOut();
> - for (auto it : extraWorkSet) {
> - for (auto reg : it->liveOut) {
> - it->extraLiveIn.insert(reg);
> - }
> - }
> + // extend register (def in loop, use out-of-loop) liveness to the whole loop
> this->computeExtraLiveInOut();
> }
>
> @@ -128,60 +120,46 @@ namespace ir {
> }
>
> /*
> - Consider the following scenario, %100's normal liveness will start from Ln-1's
> - position. In normal analysis, the Ln-1 is not Ln's predecessor, thus the liveness
> - of %100 will be passed to Ln and then will not be passed to L0.
> -
> - But considering we are running on a multilane with predication's vector machine.
> - The unconditional BR in Ln-1 may be removed and it will enter Ln with a subset of
> - the revert set of Ln-1's predication. For example when running Ln-1, the active lane
> - is 0-7, then at Ln the active lane is 8-15. Then at the end of Ln, a subset of 8-15
> - will jump to L0. If a register %10 is allocated the same GRF as %100, given the fact
> - that their normal liveness doesn't overlapped, the a subset of 8-15 lanes will be
> - modified. If the %10 and %100 are the same vector data type, then we are fine. But if
> - %100 is a float vector, and the %10 is a bool or short vector, then we hit a bug here.
> -
> -L0:
> - ...
> - %10 = 5
> - ...
> -Ln-1:
> - %100 = 2
> - BR Ln+1
> -
> -Ln:
> - ...
> - BR(%xxx) L0
> -
> -Ln+1:
> - %101 = %100 + 2;
> - ...
> -
> - The solution to fix this issue is to build another liveness data. We will start with
> - those BBs with backward jump. Then pass all the liveOut register as extra liveIn
> - of current BB and then forward this extra liveIn to all the blocks. This is very similar
> - to the normal liveness analysis just with reverse direction.
> + As we run in SIMD mode with prediction mask to indicate active lanes.
> + If a vreg is defined in a loop, and there are som uses of the vreg out of the loop,
> + the define point may be run several times under *different* prediction mask.
> + For these kinds of vreg, we must extend the vreg liveness into the whole loop.
> + If we don't do this, it's liveness is killed before the def point inside loop.
> + If the vreg's corresponding physical reg is assigned to other vreg during the
> + killed period, and the instructions before kill point were re-executed with different prediction,
> + the inactive lanes of vreg maybe over-written. Then the out-of-loop use will got wrong data.
> */
> +
> void Liveness::computeExtraLiveInOut(void) {
> - while(!extraWorkSet.empty()) {
> - struct BlockInfo *currInfo = *extraWorkSet.begin();
> - extraWorkSet.erase(currInfo);
> - for (auto currInVar : currInfo->extraLiveIn)
> - currInfo->extraLiveOut.insert(currInVar);
> - bool isChanged = false;
> - for (auto succ : currInfo->bb.getSuccessorSet()) {
> - BlockInfo *succInfo = liveness[succ];
> - for (auto currOutVar : currInfo->extraLiveOut) {
> - bool changed = false;
> - if (!succInfo->upwardUsed.contains(currOutVar)) {
> - auto it = succInfo->extraLiveIn.insert(currOutVar);
> - changed = it.second;
> + const vector<Loop *> &loops = fn.getLoops();
> + if(loops.size() == 0) return;
> +
> + for (auto l : loops) {
> + for (auto x : l->exits) {
> + const BasicBlock &a = fn.getBlock(x.first);
> + const BasicBlock &b = fn.getBlock(x.second);
> + BlockInfo * exiting = liveness[&a];
> + BlockInfo * exit = liveness[&b];
> + std::vector<Register> toExtend;
> +
> + if(b.getPredecessorSet().size() > 1) {
> + for (auto p : exit->upwardUsed)
> + toExtend.push_back(p);
> + } else {
> + std::set_intersection(exiting->liveOut.begin(), exiting->liveOut.end(), exit->upwardUsed.begin(), exit->upwardUsed.end(), std::back_inserter(toExtend));
> + }
> + if (toExtend.size() == 0) continue;
> +
> + for (auto bb : l->bbs) {
> + BlockInfo * bI = liveness[&fn.getBlock(bb)];
> + for(auto r : toExtend) {
> + if(!bI->upwardUsed.contains(r))
> + bI->extraLiveIn.insert(r);
> + bI->extraLiveOut.insert(r);
> }
> - if (changed) isChanged = true;
> }
> - if (isChanged)
> - extraWorkSet.insert(succInfo);}
> - };
> + }
> + }
> #if 0
> fn.foreachBlock([this](const BasicBlock &bb){
> printf("label %d:\n", bb.getLabelIndex());
> diff --git a/backend/src/ir/liveness.hpp b/backend/src/ir/liveness.hpp
> index 9198eae..2545053 100644
> --- a/backend/src/ir/liveness.hpp
> +++ b/backend/src/ir/liveness.hpp
> @@ -143,7 +143,7 @@ namespace ir {
> void computeExtraLiveInOut(void);
> /*! Set of work list block which has exit(return) instruction */
> typedef set <struct BlockInfo*> WorkSet;
> - WorkSet workSet, extraWorkSet;
> + WorkSet workSet;
>
> /*! Use custom allocators */
> GBE_CLASS(Liveness);
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 49fbc7b..80cdb37 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -489,7 +489,6 @@ namespace gbe
> if(!bKernel) return false;
>
> LI = &getAnalysis<LoopInfo>();
> -
> emitFunction(F);
> return false;
> }
> @@ -497,6 +496,8 @@ namespace gbe
> virtual bool doFinalization(Module &M) { return false; }
> /*! handle global variable register allocation (local, constant space) */
> void allocateGlobalVariableRegister(Function &F);
> + /*! gather all the loops in the function and add them to ir::Function */
> + void gatherLoopInfo(ir::Function &fn);
> /*! Emit the complete function code and declaration */
> void emitFunction(Function &F);
> /*! Handle input and output function parameters */
> @@ -1447,6 +1448,78 @@ namespace gbe
> }
>
> }
> + static INLINE void findAllLoops(LoopInfo * LI, std::vector<std::pair<Loop*, int>> &lp)
> + {
> + for (Loop::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) {
> + lp.push_back(std::make_pair(*I, -1));
> + }
> + if (lp.size() == 0) return;
> +
> + uint32_t i = 0;
> + do {
> + const std::vector<Loop*> subLoops = lp[i].first->getSubLoops();
> + for(auto sub : subLoops)
> + lp.push_back(std::make_pair(sub, i));
> + i++;
> + } while(i < lp.size());
> + }
> +
> + void GenWriter::gatherLoopInfo(ir::Function &fn) {
> + vector<ir::LabelIndex> loopBBs;
> + vector<std::pair<ir::LabelIndex, ir::LabelIndex>> loopExits;
> + std::vector<std::pair<Loop*, int>> lp;
> +
> + findAllLoops(LI, lp);
> +#if GBE_DEBUG
> + // check two loops' interference
> + for(int i = 0; i < lp.size(); i++) {
> + SmallVector<Loop::Edge, 8> exitBBs;
> + lp[i].first->getExitEdges(exitBBs);
> +
> + const std::vector<BasicBlock*> &inBBs = lp[i].first->getBlocks();
> + std::vector<ir::LabelIndex> bbs1;
> + for(auto x : inBBs) {
> + bbs1.push_back(labelMap[x]);
> + }
> + std::sort(bbs1.begin(), bbs1.end());
> + for(int j = i+1; j < lp.size(); j++) {
> + if(! lp[i].first->contains(lp[j].first)) {
> + const std::vector<BasicBlock*> &inBBs2 = lp[j].first->getBlocks();
> + std::vector<ir::LabelIndex> bbs2;
> + std::vector<ir::LabelIndex> bbs3;
> +
> + for(auto x : inBBs2) {
> + bbs2.push_back(labelMap[x]);
> + }
> +
> + std::sort(bbs2.begin(), bbs2.end());
> + std::set_intersection(bbs1.begin(), bbs1.end(), bbs2.begin(), bbs2.end(), std::back_inserter(bbs3));
> + GBE_ASSERT(bbs3.size() < 1);
> + }
> + }
> + }
> +#endif
> +
> + for (auto loop : lp) {
> + loopBBs.clear();
> + loopExits.clear();
> +
> + const std::vector<BasicBlock*> &inBBs = loop.first->getBlocks();
> + for (auto b : inBBs) {
> + GBE_ASSERT(labelMap.find(b) != labelMap.end());
> + loopBBs.push_back(labelMap[b]);
> + }
> +
> + SmallVector<Loop::Edge, 8> exitBBs;
> + loop.first->getExitEdges(exitBBs);
> + for(auto b : exitBBs){
> + GBE_ASSERT(labelMap.find(b.first) != labelMap.end());
> + GBE_ASSERT(labelMap.find(b.second) != labelMap.end());
> + loopExits.push_back(std::make_pair(labelMap[b.first], labelMap[b.second]));
> + }
> + fn.addLoop(loopBBs, loopExits);
> + }
> + }
>
> void GenWriter::emitFunction(Function &F)
> {
> @@ -1463,6 +1536,7 @@ namespace gbe
> }
>
> ctx.startFunction(F.getName());
> + ir::Function &fn = ctx.getFunction();
> this->regTranslator.clear();
> this->labelMap.clear();
> this->emitFunctionPrototype(F);
> @@ -1483,11 +1557,13 @@ namespace gbe
> for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
> this->simplifyTerminator(BB);
>
> + // gather loop info, which is useful for liveness analysis
> + gatherLoopInfo(fn);
> +
> // ... then, emit the instructions for all basic blocks
> pass = PASS_EMIT_INSTRUCTIONS;
> for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
> emitBasicBlock(BB);
> - ir::Function &fn = ctx.getFunction();
> ctx.endFunction();
>
> // Liveness can be shared when we optimized the immediates and the MOVs
> --
> 1.7.9.5
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list