[Beignet] [PATCH 2/2] GBE: Improve spill policy by considering use count.

Pan, Xiuli xiuli.pan at intel.com
Fri Jun 24 08:09:00 UTC 2016



-----Original Message-----
From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Ruiling Song
Sent: Tuesday, June 21, 2016 9:03 AM
To: beignet at lists.freedesktop.org
Cc: Song, Ruiling <ruiling.song at intel.com>
Subject: [Beignet] [PATCH 2/2] GBE: Improve spill policy by considering use count.

In this patch, We use usecount/liverange as the spill cost.
And try to minimize the spill cost when do register spilling.
For registers inside loop, we will use 10**loop_depth to approximate
the use count.

When we spill for a scalar register, that is easy, just find
the candidate with least spill cost. But when spilling for a vector
of registers, we need to search in physical register space to make
enough room for the requested size, and then we will choose the group
of registers with least spill cost.

Note I change to always allocate for dst SelectionVector. As dst
SelectionVector normally don't introduce too much register pressure.
Allocating SelectionVector will greatly reduce MOVs. I think we
need to do further optimization for SelectionVector. We need to
do dynamic decision based on register pressure. If allowed, we should
try to allocate SelectionVector as much as possible, if register
pressure not allowed, then we need to try to add extra MOVs or even spilling.

Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
 backend/src/backend/context.cpp            |  33 +++
 backend/src/backend/context.hpp            |   1 +
 backend/src/backend/gen_reg_allocation.cpp | 329 +++++++++++++++++++----------
 backend/src/ir/function.cpp                |  34 ++-
 backend/src/ir/function.hpp                |  15 +-
 backend/src/llvm/llvm_gen_backend.cpp      |   2 +-
 6 files changed, 300 insertions(+), 114 deletions(-)

diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index 0991786..675dc78 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -48,6 +48,10 @@ namespace gbe
     /*! Free the given register file piece */
     void deallocate(int32_t offset);
 
+    /*! check whether a super register is in free list,
+     *  a super register means a 32byte register, Gen
+     *  often has 128 super registers*/
+    bool isSuperRegisterFree(int32_t offset);
     /*! Spilt a block into 2 blocks */
     void splitBlock(int32_t offset, int32_t subOffset);
 
@@ -65,6 +69,7 @@ namespace gbe
      *  If the colascing was done, the left block is deleted
      */
     void coalesce(Block *left, Block *right);
+    void dumpFreeList();
     /*! the maximum offset */
     int32_t maxOffset;
     /*! Head and tail of the free list */
@@ -120,6 +125,30 @@ namespace gbe
     }
   }
 
+  void SimpleAllocator::dumpFreeList() {
+    Block *s = head;
+    printf("register free list:\n");
+    while (s) {
+      printf("blk: %d(r%d.%d) (%d)\n", s->offset, s->offset/GEN_REG_SIZE, s->offset % GEN_REG_SIZE, s->size);
+      s = s->next;
+    }
+    printf("free list end\n");
+  }
+
+  bool SimpleAllocator::isSuperRegisterFree(int32_t offset) {
+    assert((offset % GEN_REG_SIZE) == 0);
+    Block *s = head;
+    while (s) {
+      if (s->offset <= offset && (s->offset+s->size) >= offset+GEN_REG_SIZE) {
+        return true;
+      }
+      if (s->offset > offset)
+        return false;
+      s = s->next;
+    }
+    return false;
+  }
+
   int32_t SimpleAllocator::allocate(int32_t size, int32_t alignment, bool bFwd)
   {
     // Make it simple and just use the first block we find
@@ -372,6 +401,10 @@ namespace gbe
     return registerAllocator->allocate(size, alignment, bFwd);
   }
 
+  bool Context::isSuperRegisterFree(int offset) {
+    return registerAllocator->isSuperRegisterFree(offset);
+  }
+
   void Context::deallocate(int32_t offset) { registerAllocator->deallocate(offset); }
 
   void Context::splitBlock(int32_t offset, int32_t subOffset) {
diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp
index 7524949..1567bd6 100644
--- a/backend/src/backend/context.hpp
+++ b/backend/src/backend/context.hpp
@@ -86,6 +86,7 @@ namespace gbe
     }
     /*! Allocate some memory in the register file */
     int32_t allocate(int32_t size, int32_t alignment, bool bFwd = true);
+    bool isSuperRegisterFree(int offset);
     /*! Deallocate previously allocated memory */
     void deallocate(int32_t offset);
     /*! Spilt a block into 2 blocks, for some registers allocate together but  deallocate seperate */
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 26a010d..c3cc3f6 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -49,49 +49,22 @@ namespace gbe
    */
   struct GenRegInterval {
     INLINE GenRegInterval(ir::Register reg) :
-      reg(reg), minID(INT_MAX), maxID(-INT_MAX), conflictReg(0), b3OpAlign(0) {}
+      reg(reg), minID(INT_MAX), maxID(-INT_MAX), accessCount(0),
+      conflictReg(0), b3OpAlign(0) {}
     ir::Register reg;     //!< (virtual) register of the interval
     int32_t minID, maxID; //!< Starting and ending points
+    int32_t accessCount;
     ir::Register conflictReg; // < has banck conflict with this register
     bool b3OpAlign;
   };
 
-  typedef struct GenRegIntervalKey {
-    GenRegIntervalKey(uint32_t reg, int32_t maxID) {
-      key = ((uint64_t)maxID << 32) | reg;
-    }
-    const ir::Register getReg() const {
-      return (ir::Register)(key & 0xFFFFFFFF);
-    }
-    int32_t getMaxID() const {
-      return key >> 32;
-    }
-    uint64_t key;
-  } GenRegIntervalKey;
-
-  struct spillCmp {
-    bool operator () (const GenRegIntervalKey &lhs, const GenRegIntervalKey &rhs) const
-    { return lhs.key > rhs.key; }
-  };
-
-  typedef set <GenRegIntervalKey, spillCmp> SpillSet;
-
-  class SpillCandidateSet : public SpillSet
-  {
-  public:
-    std::set<GenRegIntervalKey, spillCmp>::iterator find(GenRegInterval interval) {
-      GenRegIntervalKey key(interval.reg, interval.maxID);
-      return SpillSet::find(key);
-    }
-    void insert(GenRegInterval interval) {
-      GenRegIntervalKey key(interval.reg, interval.maxID);
-      SpillSet::insert(key);
-    }
-    void erase(GenRegInterval interval) {
-      GenRegIntervalKey key(interval.reg, interval.maxID);
-      SpillSet::erase(key);
-    }
+  struct SpillInterval {
+    SpillInterval(const ir::Register r, float c):
+      reg(r), cost(c) {}
+    ir::Register reg;
+    float cost;
   };
+  typedef std::vector<SpillInterval>::iterator SpillIntervalIter;
 
   /*! Implements the register allocation */
   class GenRegAllocator::Opaque
@@ -131,6 +104,9 @@ namespace gbe
     bool expireFlag(const GenRegInterval &limit);
     /*! Allocate the virtual boolean (== flags) registers */
     void allocateFlags(Selection &selection);
+    /*! calculate the spill cost, what we store here is 'use count',
+     * we use [use count]/[live range] as spill cost */
+    void calculateSpillCost(Selection &selection);
     /*! validated flags which contains valid value in the physical flag register */
     set<uint32_t> validatedFlags;
     /*! validated temp flag register which indicate the flag 0,1 contains which virtual flag register. */
@@ -181,7 +157,7 @@ namespace gbe
     /*! registers that are spilled */
     SpilledRegs spilledRegs;
     /*! register which could be spilled.*/
-    SpillCandidateSet spillCandidate;
+    std::set<GenRegInterval*> spillCandidate;
     /*! BBs last instruction ID map */
     map<const ir::BasicBlock *, int32_t> bbLastInsnIDMap;
     /* reserved registers for register spill/reload */
@@ -191,6 +167,8 @@ namespace gbe
     INLINE void insertNewReg(const Selection &selection, ir::Register reg, uint32_t grfOffset, bool isVector = false);
     INLINE bool expireReg(ir::Register reg);
     INLINE bool spillAtInterval(GenRegInterval interval, int size, uint32_t alignment);
+    INLINE bool findNextSpillCandidate(std::vector<SpillInterval> &candidate,
+                int &remainSize, int &offset, SpillIntervalIter &nextCand);
     INLINE uint32_t allocateReg(GenRegInterval interval, uint32_t size, uint32_t alignment);
     INLINE bool spillReg(GenRegInterval interval, bool isAllocated = false);
     INLINE bool spillReg(ir::Register reg, bool isAllocated = false);
@@ -205,11 +183,13 @@ namespace gbe
       ir::Register reg;
       if (isSrc) {
         reg = sel.replaceSrc(insn, regID, type, needMov);
+        assert(reg == intervals.size());
         intervals.push_back(reg);
         intervals[reg].minID = insn->ID - 1;
         intervals[reg].maxID = insn->ID;
       } else {
         reg = sel.replaceDst(insn, regID, type, needMov);
+        assert(reg == intervals.size());
         intervals.push_back(reg);
         intervals[reg].minID = insn->ID;
         intervals[reg].maxID = insn->ID + 1;
@@ -348,10 +328,13 @@ namespace gbe
       // case 1: the register is not already in a vector, so it can stay in this
       // vector. Note that local IDs are *non-scalar* special registers but will
       // require a MOV anyway since pre-allocated in the CURBE
+      // for dst SelectionVector, we can always try to allocate them even under
+      // spilling, reason is that its components can be expired separately, so,
+      // it does not introduce too much register pressure.
       if (it == vectorMap.end() &&
           ctx.sel->isScalarReg(reg) == false &&
           ctx.isSpecialReg(reg) == false &&
-          ctx.reservedSpillRegs == 0 )
+          (ctx.reservedSpillRegs == 0 || !vector->isSrc) )
       {
         const VectorLocation location = std::make_pair(vector, regID);
         this->vectorMap.insert(std::make_pair(reg, location));
@@ -857,8 +840,8 @@ namespace gbe
 
     ctx.deallocate(it->second);
     if (reservedReg != 0
-        && (spillCandidate.find(intervals[reg]) != spillCandidate.end())) {
-        spillCandidate.erase(intervals[reg]);
+        && (spillCandidate.find(&intervals[reg]) != spillCandidate.end())) {
+        spillCandidate.erase(&intervals[reg]);
         /* offset --> reg map should keep updated. */
         offsetReg.erase(it->second);
     }
@@ -891,7 +874,7 @@ namespace gbe
           && !selection.isPartialWrite(reg)) {
          GBE_ASSERT(offsetReg.find(grfOffset) == offsetReg.end());
          offsetReg.insert(std::make_pair(grfOffset, reg));
-         spillCandidate.insert(intervals[reg]);
+         spillCandidate.insert(&intervals[reg]);
        }
      }
   }
@@ -936,92 +919,190 @@ namespace gbe
   // FIXME we may need to fix those unspillable vector in the furture.
   INLINE bool GenRegAllocator::Opaque::vectorCanSpill(SelectionVector *vector) {
     for(uint32_t id = 0; id < vector->regNum; id++)
-      if (spillCandidate.find(intervals[(ir::Register)(vector->reg[id].value.reg)])
+      if (spillCandidate.find(&intervals[(ir::Register)(vector->reg[id].value.reg)])
           == spillCandidate.end())
         return false;
     return true;
   }
 
+  INLINE float getSpillCost(const GenRegInterval &v) {
+    // check minID maxId value
+    assert(v.maxID > v.minID);

What about maxID == minID.

+    // FIXME some register may get access count of 0, need to be fixed.
+    float count = v.accessCount == 0 ? (float)2 : (float)v.accessCount;
+    return count / (float)(v.maxID - v.minID);
+  }
+
+  bool spillinterval_cmp(const SpillInterval &v1, const SpillInterval &v2) {
+    return v1.cost < v2.cost;
+  }
+
+  INLINE SpillIntervalIter findRegisterInSpillQueue(
+                           std::vector<SpillInterval> &cand, ir::Register reg) {
+    for (SpillIntervalIter it = cand.begin(); it != cand.end(); ++it) {
+      if (it->reg == reg)
+        return it;
+    }
+    return cand.end();
+  }
+  // The function tries to search in 'free physical register' and 'candidate'.
+  // so, the result may be on of the three possible situations:
+  // 1. search completed, find the next valid iterator to a candidate.
+  // 2. search ended, because we met unspillable register, we have to drop the iteration
+  // 3. search completed, there are enough free physical register.
+  //
+  // return value: should we break? because of:
+  // 1. search end, found enough free register
+  // 2. search end, because met unspillable register
+  INLINE bool GenRegAllocator::Opaque::findNextSpillCandidate(
+              std::vector<SpillInterval> &candidate, int &remainSize,
+              int &offset, SpillIntervalIter &nextCand) {
+    bool isFree = false;
+    bool shouldBreak = false;
+    do {
+      // check is free?
+      isFree = ctx.isSuperRegisterFree(offset);
+
+      if (isFree) {
+        remainSize -= GEN_REG_SIZE;
+        offset += GEN_REG_SIZE;
+      }
+    } while(isFree && remainSize > 0);
+
+    // done
+    if (remainSize <= 0) return true;
+
+    auto registerIter = offsetReg.find(offset);
+    shouldBreak = registerIter == offsetReg.end();
+
+    if (!shouldBreak) {
+      ir::Register reg = registerIter->second;
+      nextCand = findRegisterInSpillQueue(candidate, reg);
+    }
+    // if shouldBreak is false, means we need go on
+    return shouldBreak;
+  }
+
   INLINE bool GenRegAllocator::Opaque::spillAtInterval(GenRegInterval interval,
                                                        int size,
                                                        uint32_t alignment) {
     if (reservedReg == 0)
       return false;
-    auto it = spillCandidate.begin();
-    // If there is no spill candidate or current register is spillable and current register's
-    // endpoint is after all the spillCandidate register's endpoint we return false. The
-    // caller will spill current register.
-    // At simd16 mode, we will always try to spill here rather than return to the caller.
-    // The reason is that the caller may have a vector to allocate, and some element may be
-    // temporary registers which could not be spilled.
-    if (it == spillCandidate.end()
-        || (ctx.getSimdWidth() == 8 && (it->getMaxID() <= interval.maxID
-            && alignment == ctx.getSimdWidth()/8 * GEN_REG_SIZE)))
+
+    if (spillCandidate.empty())
       return false;
 
-    ir::Register reg = it->getReg();
-    set<ir::Register> spillSet;
-    int32_t savedSize = size;
-    while(size > 0) {
-      auto vectorIt = vectorMap.find(reg);
-      bool isVector = vectorIt != vectorMap.end();
-      bool needRestart = false;
-      ir::RegisterFamily family = ctx.sel->getRegisterFamily(reg);
-      if (isVector
-          && (vectorCanSpill(vectorIt->second.first))) {
-        const SelectionVector *vector = vectorIt->second.first;
-        for (uint32_t id = 0; id < vector->regNum; id++) {
-          GBE_ASSERT(spilledRegs.find(vector->reg[id].reg())
-                     == spilledRegs.end());
-          spillSet.insert(vector->reg[id].reg());
-          reg = vector->reg[id].reg();
-          family = ctx.sel->getRegisterFamily(reg);
-          size -= family == ir::FAMILY_QWORD ? 2 * GEN_REG_SIZE * ctx.getSimdWidth()/8
-                                             : GEN_REG_SIZE * ctx.getSimdWidth()/8;
+    // push spill candidate into a vector in ascending order of spill-cost.
+    std::vector<SpillInterval> candQ;
+    for (auto &p : spillCandidate) {
+      float cost = getSpillCost(*p);
+      candQ.push_back(SpillInterval(p->reg, cost));
+    }
+    std::sort(candQ.begin(), candQ.end(), spillinterval_cmp);
+
+    bool scalarAllocationFail = (vectorMap.find(interval.reg) == vectorMap.end());
+
+    int remainSize = size;
+    float spillCostTotal = 0.0f;
+    std::set<ir::Register> spillSet;
+    // if we search the whole register, it will take lots of time.
+    // so, I just add this max value to make the compile time not
+    // grow too much, although this method may not find truely lowest
+    // spill cost candidates.
+    const int spillGroupMax = 8;
+    int spillGroupID = 0;
+
+    std::vector<std::set<ir::Register>> spillGroups;
+    std::vector<float> spillGroupCost;
+
+    auto searchBegin = candQ.begin();
+    while (searchBegin != candQ.end() && spillGroupID < spillGroupMax) {
+      auto contiguousIter = searchBegin;
+
+      while (contiguousIter != candQ.end()) {
+        ir::Register reg = contiguousIter->reg;
+
+        auto vectorIt = vectorMap.find(reg);
+        bool spillVector = (vectorIt != vectorMap.end());
+        int32_t nextOffset = -1;
+
+        // is register allocation failed at scalar register?
+        // if so, don't try to spill a vector register,
+        // which is obviously of no benefit.
+        if (scalarAllocationFail && spillVector) break;
+
+        if (spillVector) {
+          if (vectorCanSpill(vectorIt->second.first)) {
+            const SelectionVector *vector = vectorIt->second.first;
+            for (uint32_t id = 0; id < vector->regNum; id++) {
+              GBE_ASSERT(spilledRegs.find(vector->reg[id].reg())
+                         == spilledRegs.end());
+              spillSet.insert(vector->reg[id].reg());
+              reg = vector->reg[id].reg();
+              uint32_t s;
+              getRegAttrib(reg, s);
+              remainSize-= s;
+              spillCostTotal += contiguousIter->cost;
+            }
+          } else {
+            break;
+          }
+        } else {
+          spillSet.insert(reg);
+          uint32_t s;
+          getRegAttrib(reg, s);
+          spillCostTotal += contiguousIter->cost;
+          remainSize -= s;
         }
-      } else if (!isVector) {
-        spillSet.insert(reg);
-        size -= family == ir::FAMILY_QWORD ? 2 * GEN_REG_SIZE * ctx.getSimdWidth()/8
-                                           : GEN_REG_SIZE * ctx.getSimdWidth()/8;
-      } else
-        needRestart = true; // is a vector which could not be spilled.
-
-      if (size <= 0)
-        break;
-      if (!needRestart) {
+        if (remainSize <= 0)
+          break;
+
         uint32_t offset = RA.find(reg)->second;
-        uint32_t nextOffset = (family == ir::FAMILY_QWORD) ? (offset + 2 * GEN_REG_SIZE * ctx.getSimdWidth() / 8)
-                                                           : (offset + GEN_REG_SIZE * ctx.getSimdWidth() / 8);
-        auto nextRegIt = offsetReg.find(nextOffset);
-        if (nextRegIt != offsetReg.end())
-          reg = nextRegIt->second;
-        else
-          needRestart = true;
+        uint32_t s; getRegAttrib(reg, s);
+        nextOffset = offset + s;
+
+        SpillIntervalIter nextValid = candQ.end();
+
+        bool shouldBreak = findNextSpillCandidate(candQ, remainSize, nextOffset,
+                                                  nextValid);
+        contiguousIter = nextValid;
+        if (shouldBreak)
+          break;
       }
 
-      if (needRestart) {
-#if 0
-        // FIXME, we should enable this code block in the future.
-        // If the spill set is not zero and we need a restart, we can
-        // simply return to try to allocate the registers at first.
-        // As some vectors which have expired elements may be marked as
-        // unspillable vector.
-        if (spillSet.size() > 0)
+      if (remainSize <= 0) {
+        if (scalarAllocationFail) {
+          // Done
           break;
-#endif
-        it++;
-        // next register is not in spill candidate.
-        // let's move to next candidate and start over.
-        if (it == spillCandidate.end())
-          return false;
-        reg = it->getReg();
-        size = savedSize;
-        spillSet.clear();
+        } else {
+          // Add as one spillGroup
+          spillGroups.push_back(spillSet);
+          spillGroupCost.push_back(spillCostTotal);
+          ++spillGroupID;
+        }
       }
+
+      ++searchBegin;
+      // restore states
+      remainSize = size;
+      spillCostTotal = 0.0f;
+      spillSet.clear();
+    }
+    // failed to spill
+    if (scalarAllocationFail && remainSize > 0) return false;
+    if (!scalarAllocationFail && spillGroups.size() == 0) return false;
+
+    if (!scalarAllocationFail) {
+      // push min spillcost group into spillSet
+      int minIndex = std::distance(spillGroupCost.begin(),
+                                   std::min_element(spillGroupCost.begin(),
+                                                    spillGroupCost.end()));
+      spillSet.swap(spillGroups[minIndex]);
     }
 
-    for(auto spillreg : spillSet)
+    for(auto spillreg : spillSet) {
       spillReg(spillreg, true);
+    }
     return true;
   }
 
@@ -1064,6 +1145,37 @@ namespace gbe
     return grfOffset;
   }
 
+  int UseCountApproximate(int loopDepth) {
+    int ret = 1;
+    for (int i = 0; i < loopDepth; i++) {
+      ret = ret * 10;
+    }
+    return ret;
+  }
+
+  void GenRegAllocator::Opaque::calculateSpillCost(Selection &selection) {
+    int BlockIndex = 0;
+    for (auto &block : *selection.blockList) {
+      int LoopDepth = ctx.fn.getLoopDepth(ir::LabelIndex(BlockIndex));
+      for (auto &insn : block.insnList) {
+        const uint32_t srcNum = insn.srcNum, dstNum = insn.dstNum;
+        for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+          const GenRegister &selReg = insn.src(srcID);
+          const ir::Register reg = selReg.reg();
+          if (selReg.file == GEN_GENERAL_REGISTER_FILE)
+            this->intervals[reg].accessCount += UseCountApproximate(LoopDepth);
+        }
+        for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
+          const GenRegister &selReg = insn.dst(dstID);
+          const ir::Register reg = selReg.reg();
+          if (selReg.file == GEN_GENERAL_REGISTER_FILE)
+            this->intervals[reg].accessCount += UseCountApproximate(LoopDepth);
+        }
+      }
+      BlockIndex++;
+    }
+  }
+
   INLINE bool GenRegAllocator::Opaque::allocate(Selection &selection) {
     using namespace ir;
     const Function::PushMap &pushMap = ctx.fn.getPushMap();
@@ -1273,6 +1385,7 @@ do { \
 
     // First we try to put all booleans registers into flags
     this->allocateFlags(selection);
+    this->calculateSpillCost(selection);
 
     // Sort both intervals in starting point and ending point increasing orders
     const uint32_t regNum = ctx.sel->getRegNum();
@@ -1321,7 +1434,7 @@ do { \
              << "  " << setw(-3) << regSize  << "B\t"
              << "[  " << setw(8) << this->intervals[(uint)vReg].minID
              << " -> " << setw(8) << this->intervals[(uint)vReg].maxID
-             << "]" << endl;
+             << "]" << setw(8) << "use count: " << this->intervals[(uint)vReg].accessCount << endl;
     }
     if (!spilledRegs.empty())
       cout << "## spilled registers: " << spilledRegs.size() << endl;
@@ -1336,7 +1449,7 @@ do { \
            <<  "  " << setw(-3) << regSize << "B\t"
            << "[  " << setw(8) << this->intervals[(uint)vReg].minID
            << " -> " << setw(8) << this->intervals[(uint)vReg].maxID
-           << "]" << endl;
+           << "]" << setw(8) << "use count: " << this->intervals[(uint)vReg].accessCount << endl;
     }
     cout << endl;
   }
diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp
index fddafc7..2fe080a 100644
--- a/backend/src/ir/function.cpp
+++ b/backend/src/ir/function.cpp
@@ -62,8 +62,38 @@ namespace ir {
     return unit.getPointerFamily();
   }
 
-  void Function::addLoop(LabelIndex preheader, const vector<LabelIndex> &bbs, const vector<std::pair<LabelIndex, LabelIndex>> &exits) {
-    loops.push_back(GBE_NEW(Loop, preheader, bbs, exits));
+  void Function::addLoop(LabelIndex preheader,
+                        int parent,
+                        const vector<LabelIndex> &bbs,
+                        const vector<std::pair<LabelIndex, LabelIndex>> &exits) {
+    loops.push_back(GBE_NEW(Loop, preheader, parent, bbs, exits));
+  }
+
+  int Function::getLoopDepth(LabelIndex Block) const{
+    if (loops.size() == 0) return 0;
+
+    int LoopIndex = -1;
+    int LoopDepth = 0;
+    // get innermost loop
+    for (int Idx = loops.size()-1; Idx >= 0; Idx--) {
+      Loop *lp = loops[Idx];
+      vector<LabelIndex> &Blocks = lp->bbs;
+      bool Found = (std::find(Blocks.begin(), Blocks.end(), Block) != Blocks.end());
+      if (Found) {
+        LoopIndex = Idx;
+        break;
+      }
+    }
+
+    if (LoopIndex != -1) {
+      int LoopId = LoopIndex;
+      do {
+        LoopId = loops[LoopId]->parent;
+        LoopDepth++;
+      } while(LoopId != -1);
+    }
+
+    return LoopDepth;
   }
 
   void Function::checkEmptyLabels(void) {
diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
index 6a90767..ae0a702 100644
--- a/backend/src/ir/function.hpp
+++ b/backend/src/ir/function.hpp
@@ -273,9 +273,14 @@ namespace ir {
   struct Loop : public NonCopyable
   {
   public:
-    Loop(LabelIndex pre, const vector<LabelIndex> &in, const vector<std::pair<LabelIndex, LabelIndex>> &exit) :
-    preheader(pre), bbs(in), exits(exit) {}
+    Loop(LabelIndex pre,
+         int paren,
+         const vector<LabelIndex> &in,
+         const vector<std::pair<LabelIndex, LabelIndex>> &exit) :
+         preheader(pre), parent(paren), bbs(in), exits(exit) {}
+
     LabelIndex preheader;
+    int parent;
     vector<LabelIndex> bbs;
     vector<std::pair<LabelIndex, LabelIndex>> exits;
     GBE_STRUCT(Loop);
@@ -523,8 +528,12 @@ namespace ir {
     /*! Push stack size. */
     INLINE void pushStackSize(uint32_t step) { this->stackSize += step; }
     /*! add the loop info for later liveness analysis */
-    void addLoop(LabelIndex preheader, const vector<LabelIndex> &bbs, const vector<std::pair<LabelIndex, LabelIndex>> &exits);
+    void addLoop(LabelIndex preheader,
+                 int parent,
+                 const vector<LabelIndex> &bbs,
+                 const vector<std::pair<LabelIndex, LabelIndex>> &exits);
     INLINE const vector<Loop * > &getLoops() { return loops; }
+    int getLoopDepth(LabelIndex Block) const;
     vector<BasicBlock *> &getBlocks() { return blocks; }
     /*! Get surface starting address register from bti */
     Register getSurfaceBaseReg(uint8_t bti) const;
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 0ac28de..ce15989 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2760,7 +2760,7 @@ namespace gbe
         GBE_ASSERT(labelMap.find(b.second) != labelMap.end());
         loopExits.push_back(std::make_pair(labelMap[b.first], labelMap[b.second]));
       }
-      fn.addLoop(preheaderBB, loopBBs, loopExits);
+      fn.addLoop(preheaderBB, loop.second, loopBBs, loopExits);
     }
   }
 
-- 
2.4.1

_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list