[Beignet] [PATCH V2] GBE: Improve spill policy by considering use count.

Yang, Rong R rong.r.yang at intel.com
Fri Jun 24 08:47:58 UTC 2016


The patchset LGTM, and the algorithm for computing spill cost could be refined later.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Ruiling Song
> Sent: Friday, June 24, 2016 16:20
> To: beignet at lists.freedesktop.org
> Cc: Song, Ruiling <ruiling.song at intel.com>
> Subject: [Beignet] [PATCH V2] GBE: Improve spill policy by considering use
> count.
> 
> In this patch, we use use-count/live-range as the spill cost,
> and try to minimize the spill cost when doing register spilling.
> For registers inside a loop, we use 10**loop_depth to approximate the use
> count.
> 
> When we spill for a scalar register, that is easy, just find the candidate with
> least spill cost. But when spilling for a vector of registers, we need to search
> in physical register space to make enough room for the requested size, and
> then we will choose the group of registers with least spill cost.
> 
> Note I changed to always allocate for dst SelectionVector, as a dst
> SelectionVector normally doesn't introduce too much register pressure.
> Allocating SelectionVector will greatly reduce MOVs. I think we need to do
> further optimization for SelectionVector. We need to make a dynamic decision
> based on register pressure: if allowed, we should try to allocate
> SelectionVector as much as possible; if register pressure does not allow it,
> then we need to try to add extra MOVs or even spilling.
> 
> v2:
> fix assert in getSpillCost() for tmp register.
> 
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
>  backend/src/backend/context.cpp            |  33 +++
>  backend/src/backend/context.hpp            |   1 +
>  backend/src/backend/gen_reg_allocation.cpp | 331
> +++++++++++++++++++----------
>  backend/src/ir/function.cpp                |  34 ++-
>  backend/src/ir/function.hpp                |  15 +-
>  backend/src/llvm/llvm_gen_backend.cpp      |   2 +-
>  6 files changed, 302 insertions(+), 114 deletions(-)
> 
> diff --git a/backend/src/backend/context.cpp
> b/backend/src/backend/context.cpp index 0991786..675dc78 100644
> --- a/backend/src/backend/context.cpp
> +++ b/backend/src/backend/context.cpp
> @@ -48,6 +48,10 @@ namespace gbe
>      /*! Free the given register file piece */
>      void deallocate(int32_t offset);
> 
> +    /*! check whether a super register is in free list,
> +     *  a super register means a 32byte register, Gen
> +     *  often has 128 super registers*/
> +    bool isSuperRegisterFree(int32_t offset);
>      /*! Spilt a block into 2 blocks */
>      void splitBlock(int32_t offset, int32_t subOffset);
> 
> @@ -65,6 +69,7 @@ namespace gbe
>       *  If the colascing was done, the left block is deleted
>       */
>      void coalesce(Block *left, Block *right);
> +    void dumpFreeList();
>      /*! the maximum offset */
>      int32_t maxOffset;
>      /*! Head and tail of the free list */ @@ -120,6 +125,30 @@ namespace gbe
>      }
>    }
> 
> +  void SimpleAllocator::dumpFreeList() {
> +    Block *s = head;
> +    printf("register free list:\n");
> +    while (s) {
> +      printf("blk: %d(r%d.%d) (%d)\n", s->offset, s->offset/GEN_REG_SIZE, s-
> >offset % GEN_REG_SIZE, s->size);
> +      s = s->next;
> +    }
> +    printf("free list end\n");
> +  }
> +
> +  bool SimpleAllocator::isSuperRegisterFree(int32_t offset) {
> +    assert((offset % GEN_REG_SIZE) == 0);
> +    Block *s = head;
> +    while (s) {
> +      if (s->offset <= offset && (s->offset+s->size) >= offset+GEN_REG_SIZE)
> {
> +        return true;
> +      }
> +      if (s->offset > offset)
> +        return false;
> +      s = s->next;
> +    }
> +    return false;
> +  }
> +
>    int32_t SimpleAllocator::allocate(int32_t size, int32_t alignment, bool bFwd)
>    {
>      // Make it simple and just use the first block we find @@ -372,6 +401,10
> @@ namespace gbe
>      return registerAllocator->allocate(size, alignment, bFwd);
>    }
> 
> +  bool Context::isSuperRegisterFree(int offset) {
> +    return registerAllocator->isSuperRegisterFree(offset);
> +  }
> +
>    void Context::deallocate(int32_t offset) { registerAllocator-
> >deallocate(offset); }
> 
>    void Context::splitBlock(int32_t offset, int32_t subOffset) { diff --git
> a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp
> index 7524949..1567bd6 100644
> --- a/backend/src/backend/context.hpp
> +++ b/backend/src/backend/context.hpp
> @@ -86,6 +86,7 @@ namespace gbe
>      }
>      /*! Allocate some memory in the register file */
>      int32_t allocate(int32_t size, int32_t alignment, bool bFwd = true);
> +    bool isSuperRegisterFree(int offset);
>      /*! Deallocate previously allocated memory */
>      void deallocate(int32_t offset);
>      /*! Spilt a block into 2 blocks, for some registers allocate together but
> deallocate seperate */ diff --git
> a/backend/src/backend/gen_reg_allocation.cpp
> b/backend/src/backend/gen_reg_allocation.cpp
> index 26a010d..4451efb 100644
> --- a/backend/src/backend/gen_reg_allocation.cpp
> +++ b/backend/src/backend/gen_reg_allocation.cpp
> @@ -49,49 +49,22 @@ namespace gbe
>     */
>    struct GenRegInterval {
>      INLINE GenRegInterval(ir::Register reg) :
> -      reg(reg), minID(INT_MAX), maxID(-INT_MAX), conflictReg(0),
> b3OpAlign(0) {}
> +      reg(reg), minID(INT_MAX), maxID(-INT_MAX), accessCount(0),
> +      conflictReg(0), b3OpAlign(0) {}
>      ir::Register reg;     //!< (virtual) register of the interval
>      int32_t minID, maxID; //!< Starting and ending points
> +    int32_t accessCount;
>      ir::Register conflictReg; // < has banck conflict with this register
>      bool b3OpAlign;
>    };
> 
> -  typedef struct GenRegIntervalKey {
> -    GenRegIntervalKey(uint32_t reg, int32_t maxID) {
> -      key = ((uint64_t)maxID << 32) | reg;
> -    }
> -    const ir::Register getReg() const {
> -      return (ir::Register)(key & 0xFFFFFFFF);
> -    }
> -    int32_t getMaxID() const {
> -      return key >> 32;
> -    }
> -    uint64_t key;
> -  } GenRegIntervalKey;
> -
> -  struct spillCmp {
> -    bool operator () (const GenRegIntervalKey &lhs, const GenRegIntervalKey
> &rhs) const
> -    { return lhs.key > rhs.key; }
> -  };
> -
> -  typedef set <GenRegIntervalKey, spillCmp> SpillSet;
> -
> -  class SpillCandidateSet : public SpillSet
> -  {
> -  public:
> -    std::set<GenRegIntervalKey, spillCmp>::iterator find(GenRegInterval
> interval) {
> -      GenRegIntervalKey key(interval.reg, interval.maxID);
> -      return SpillSet::find(key);
> -    }
> -    void insert(GenRegInterval interval) {
> -      GenRegIntervalKey key(interval.reg, interval.maxID);
> -      SpillSet::insert(key);
> -    }
> -    void erase(GenRegInterval interval) {
> -      GenRegIntervalKey key(interval.reg, interval.maxID);
> -      SpillSet::erase(key);
> -    }
> +  struct SpillInterval {
> +    SpillInterval(const ir::Register r, float c):
> +      reg(r), cost(c) {}
> +    ir::Register reg;
> +    float cost;
>    };
> +  typedef std::vector<SpillInterval>::iterator SpillIntervalIter;
> 
>    /*! Implements the register allocation */
>    class GenRegAllocator::Opaque
> @@ -131,6 +104,9 @@ namespace gbe
>      bool expireFlag(const GenRegInterval &limit);
>      /*! Allocate the virtual boolean (== flags) registers */
>      void allocateFlags(Selection &selection);
> +    /*! calculate the spill cost, what we store here is 'use count',
> +     * we use [use count]/[live range] as spill cost */
> +    void calculateSpillCost(Selection &selection);
>      /*! validated flags which contains valid value in the physical flag register */
>      set<uint32_t> validatedFlags;
>      /*! validated temp flag register which indicate the flag 0,1 contains which
> virtual flag register. */ @@ -181,7 +157,7 @@ namespace gbe
>      /*! registers that are spilled */
>      SpilledRegs spilledRegs;
>      /*! register which could be spilled.*/
> -    SpillCandidateSet spillCandidate;
> +    std::set<GenRegInterval*> spillCandidate;
>      /*! BBs last instruction ID map */
>      map<const ir::BasicBlock *, int32_t> bbLastInsnIDMap;
>      /* reserved registers for register spill/reload */ @@ -191,6 +167,8 @@
> namespace gbe
>      INLINE void insertNewReg(const Selection &selection, ir::Register reg,
> uint32_t grfOffset, bool isVector = false);
>      INLINE bool expireReg(ir::Register reg);
>      INLINE bool spillAtInterval(GenRegInterval interval, int size, uint32_t
> alignment);
> +    INLINE bool findNextSpillCandidate(std::vector<SpillInterval> &candidate,
> +                int &remainSize, int &offset, SpillIntervalIter
> + &nextCand);
>      INLINE uint32_t allocateReg(GenRegInterval interval, uint32_t size,
> uint32_t alignment);
>      INLINE bool spillReg(GenRegInterval interval, bool isAllocated = false);
>      INLINE bool spillReg(ir::Register reg, bool isAllocated = false); @@ -205,11
> +183,13 @@ namespace gbe
>        ir::Register reg;
>        if (isSrc) {
>          reg = sel.replaceSrc(insn, regID, type, needMov);
> +        assert(reg == intervals.size());
>          intervals.push_back(reg);
>          intervals[reg].minID = insn->ID - 1;
>          intervals[reg].maxID = insn->ID;
>        } else {
>          reg = sel.replaceDst(insn, regID, type, needMov);
> +        assert(reg == intervals.size());
>          intervals.push_back(reg);
>          intervals[reg].minID = insn->ID;
>          intervals[reg].maxID = insn->ID + 1; @@ -348,10 +328,13 @@ namespace
> gbe
>        // case 1: the register is not already in a vector, so it can stay in this
>        // vector. Note that local IDs are *non-scalar* special registers but will
>        // require a MOV anyway since pre-allocated in the CURBE
> +      // for dst SelectionVector, we can always try to allocate them even
> under
> +      // spilling, reason is that its components can be expired separately, so,
> +      // it does not introduce too much register pressure.
>        if (it == vectorMap.end() &&
>            ctx.sel->isScalarReg(reg) == false &&
>            ctx.isSpecialReg(reg) == false &&
> -          ctx.reservedSpillRegs == 0 )
> +          (ctx.reservedSpillRegs == 0 || !vector->isSrc) )
>        {
>          const VectorLocation location = std::make_pair(vector, regID);
>          this->vectorMap.insert(std::make_pair(reg, location)); @@ -857,8
> +840,8 @@ namespace gbe
> 
>      ctx.deallocate(it->second);
>      if (reservedReg != 0
> -        && (spillCandidate.find(intervals[reg]) != spillCandidate.end())) {
> -        spillCandidate.erase(intervals[reg]);
> +        && (spillCandidate.find(&intervals[reg]) != spillCandidate.end())) {
> +        spillCandidate.erase(&intervals[reg]);
>          /* offset --> reg map should keep updated. */
>          offsetReg.erase(it->second);
>      }
> @@ -891,7 +874,7 @@ namespace gbe
>            && !selection.isPartialWrite(reg)) {
>           GBE_ASSERT(offsetReg.find(grfOffset) == offsetReg.end());
>           offsetReg.insert(std::make_pair(grfOffset, reg));
> -         spillCandidate.insert(intervals[reg]);
> +         spillCandidate.insert(&intervals[reg]);
>         }
>       }
>    }
> @@ -936,92 +919,192 @@ namespace gbe
>    // FIXME we may need to fix those unspillable vector in the furture.
>    INLINE bool GenRegAllocator::Opaque::vectorCanSpill(SelectionVector
> *vector) {
>      for(uint32_t id = 0; id < vector->regNum; id++)
> -      if (spillCandidate.find(intervals[(ir::Register)(vector->reg[id].value.reg)])
> +      if
> + (spillCandidate.find(&intervals[(ir::Register)(vector->reg[id].value.r
> + eg)])
>            == spillCandidate.end())
>          return false;
>      return true;
>    }
> 
> +  INLINE float getSpillCost(const GenRegInterval &v) {
> +    // check minID maxId value
> +    assert(v.maxID >= v.minID);
> +    if (v.maxID == v.minID)
> +      return 1.0f;
> +    // FIXME some register may get access count of 0, need to be fixed.
> +    float count = v.accessCount == 0 ? (float)2 : (float)v.accessCount;
> +    return count / (float)(v.maxID - v.minID);  }
> +
> +  bool spillinterval_cmp(const SpillInterval &v1, const SpillInterval &v2) {
> +    return v1.cost < v2.cost;
> +  }
> +
> +  INLINE SpillIntervalIter findRegisterInSpillQueue(
> +                           std::vector<SpillInterval> &cand, ir::Register reg) {
> +    for (SpillIntervalIter it = cand.begin(); it != cand.end(); ++it) {
> +      if (it->reg == reg)
> +        return it;
> +    }
> +    return cand.end();
> +  }
> +  // The function tries to search in 'free physical register' and 'candidate'.
> +  // so, the result may be one of the three possible situations:
> +  // 1. search completed, find the next valid iterator to a candidate.
> +  // 2. search ended, because we met unspillable register, we have to
> + drop the iteration  // 3. search completed, there are enough free physical
> register.
> +  //
> +  // return value: should we break? because of:
> +  // 1. search end, found enough free register  // 2. search end,
> + because met unspillable register  INLINE bool
> + GenRegAllocator::Opaque::findNextSpillCandidate(
> +              std::vector<SpillInterval> &candidate, int &remainSize,
> +              int &offset, SpillIntervalIter &nextCand) {
> +    bool isFree = false;
> +    bool shouldBreak = false;
> +    do {
> +      // check is free?
> +      isFree = ctx.isSuperRegisterFree(offset);
> +
> +      if (isFree) {
> +        remainSize -= GEN_REG_SIZE;
> +        offset += GEN_REG_SIZE;
> +      }
> +    } while(isFree && remainSize > 0);
> +
> +    // done
> +    if (remainSize <= 0) return true;
> +
> +    auto registerIter = offsetReg.find(offset);
> +    shouldBreak = registerIter == offsetReg.end();
> +
> +    if (!shouldBreak) {
> +      ir::Register reg = registerIter->second;
> +      nextCand = findRegisterInSpillQueue(candidate, reg);
> +    }
> +    // if shouldBreak is false, means we need go on
> +    return shouldBreak;
> +  }
> +
>    INLINE bool GenRegAllocator::Opaque::spillAtInterval(GenRegInterval
> interval,
>                                                         int size,
>                                                         uint32_t alignment) {
>      if (reservedReg == 0)
>        return false;
> -    auto it = spillCandidate.begin();
> -    // If there is no spill candidate or current register is spillable and current
> register's
> -    // endpoint is after all the spillCandidate register's endpoint we return
> false. The
> -    // caller will spill current register.
> -    // At simd16 mode, we will always try to spill here rather than return to
> the caller.
> -    // The reason is that the caller may have a vector to allocate, and some
> element may be
> -    // temporary registers which could not be spilled.
> -    if (it == spillCandidate.end()
> -        || (ctx.getSimdWidth() == 8 && (it->getMaxID() <= interval.maxID
> -            && alignment == ctx.getSimdWidth()/8 * GEN_REG_SIZE)))
> +
> +    if (spillCandidate.empty())
>        return false;
> 
> -    ir::Register reg = it->getReg();
> -    set<ir::Register> spillSet;
> -    int32_t savedSize = size;
> -    while(size > 0) {
> -      auto vectorIt = vectorMap.find(reg);
> -      bool isVector = vectorIt != vectorMap.end();
> -      bool needRestart = false;
> -      ir::RegisterFamily family = ctx.sel->getRegisterFamily(reg);
> -      if (isVector
> -          && (vectorCanSpill(vectorIt->second.first))) {
> -        const SelectionVector *vector = vectorIt->second.first;
> -        for (uint32_t id = 0; id < vector->regNum; id++) {
> -          GBE_ASSERT(spilledRegs.find(vector->reg[id].reg())
> -                     == spilledRegs.end());
> -          spillSet.insert(vector->reg[id].reg());
> -          reg = vector->reg[id].reg();
> -          family = ctx.sel->getRegisterFamily(reg);
> -          size -= family == ir::FAMILY_QWORD ? 2 * GEN_REG_SIZE *
> ctx.getSimdWidth()/8
> -                                             : GEN_REG_SIZE * ctx.getSimdWidth()/8;
> +    // push spill candidate into a vector in ascending order of spill-cost.
> +    std::vector<SpillInterval> candQ;
> +    for (auto &p : spillCandidate) {
> +      float cost = getSpillCost(*p);
> +      candQ.push_back(SpillInterval(p->reg, cost));
> +    }
> +    std::sort(candQ.begin(), candQ.end(), spillinterval_cmp);
> +
> +    bool scalarAllocationFail = (vectorMap.find(interval.reg) ==
> + vectorMap.end());
> +
> +    int remainSize = size;
> +    float spillCostTotal = 0.0f;
> +    std::set<ir::Register> spillSet;
> +    // if we search the whole register, it will take lots of time.
> +    // so, I just add this max value to make the compile time not
> +    // grow too much, although this method may not find truly lowest
> +    // spill cost candidates.
> +    const int spillGroupMax = 8;
> +    int spillGroupID = 0;
> +
> +    std::vector<std::set<ir::Register>> spillGroups;
> +    std::vector<float> spillGroupCost;
> +
> +    auto searchBegin = candQ.begin();
> +    while (searchBegin != candQ.end() && spillGroupID < spillGroupMax) {
> +      auto contiguousIter = searchBegin;
> +
> +      while (contiguousIter != candQ.end()) {
> +        ir::Register reg = contiguousIter->reg;
> +
> +        auto vectorIt = vectorMap.find(reg);
> +        bool spillVector = (vectorIt != vectorMap.end());
> +        int32_t nextOffset = -1;
> +
> +        // is register allocation failed at scalar register?
> +        // if so, don't try to spill a vector register,
> +        // which is obviously of no benefit.
> +        if (scalarAllocationFail && spillVector) break;
> +
> +        if (spillVector) {
> +          if (vectorCanSpill(vectorIt->second.first)) {
> +            const SelectionVector *vector = vectorIt->second.first;
> +            for (uint32_t id = 0; id < vector->regNum; id++) {
> +              GBE_ASSERT(spilledRegs.find(vector->reg[id].reg())
> +                         == spilledRegs.end());
> +              spillSet.insert(vector->reg[id].reg());
> +              reg = vector->reg[id].reg();
> +              uint32_t s;
> +              getRegAttrib(reg, s);
> +              remainSize-= s;
> +              spillCostTotal += contiguousIter->cost;
> +            }
> +          } else {
> +            break;
> +          }
> +        } else {
> +          spillSet.insert(reg);
> +          uint32_t s;
> +          getRegAttrib(reg, s);
> +          spillCostTotal += contiguousIter->cost;
> +          remainSize -= s;
>          }
> -      } else if (!isVector) {
> -        spillSet.insert(reg);
> -        size -= family == ir::FAMILY_QWORD ? 2 * GEN_REG_SIZE *
> ctx.getSimdWidth()/8
> -                                           : GEN_REG_SIZE * ctx.getSimdWidth()/8;
> -      } else
> -        needRestart = true; // is a vector which could not be spilled.
> -
> -      if (size <= 0)
> -        break;
> -      if (!needRestart) {
> +        if (remainSize <= 0)
> +          break;
> +
>          uint32_t offset = RA.find(reg)->second;
> -        uint32_t nextOffset = (family == ir::FAMILY_QWORD) ? (offset + 2 *
> GEN_REG_SIZE * ctx.getSimdWidth() / 8)
> -                                                           : (offset + GEN_REG_SIZE * ctx.getSimdWidth()
> / 8);
> -        auto nextRegIt = offsetReg.find(nextOffset);
> -        if (nextRegIt != offsetReg.end())
> -          reg = nextRegIt->second;
> -        else
> -          needRestart = true;
> +        uint32_t s; getRegAttrib(reg, s);
> +        nextOffset = offset + s;
> +
> +        SpillIntervalIter nextValid = candQ.end();
> +
> +        bool shouldBreak = findNextSpillCandidate(candQ, remainSize,
> nextOffset,
> +                                                  nextValid);
> +        contiguousIter = nextValid;
> +        if (shouldBreak)
> +          break;
>        }
> 
> -      if (needRestart) {
> -#if 0
> -        // FIXME, we should enable this code block in the future.
> -        // If the spill set is not zero and we need a restart, we can
> -        // simply return to try to allocate the registers at first.
> -        // As some vectors which have expired elements may be marked as
> -        // unspillable vector.
> -        if (spillSet.size() > 0)
> +      if (remainSize <= 0) {
> +        if (scalarAllocationFail) {
> +          // Done
>            break;
> -#endif
> -        it++;
> -        // next register is not in spill candidate.
> -        // let's move to next candidate and start over.
> -        if (it == spillCandidate.end())
> -          return false;
> -        reg = it->getReg();
> -        size = savedSize;
> -        spillSet.clear();
> +        } else {
> +          // Add as one spillGroup
> +          spillGroups.push_back(spillSet);
> +          spillGroupCost.push_back(spillCostTotal);
> +          ++spillGroupID;
> +        }
>        }
> +
> +      ++searchBegin;
> +      // restore states
> +      remainSize = size;
> +      spillCostTotal = 0.0f;
> +      spillSet.clear();
> +    }
> +    // failed to spill
> +    if (scalarAllocationFail && remainSize > 0) return false;
> +    if (!scalarAllocationFail && spillGroups.size() == 0) return false;
> +
> +    if (!scalarAllocationFail) {
> +      // push min spillcost group into spillSet
> +      int minIndex = std::distance(spillGroupCost.begin(),
> +                                   std::min_element(spillGroupCost.begin(),
> +                                                    spillGroupCost.end()));
> +      spillSet.swap(spillGroups[minIndex]);
>      }
> 
> -    for(auto spillreg : spillSet)
> +    for(auto spillreg : spillSet) {
>        spillReg(spillreg, true);
> +    }
>      return true;
>    }
> 
> @@ -1064,6 +1147,37 @@ namespace gbe
>      return grfOffset;
>    }
> 
> +  int UseCountApproximate(int loopDepth) {
> +    int ret = 1;
> +    for (int i = 0; i < loopDepth; i++) {
> +      ret = ret * 10;
> +    }
> +    return ret;
> +  }
> +
> +  void GenRegAllocator::Opaque::calculateSpillCost(Selection &selection) {
> +    int BlockIndex = 0;
> +    for (auto &block : *selection.blockList) {
> +      int LoopDepth = ctx.fn.getLoopDepth(ir::LabelIndex(BlockIndex));
> +      for (auto &insn : block.insnList) {
> +        const uint32_t srcNum = insn.srcNum, dstNum = insn.dstNum;
> +        for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
> +          const GenRegister &selReg = insn.src(srcID);
> +          const ir::Register reg = selReg.reg();
> +          if (selReg.file == GEN_GENERAL_REGISTER_FILE)
> +            this->intervals[reg].accessCount +=
> UseCountApproximate(LoopDepth);
> +        }
> +        for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
> +          const GenRegister &selReg = insn.dst(dstID);
> +          const ir::Register reg = selReg.reg();
> +          if (selReg.file == GEN_GENERAL_REGISTER_FILE)
> +            this->intervals[reg].accessCount +=
> UseCountApproximate(LoopDepth);
> +        }
> +      }
> +      BlockIndex++;
> +    }
> +  }
> +
>    INLINE bool GenRegAllocator::Opaque::allocate(Selection &selection) {
>      using namespace ir;
>      const Function::PushMap &pushMap = ctx.fn.getPushMap(); @@ -1273,6
> +1387,7 @@ do { \
> 
>      // First we try to put all booleans registers into flags
>      this->allocateFlags(selection);
> +    this->calculateSpillCost(selection);
> 
>      // Sort both intervals in starting point and ending point increasing orders
>      const uint32_t regNum = ctx.sel->getRegNum(); @@ -1321,7 +1436,7 @@
> do { \
>               << "  " << setw(-3) << regSize  << "B\t"
>               << "[  " << setw(8) << this->intervals[(uint)vReg].minID
>               << " -> " << setw(8) << this->intervals[(uint)vReg].maxID
> -             << "]" << endl;
> +             << "]" << setw(8) << "use count: " <<
> + this->intervals[(uint)vReg].accessCount << endl;
>      }
>      if (!spilledRegs.empty())
>        cout << "## spilled registers: " << spilledRegs.size() << endl; @@ -1336,7
> +1451,7 @@ do { \
>             <<  "  " << setw(-3) << regSize << "B\t"
>             << "[  " << setw(8) << this->intervals[(uint)vReg].minID
>             << " -> " << setw(8) << this->intervals[(uint)vReg].maxID
> -           << "]" << endl;
> +           << "]" << setw(8) << "use count: " <<
> + this->intervals[(uint)vReg].accessCount << endl;
>      }
>      cout << endl;
>    }
> diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp index
> fddafc7..2fe080a 100644
> --- a/backend/src/ir/function.cpp
> +++ b/backend/src/ir/function.cpp
> @@ -62,8 +62,38 @@ namespace ir {
>      return unit.getPointerFamily();
>    }
> 
> -  void Function::addLoop(LabelIndex preheader, const vector<LabelIndex>
> &bbs, const vector<std::pair<LabelIndex, LabelIndex>> &exits) {
> -    loops.push_back(GBE_NEW(Loop, preheader, bbs, exits));
> +  void Function::addLoop(LabelIndex preheader,
> +                        int parent,
> +                        const vector<LabelIndex> &bbs,
> +                        const vector<std::pair<LabelIndex, LabelIndex>> &exits) {
> +    loops.push_back(GBE_NEW(Loop, preheader, parent, bbs, exits));  }
> +
> +  int Function::getLoopDepth(LabelIndex Block) const{
> +    if (loops.size() == 0) return 0;
> +
> +    int LoopIndex = -1;
> +    int LoopDepth = 0;
> +    // get innermost loop
> +    for (int Idx = loops.size()-1; Idx >= 0; Idx--) {
> +      Loop *lp = loops[Idx];
> +      vector<LabelIndex> &Blocks = lp->bbs;
> +      bool Found = (std::find(Blocks.begin(), Blocks.end(), Block) !=
> Blocks.end());
> +      if (Found) {
> +        LoopIndex = Idx;
> +        break;
> +      }
> +    }
> +
> +    if (LoopIndex != -1) {
> +      int LoopId = LoopIndex;
> +      do {
> +        LoopId = loops[LoopId]->parent;
> +        LoopDepth++;
> +      } while(LoopId != -1);
> +    }
> +
> +    return LoopDepth;
>    }
> 
>    void Function::checkEmptyLabels(void) { diff --git
> a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index
> 6a90767..ae0a702 100644
> --- a/backend/src/ir/function.hpp
> +++ b/backend/src/ir/function.hpp
> @@ -273,9 +273,14 @@ namespace ir {
>    struct Loop : public NonCopyable
>    {
>    public:
> -    Loop(LabelIndex pre, const vector<LabelIndex> &in, const
> vector<std::pair<LabelIndex, LabelIndex>> &exit) :
> -    preheader(pre), bbs(in), exits(exit) {}
> +    Loop(LabelIndex pre,
> +         int paren,
> +         const vector<LabelIndex> &in,
> +         const vector<std::pair<LabelIndex, LabelIndex>> &exit) :
> +         preheader(pre), parent(paren), bbs(in), exits(exit) {}
> +
>      LabelIndex preheader;
> +    int parent;
>      vector<LabelIndex> bbs;
>      vector<std::pair<LabelIndex, LabelIndex>> exits;
>      GBE_STRUCT(Loop);
> @@ -523,8 +528,12 @@ namespace ir {
>      /*! Push stack size. */
>      INLINE void pushStackSize(uint32_t step) { this->stackSize += step; }
>      /*! add the loop info for later liveness analysis */
> -    void addLoop(LabelIndex preheader, const vector<LabelIndex> &bbs,
> const vector<std::pair<LabelIndex, LabelIndex>> &exits);
> +    void addLoop(LabelIndex preheader,
> +                 int parent,
> +                 const vector<LabelIndex> &bbs,
> +                 const vector<std::pair<LabelIndex, LabelIndex>>
> + &exits);
>      INLINE const vector<Loop * > &getLoops() { return loops; }
> +    int getLoopDepth(LabelIndex Block) const;
>      vector<BasicBlock *> &getBlocks() { return blocks; }
>      /*! Get surface starting address register from bti */
>      Register getSurfaceBaseReg(uint8_t bti) const; diff --git
> a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index 8a75124..5fe0424 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -2765,7 +2765,7 @@ namespace gbe
>          GBE_ASSERT(labelMap.find(b.second) != labelMap.end());
>          loopExits.push_back(std::make_pair(labelMap[b.first],
> labelMap[b.second]));
>        }
> -      fn.addLoop(preheaderBB, loopBBs, loopExits);
> +      fn.addLoop(preheaderBB, loop.second, loopBBs, loopExits);
>      }
>    }
> 
> --
> 2.4.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list