[Beignet] [PATCH 6/7] Backend: Add hole reuse in reg alloction

Xiuli Pan xiuli.pan at intel.com
Mon Nov 14 08:20:07 UTC 2016


From: Pan Xiuli <xiuli.pan at intel.com>

We first find regs that have pool in simple linear scale, and save them
in HoleRegPool, when allocte regs we first try to search fit candidate
in the pool and choose the most fit one to reuse.

V2: Refine hole reuse only in one block.
V3: Refine data structure with less variable, add OCL_REUSE_HOLE_REG to
control the optimization.
V4: Spilt the patch into instruction ID part and hole reuse, refine the
blockID of the reg.
DEBUG version: You can export DUMPHOLE for easy review.

Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
 backend/src/backend/gen_reg_allocation.cpp | 112 +++++++++++++++++++++++++----
 backend/src/backend/gen_reg_allocation.hpp |  11 +++
 2 files changed, 111 insertions(+), 12 deletions(-)

diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index ebca368..1e9ff49 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -50,12 +50,13 @@ namespace gbe
   struct GenRegInterval {
     INLINE GenRegInterval(ir::Register reg) :
       reg(reg), minID(INT_MAX), maxID(-INT_MAX), accessCount(0),
-      conflictReg(0), b3OpAlign(0) {}
+      blockID(-1), conflictReg(0), b3OpAlign(0) , isreused(0) {}
     ir::Register reg;     //!< (virtual) register of the interval
     int32_t minID, maxID; //!< Starting and ending points
     int32_t accessCount;
+    int32_t blockID; //!< blockID for in-block regs that can reuse hole
     ir::Register conflictReg; // < has banck conflict with this register
-    bool b3OpAlign;
+    bool b3OpAlign, isreused;
   };
 
   struct SpillInterval {
@@ -123,7 +124,7 @@ namespace gbe
     /*! Allocate the vectors detected in the instruction selection pass */
     void allocateVector(Selection &selection);
     /*! Allocate the given interval. Return true if success */
-    bool createGenReg(const Selection &selection, const GenRegInterval &interval);
+    bool createGenReg(const Selection &selection, GenRegInterval &interval);
     /*! Indicate if the registers are already allocated in vectors */
     bool isAllocated(const SelectionVector *vector) const;
     /*! Reallocate registers if needed to make the registers in the vector
@@ -163,6 +164,8 @@ namespace gbe
     uint32_t reservedReg;
     /*! Current vector to expire */
     uint32_t expiringID;
+    /*! Hole regs that can be reused */
+    map<uint32_t, vector<HoleRegTag>> HoleRegPool;
     INLINE void insertNewReg(const Selection &selection, ir::Register reg, uint32_t grfOffset, bool isVector = false);
     INLINE bool expireReg(ir::Register reg);
     INLINE bool spillAtInterval(GenRegInterval interval, int size, uint32_t alignment);
@@ -277,14 +280,65 @@ namespace gbe
     }
   }
 
-  bool GenRegAllocator::Opaque::createGenReg(const Selection &selection, const GenRegInterval &interval) {
+  INLINE float IDFitess(int a, int b) {
+    if (a >= b) return 1.0f/(a - b + 1);
+    else return 2.0f;
+  }
+
+  INLINE float getPhiRegFitness(const GenRegInterval &interval, HoleRegTag &holeRegTag) {
+    int regstID = interval.minID;
+    int regendID = interval.maxID;
+    int holeregstID = holeRegTag.startID;
+    int holeregendID = holeRegTag.endID;
+    return IDFitess(regstID, holeregstID) + IDFitess(holeregendID, regendID);
+  }
+
+  BVAR(OCL_REUSE_HOLE_REG, 1);
+  bool GenRegAllocator::Opaque::createGenReg(const Selection &selection, GenRegInterval &interval) {
     using namespace ir;
     const ir::Register reg = interval.reg;
-    if (RA.contains(reg) == true)
-      return true; // already allocated
     uint32_t regSize;
     ir::RegisterFamily family;
     getRegAttrib(reg, regSize, &family);
+    // Check if the reg is live only in one block, thus can use the hole
+    int blockID = interval.blockID;
+    if (blockID >= 0 && OCL_REUSE_HOLE_REG) {
+      uint32_t useID = interval.maxID;
+      auto holepool = this->HoleRegPool.find(blockID);
+      // Use block ID as index to get candidate hole reg to reuse
+      if (holepool != this->HoleRegPool.end()) {
+        auto &holepoolvec = holepool->second;
+        HoleRegTag* holeregbest = NULL;
+        float lastfitness = 0;
+        for (auto itr = holepoolvec.begin() ; itr != holepoolvec.end(); ++itr) {
+          if (regSize != itr->regSize)
+            continue;
+          float fitness = getPhiRegFitness(interval, *itr);
+          // reg out of range of holepool reg
+          if (fitness > 2.0f) continue;
+          if (fitness > lastfitness) {
+            lastfitness = fitness;
+            holeregbest = &*itr;
+          }
+          // reg has one prefect fit use this
+          if (fitness > 1 ) break;
+        }
+        // Reuse the hole and update the holeregpool
+        if (holeregbest) {
+          auto holereg = holeregbest->reg;
+          holeregbest->startID = useID + 1;
+          if (getenv("DUMPHOLE"))
+            printf("reg %d in block %d [%d->%d] has %zu candidate best fitness is %d with fitness %f\n", (int)reg, blockID, interval.minID, useID, holepoolvec.size(), (int)holereg, lastfitness);
+          if (RA.contains(holereg)) {
+            uint32_t grfOffset = RA.find(holereg)->second;
+            RA.insert(std::make_pair(reg, grfOffset));
+            interval.isreused = true;
+          }
+        }
+      }
+    }
+    if (RA.contains(reg) == true)
+      return true; // already allocated
     uint32_t grfOffset = allocateReg(interval, regSize, regSize);
     if (grfOffset == 0) {
       return false;
@@ -731,7 +785,7 @@ namespace gbe
     ctx.errCode = REGISTER_ALLOCATION_FAIL;
     const uint32_t regNum = ctx.sel->getRegNum();
     for (uint32_t startID = 0; startID < regNum; ++startID) {
-      const GenRegInterval &interval = *this->starting[startID];
+      GenRegInterval &interval = *this->starting[startID];
       const ir::Register reg = interval.reg;
 
       if (interval.maxID == -INT_MAX)
@@ -849,6 +903,8 @@ namespace gbe
 
   INLINE bool GenRegAllocator::Opaque::expireReg(ir::Register reg)
   {
+    if (this->intervals[reg].isreused)
+      return true;
     auto it = RA.find(reg);
     if (flagBooleans.contains(reg))
       return false;
@@ -1227,10 +1283,12 @@ namespace gbe
     }
 
     // Compute the intervals
-    int32_t insnID = 0;
+    int insnID = 0;
+    int blockID = 0;
     for (auto &block : *selection.blockList) {
+      map<int32_t, BlockRegInterval> blockIntervals;
       int32_t lastID = insnID;
-      int32_t firstID = insnID;
+      int32_t firstID = insnID ? insnID + 2 : insnID;
       // Update the intervals of each used register. Note that we do not
       // register allocate R0, so we skip all sub-registers in r0
       RegIntervalMap *boolsMap = new RegIntervalMap;
@@ -1267,6 +1325,10 @@ namespace gbe
             insnsrcID -= 1;
           this->intervals[reg].minID = std::min(this->intervals[reg].minID, insnsrcID);
           this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, insnsrcID);
+          BlockRegInterval &blockRegInterval = blockIntervals[reg];
+          blockRegInterval.useID = insnsrcID;
+          if (this->intervals[reg].blockID > 0 && this->intervals[reg].blockID != blockID)
+            this->intervals[reg].blockID = -4;
         }
         for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
           const GenRegister &selReg = insn.dst(dstID);
@@ -1282,6 +1344,27 @@ namespace gbe
           }
           this->intervals[reg].minID = std::min(this->intervals[reg].minID, insnID);
           this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, insnID);
+          if ( this->intervals[reg].blockID == -1)
+            this->intervals[reg].blockID = blockID;
+          // Only find hole reg that not in ocl registers
+          if ( blockIntervals.find(reg) != blockIntervals.end() && reg >= ir::ocl::regNum) {
+            BlockRegInterval &blockRegInterval = blockIntervals[reg];
+            int useID = blockRegInterval.useID;
+            // Check if the reg will leave a hole and add it to HoleRegPool
+            if (useID >= 0 && useID < insnID - 1 && reg >= ir::ocl::regNum) {
+              if (getenv("DUMPHOLE"))
+                printf("reg %d has hole in block %d [%d => %d]\n",(int)reg, blockID, useID, insnID);
+              uint32_t regSize;
+              ir::RegisterFamily family;
+              getRegAttrib(reg, regSize, &family);
+              HoleRegTag holeregtag;
+              holeregtag.reg = reg;
+              holeregtag.startID = useID + 1;
+              holeregtag.endID = insnID - 1;
+              holeregtag.regSize = regSize;
+              this->HoleRegPool[blockID].push_back(holeregtag);
+            }
+          }
         }
 
         // OK, a flag is used as a predicate or a conditional modifier
@@ -1327,18 +1410,23 @@ namespace gbe
       // All registers alive at the begining of the block must update their intervals.
       const ir::BasicBlock *bb = block.bb;
       bbLastInsnIDMap.insert(std::make_pair(bb, lastID));
-      for (auto reg : ctx.getLiveIn(bb))
+      for (auto reg : ctx.getLiveIn(bb)) {
         this->intervals[reg].minID = std::min(this->intervals[reg].minID, firstID);
-
+        this->intervals[reg].blockID = -2;
+      }
       // All registers alive at the end of the block must have their intervals
       // updated as well
-      for (auto reg : ctx.getLiveOut(bb))
+      for (auto reg : ctx.getLiveOut(bb)) {
         this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, lastID);
+        this->intervals[reg].blockID = -3;
+      }
 
       if (boolsMap->size() > 0)
         boolIntervalsMap.insert(std::make_pair(&block, boolsMap));
       else
         delete boolsMap;
+
+      blockID++;
     }
 
     for (auto &it : this->intervals) {
diff --git a/backend/src/backend/gen_reg_allocation.hpp b/backend/src/backend/gen_reg_allocation.hpp
index 8d5e797..3c18539 100644
--- a/backend/src/backend/gen_reg_allocation.hpp
+++ b/backend/src/backend/gen_reg_allocation.hpp
@@ -40,6 +40,17 @@ namespace gbe
     int32_t addr;
   } SpillRegTag;
 
+  typedef struct HoleRegTag {
+    uint32_t startID;
+    uint32_t endID;
+    uint32_t regSize;
+    ir::Register reg;
+  }HoleRegTag;
+
+  typedef struct BlockRegInterval {
+    int32_t blockID, defID, useID;
+  }BlockRegInterval;
+
   typedef map<ir::Register, SpillRegTag> SpilledRegs;
 
   /*! Register allocate (i.e. virtual to physical register mapping) */
-- 
2.7.4



More information about the Beignet mailing list