[Beignet] [PATCH V3 3/3] Backend: Add hole reuse in reg allocation

Xiuli Pan xiuli.pan at intel.com
Tue Nov 1 07:17:21 UTC 2016


From: Pan Xiuli <xiuli.pan at intel.com>

We first find regs that leave holes during a simple linear scan and save
them in HoleRegPool. When allocating regs we then search the pool for
fitting candidates and reuse the one that fits best.

V2: Restrict hole reuse to within a single block.
V3: Refine the data structures with fewer variables; add OCL_REUSE_HOLE_REG
to control the optimization.
DEBUG version: you can export DUMPHOLE to dump hole information for easier review.
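Note: OCL_REUSE_HOLE_REG defaults to 0, so the optimization stays off unless
it is enabled explicitly, e.g. export OCL_REUSE_HOLE_REG=1 (assuming BVAR
picks the value up from the environment like the other OCL_* switches).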

Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp |  31 +++++++
 backend/src/backend/gen_insn_selection.hpp |   2 +
 backend/src/backend/gen_reg_allocation.cpp | 134 ++++++++++++++++++++++++++---
 backend/src/backend/gen_reg_allocation.hpp |  19 ++++
 4 files changed, 173 insertions(+), 13 deletions(-)
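
For reviewers, here is a standalone sketch of the hole-fitness heuristic that
createGenReg applies below. It is illustrative only: the types are simplified,
pickBestHole is a made-up helper, and the zero divisor that the patch's
1/(a - b - 1) formula can hit is avoided; none of it is part of the change.

  // Simplified model of the hole pool and of the interval being allocated.
  #include <cstdio>
  #include <cstdint>
  #include <vector>

  struct HoleRegTag {           // a hole left by a register inside one block
    uint32_t startID, endID;    // instruction ID range of the hole
    uint32_t regSize;           // byte size of the register owning the hole
    uint32_t reg;               // the register owning the hole
  };

  struct Interval {             // live range of the register to allocate
    int32_t minID, maxID;
    uint32_t regSize;
  };

  // 1.0 for an exact boundary match, smaller the looser the fit,
  // above 2.0 when the interval is not contained in the hole.
  static float IDFitness(int a, int b) {
    if (a == b) return 1.0f;
    if (a > b)  return 1.0f / float(a - b);
    return 3.0f;                // interval starts before the hole: reject
  }

  static float holeFitness(const Interval &i, const HoleRegTag &h) {
    return IDFitness(i.minID, (int)h.startID) + IDFitness((int)h.endID, i.maxID);
  }

  // Pick the pooled hole whose ID range most tightly encloses the interval.
  static const HoleRegTag *pickBestHole(const Interval &i,
                                        const std::vector<HoleRegTag> &pool) {
    const HoleRegTag *best = nullptr;
    float bestFitness = 0.0f;
    for (const HoleRegTag &h : pool) {
      if (h.regSize != i.regSize) continue;  // sizes must match to share the slot
      float f = holeFitness(i, h);
      if (f > 2.0f) continue;                // interval not contained in the hole
      if (f > bestFitness) { bestFitness = f; best = &h; }
      if (f > 1.0f) break;                   // good enough, mirrors the patch's cut-off
    }
    return best;
  }

  int main() {
    std::vector<HoleRegTag> pool = {{10, 30, 4, 100}, {12, 20, 4, 101}};
    Interval interval = {12, 20, 4};
    if (const HoleRegTag *h = pickBestHole(interval, pool))
      std::printf("reuse hole of reg %u (%u..%u)\n",
                  (unsigned)h->reg, (unsigned)h->startID, (unsigned)h->endID);
    return 0;
  }

The real allocator additionally reuses the chosen hole reg's GRF offset for
the new register and marks the interval as reused so expireReg skips it.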

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 97ee17a..ba78858 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -225,6 +225,37 @@ namespace gbe
     return this->opcode == SEL_OP_LABEL;
   }
 
+  bool SelectionInstruction::isNative(void) const {
+    return this->opcode == SEL_OP_NOT         || /* ALU1 */
+           this->opcode == SEL_OP_LZD         ||
+           this->opcode == SEL_OP_RNDZ        ||
+           this->opcode == SEL_OP_RNDE        ||
+           this->opcode == SEL_OP_RNDD        ||
+           this->opcode == SEL_OP_RNDU        ||
+           this->opcode == SEL_OP_FRC         ||
+           this->opcode == SEL_OP_F16TO32     ||
+           this->opcode == SEL_OP_F32TO16     ||
+           this->opcode == SEL_OP_CBIT        ||
+           this->opcode == SEL_OP_SEL         || /* ALU2 */
+           this->opcode == SEL_OP_AND         ||
+           this->opcode == SEL_OP_OR          ||
+           this->opcode == SEL_OP_XOR         ||
+           this->opcode == SEL_OP_SHR         ||
+           this->opcode == SEL_OP_SHL         ||
+           this->opcode == SEL_OP_RSR         ||
+           this->opcode == SEL_OP_RSL         ||
+           this->opcode == SEL_OP_ASR         ||
+           this->opcode == SEL_OP_SEL         ||
+           this->opcode == SEL_OP_ADD         ||
+           this->opcode == SEL_OP_MUL         ||
+           this->opcode == SEL_OP_FBH         ||
+           this->opcode == SEL_OP_FBL         ||
+           this->opcode == SEL_OP_MACH        ||
+           this->opcode == SEL_OP_MATH        ||
+           this->opcode == SEL_OP_LRP         || /* ALU3 */
+           this->opcode == SEL_OP_MAD;
+  }
+
   ///////////////////////////////////////////////////////////////////////////
   // SelectionVector
   ///////////////////////////////////////////////////////////////////////////
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 814b449..6c8e472 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -82,6 +82,8 @@ namespace gbe
     bool isBranch(void) const;
     /*! Is it a label instruction (i.e. change the implicit mask) */
     bool isLabel(void) const;
+    /*! Is it a simple native instruction (i.e. it will map to one simple ISA instruction) */
+    bool isNative(void) const;
     /*! Get the destination register */
     GenRegister &dst(uint32_t dstID) { return regs[dstID]; }
     /*! Get the source register */
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 4451efb..b871ea4 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -50,12 +50,12 @@ namespace gbe
   struct GenRegInterval {
     INLINE GenRegInterval(ir::Register reg) :
       reg(reg), minID(INT_MAX), maxID(-INT_MAX), accessCount(0),
-      conflictReg(0), b3OpAlign(0) {}
+      conflictReg(0), b3OpAlign(0), isreused(0) {}
     ir::Register reg;     //!< (virtual) register of the interval
     int32_t minID, maxID; //!< Starting and ending points
     int32_t accessCount;
     ir::Register conflictReg; // < has banck conflict with this register
-    bool b3OpAlign;
+    bool b3OpAlign, isreused;
   };
 
   struct SpillInterval {
@@ -123,7 +123,7 @@ namespace gbe
     /*! Allocate the vectors detected in the instruction selection pass */
     void allocateVector(Selection &selection);
     /*! Allocate the given interval. Return true if success */
-    bool createGenReg(const Selection &selection, const GenRegInterval &interval);
+    bool createGenReg(const Selection &selection, GenRegInterval &interval);
     /*! Indicate if the registers are already allocated in vectors */
     bool isAllocated(const SelectionVector *vector) const;
     /*! Reallocate registers if needed to make the registers in the vector
@@ -160,10 +160,14 @@ namespace gbe
     std::set<GenRegInterval*> spillCandidate;
     /*! BBs last instruction ID map */
     map<const ir::BasicBlock *, int32_t> bbLastInsnIDMap;
+    /*! Instruction ID ranges of the basic blocks */
+    BlockRanges blockRanges;
     /* reserved registers for register spill/reload */
     uint32_t reservedReg;
     /*! Current vector to expire */
     uint32_t expiringID;
+    /*! Hole regs that can be reused, indexed by block ID */
+    map<uint32_t, vector<HoleRegTag>> HoleRegPool;
     INLINE void insertNewReg(const Selection &selection, ir::Register reg, uint32_t grfOffset, bool isVector = false);
     INLINE bool expireReg(ir::Register reg);
     INLINE bool spillAtInterval(GenRegInterval interval, int size, uint32_t alignment);
@@ -278,14 +282,83 @@ namespace gbe
     }
   }
 
-  bool GenRegAllocator::Opaque::createGenReg(const Selection &selection, const GenRegInterval &interval) {
+  INLINE float IDFitness(int a, int b) {
+    if (a == b) return 1.0f;
+    else if (a > b) return 1.0f / (a - b - 1);
+    else return 2.0f;
+  }
+
+  INLINE float getHoleRegFitness(const GenRegInterval &interval, HoleRegTag &holeRegTag) {
+    int regstID = interval.minID;
+    int regendID = interval.maxID;
+    int holeregstID = holeRegTag.startID;
+    int holeregendID = holeRegTag.endID;
+    return IDFitness(regstID, holeregstID) + IDFitness(holeregendID, regendID);
+  }
+
+  INLINE int getRegBlockID(const GenRegInterval &interval, const BlockRanges &blockRanges) {
+    int minID = interval.minID;
+    int maxID = interval.maxID;
+    int res = -1;
+    for (size_t i = 0; i < blockRanges.size(); ++i)
+    {
+      if (minID >= blockRanges[i].minID && maxID <= blockRanges[i].maxID)
+      {
+        res = i;
+        break;
+      }
+    }
+    return res;
+  }
+
+
+  BVAR(OCL_REUSE_HOLE_REG, 0);
+  bool GenRegAllocator::Opaque::createGenReg(const Selection &selection, GenRegInterval &interval) {
     using namespace ir;
     const ir::Register reg = interval.reg;
-    if (RA.contains(reg) == true)
-      return true; // already allocated
     uint32_t regSize;
     ir::RegisterFamily family;
     getRegAttrib(reg, regSize, &family);
+    // Check if the reg is live only in one block, and thus can reuse a hole
+    int blockID = getRegBlockID(interval, this->blockRanges);
+    if (blockID >= 0 && OCL_REUSE_HOLE_REG) {
+      uint32_t useID = interval.maxID;
+      auto holepool = this->HoleRegPool.find(blockID);
+      // Use the block ID as index to get candidate hole regs to reuse
+      if (holepool != this->HoleRegPool.end()) {
+        auto &holepoolvec = holepool->second;
+        HoleRegTag* holeregbest = NULL;
+        float lastfitness = 0;
+        for (auto itr = holepoolvec.begin(); itr != holepoolvec.end(); ++itr) {
+          if (regSize != itr->regSize)
+            continue;
+          float fitness = getHoleRegFitness(interval, *itr);
+          // reg interval is out of the hole reg's range
+          if (fitness > 2.0f) continue;
+          if (fitness > lastfitness) {
+            lastfitness = fitness;
+            holeregbest = &*itr;
+          }
+          // the reg has a perfect fit, use it
+          if (fitness > 1) break;
+        }
+        // Reuse the hole and update the holeregpool
+        if (holeregbest) {
+          auto holereg = holeregbest->reg;
+          holeregbest->startID = useID + 1;
+          if (getenv("DUMPHOLE"))
+            printf("%d %d(%d:%d->%d)has %zu candidate best fitness is %d with %f\n", (int)reg, family,blockID, interval.minID, useID, holepoolvec.size(), (int)holereg, lastfitness);
+          if (RA.contains(holereg)) {
+            uint32_t grfOffset = RA.find(holereg)->second;
+            RA.insert(std::make_pair(reg, grfOffset));
+            offsetReg.insert(std::make_pair(grfOffset, reg));
+            interval.isreused = true;
+          }
+        }
+      }
+    }
+    if (RA.contains(reg) == true)
+      return true; // already allocated
     uint32_t grfOffset = allocateReg(interval, regSize, regSize);
     if (grfOffset == 0) {
       return false;
@@ -712,7 +785,7 @@ namespace gbe
     ctx.errCode = REGISTER_ALLOCATION_FAIL;
     const uint32_t regNum = ctx.sel->getRegNum();
     for (uint32_t startID = 0; startID < regNum; ++startID) {
-      const GenRegInterval &interval = *this->starting[startID];
+      GenRegInterval &interval = *this->starting[startID];
       const ir::Register reg = interval.reg;
 
       if (interval.maxID == -INT_MAX)
@@ -830,6 +903,8 @@ namespace gbe
 
   INLINE bool GenRegAllocator::Opaque::expireReg(ir::Register reg)
   {
+    if (this->intervals[reg].isreused)
+      return true;
     auto it = RA.find(reg);
     if (flagBooleans.contains(reg))
       return false;
@@ -1208,10 +1283,12 @@ namespace gbe
     }
 
     // Compute the intervals
-    int32_t insnID = 0;
+    int insnID = 0;
+    int blockID = 0;
     for (auto &block : *selection.blockList) {
+      map<int32_t, BlockRegInterval> blockIntervals;
       int32_t lastID = insnID;
-      int32_t firstID = insnID;
+      int32_t firstID = insnID ? insnID + 2 : insnID;
       // Update the intervals of each used register. Note that we do not
       // register allocate R0, so we skip all sub-registers in r0
       RegIntervalMap *boolsMap = new RegIntervalMap;
@@ -1219,7 +1296,7 @@ namespace gbe
         flag0ReservedBlocks.insert(&block);
       for (auto &insn : block.insnList) {
         const uint32_t srcNum = insn.srcNum, dstNum = insn.dstNum;
-        insn.ID  = insnID;
+        insnID = (int)insn.ID;
         bool is3SrcOp = insn.opcode == SEL_OP_MAD;
         for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
           const GenRegister &selReg = insn.src(srcID);
@@ -1244,8 +1321,14 @@ namespace gbe
           if (this->intervals[reg].conflictReg == 0 ||
               this->intervals[reg].conflictReg > conflictReg)
           this->intervals[reg].conflictReg = conflictReg;
-          this->intervals[reg].minID = std::min(this->intervals[reg].minID, insnID);
-          this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, insnID);
+          int insnsrcID = insnID;
+          // If the instruction is simple (native), src and dst can be reused, so they get different IDs
+          if (insn.isNative())
+            insnsrcID -= 1;
+          this->intervals[reg].minID = std::min(this->intervals[reg].minID, insnsrcID);
+          this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, insnsrcID);
+          BlockRegInterval &blockRegInterval = blockIntervals[reg];
+          blockRegInterval.useID = insnsrcID;
         }
         for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
           const GenRegister &selReg = insn.dst(dstID);
@@ -1261,6 +1344,25 @@ namespace gbe
           }
           this->intervals[reg].minID = std::min(this->intervals[reg].minID, insnID);
           this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, insnID);
+          // Only consider hole regs that are not special ocl registers
+          if (blockIntervals.find(reg) != blockIntervals.end() && reg >= ir::ocl::regNum) {
+            BlockRegInterval &blockRegInterval = blockIntervals[reg];
+            int useID = blockRegInterval.useID;
+            // Check if the reg will leave a hole and add it to HoleRegPool
+            if (useID >= 0 && useID < insnID - 1 && reg >= ir::ocl::regNum) {
+              if (getenv("DUMPHOLE"))
+                printf("reg %d is hole in %d: %d => %d)\n",(int)reg, blockID, useID, insnID);
+              uint32_t regSize;
+              ir::RegisterFamily family;
+              getRegAttrib(reg, regSize, &family);
+              HoleRegTag holeregtag;
+              holeregtag.reg = reg;
+              holeregtag.startID = useID + 1;
+              holeregtag.endID = insnID - 1;
+              holeregtag.regSize = regSize;
+              this->HoleRegPool[blockID].push_back(holeregtag);
+            }
+          }
         }
 
         // OK, a flag is used as a predicate or a conditional modifier
@@ -1303,9 +1405,9 @@ namespace gbe
                        (block.removeSimpleIfEndif && insn.state.flag == 0 && insn.state.subFlag == 0) ));
         }
         lastID = insnID;
-        insnID++;
       }
 
+      blockRanges.push_back(BlockRange(firstID, lastID));
       // All registers alive at the begining of the block must update their intervals.
       const ir::BasicBlock *bb = block.bb;
       bbLastInsnIDMap.insert(std::make_pair(bb, lastID));
@@ -1321,6 +1423,8 @@ namespace gbe
         boolIntervalsMap.insert(std::make_pair(&block, boolsMap));
       else
         delete boolsMap;
+
+      blockID++;
     }
 
     for (auto &it : this->intervals) {
@@ -1422,6 +1526,7 @@ do { \
   INLINE void GenRegAllocator::Opaque::outputAllocation(void) {
     using namespace std;
     cout << "## register allocation ##" << endl;
+    int allmaxID = 0;
     for(auto &i : RA) {
         ir::Register vReg = (ir::Register)i.first;
         ir::RegisterFamily family;
@@ -1437,6 +1542,9 @@ do { \
              << "[  " << setw(8) << this->intervals[(uint)vReg].minID
              << " -> " << setw(8) << this->intervals[(uint)vReg].maxID
              << "]" << setw(8) << "use count: " << this->intervals[(uint)vReg].accessCount << endl;
+        int thisMaxID = this->intervals[(uint)vReg].maxID;
+        if (thisMaxID > 0)
+          allmaxID = thisMaxID > allmaxID ? thisMaxID : allmaxID;
     }
     if (!spilledRegs.empty())
       cout << "## spilled registers: " << spilledRegs.size() << endl;
diff --git a/backend/src/backend/gen_reg_allocation.hpp b/backend/src/backend/gen_reg_allocation.hpp
index 8d5e797..2f2174d 100644
--- a/backend/src/backend/gen_reg_allocation.hpp
+++ b/backend/src/backend/gen_reg_allocation.hpp
@@ -40,8 +40,27 @@ namespace gbe
     int32_t addr;
   } SpillRegTag;
 
+  typedef struct HoleRegTag {
+    uint32_t startID;
+    uint32_t endID;
+    uint32_t regSize;
+    ir::Register reg;
+  } HoleRegTag;
+
+  typedef struct BlockRegInterval {
+    int32_t blockID, defID, useID;
+  } BlockRegInterval;
+
+  typedef struct BlockRange {
+    BlockRange(int32_t minID, int32_t maxID) : minID(minID), maxID(maxID) {}
+    int32_t minID, maxID;
+  } BlockRange;
+
+
   typedef map<ir::Register, SpillRegTag> SpilledRegs;
 
+  typedef vector<BlockRange> BlockRanges;
+
   /*! Register allocate (i.e. virtual to physical register mapping) */
   class GenRegAllocator
   {
-- 
2.7.4


