[Beignet] [PATCH] GBE: fix one potential register spilling bug.

Zhigang Gong zhigang.gong at intel.com
Wed May 27 23:35:17 PDT 2015


There are some special cases introduced in gen_insn_selection stage
which will do more than one writing on the same virtual registers
with different stride or simd width with different suboffset.

For this type of registers, the current register spilling implementation
is broken. To avoid unecessary complexity, this patch is to gather the
partially writting registers and don't put these register to
spill candidate set.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp | 45 +++++++++++++++++++-----------
 backend/src/backend/gen_insn_selection.hpp |  2 ++
 backend/src/backend/gen_reg_allocation.cpp | 20 +++++++------
 backend/src/backend/gen_register.hpp       |  1 +
 4 files changed, 43 insertions(+), 25 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index a68d0ce..a54c219 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -366,6 +366,9 @@ namespace gbe
     void setLdMsgOrder(uint32_t type)  { ldMsgOrder = type; }
     uint32_t getLdMsgOrder()  const { return ldMsgOrder; }
     /*! indicate whether a register is a scalar/uniform register. */
+    INLINE bool isPartialWrite(const ir::Register &reg) const {
+      return partialWriteRegs.find(reg.value()) != partialWriteRegs.end();
+    }
     INLINE bool isScalarReg(const ir::Register &reg) const {
       const ir::RegisterData &regData = getRegisterData(reg);
       return regData.isUniform();
@@ -386,6 +389,13 @@ namespace gbe
     INLINE GenRegister unpacked_ub(const ir::Register &reg) const {
       return GenRegister::unpacked_ub(reg, isScalarReg(reg));
     }
+
+    INLINE GenRegister getOffsetReg(GenRegister reg, int nr, int subnr, bool isDst = true) {
+      if (isDst)
+        partialWriteRegs.insert(reg.value.reg);
+      return GenRegister::offset(reg, nr, subnr);
+    }
+
     /*! Implement public class */
     INLINE uint32_t getRegNum(void) const { return file.regNum(); }
     /*! Implements public interface */
@@ -479,6 +489,11 @@ namespace gbe
     /*! To make function prototypes more readable */
     typedef const GenRegister &Reg;
 
+    /*! Check for destination register. Major purpose is to find
+        out partially updated dst registers. These registers will
+        be unspillable. */
+    set<uint32_t> partialWriteRegs;
+
 #define ALU1(OP) \
   INLINE void OP(Reg dst, Reg src) { ALU1(SEL_OP_##OP, dst, src); }
 #define ALU1WithTemp(OP) \
@@ -2078,6 +2093,10 @@ namespace gbe
     return this->opaque->isScalarReg(reg);
   }
 
+  bool Selection::isPartialWrite(const ir::Register &reg) const {
+    return this->opaque->isPartialWrite(reg);
+  }
+
   SelectionInstruction *Selection::create(SelectionOpcode opcode, uint32_t dstNum, uint32_t srcNum) {
     return this->opaque->create(opcode, dstNum, srcNum);
   }
@@ -2729,8 +2748,7 @@ namespace gbe
           src0 = GenRegister::retype(sel.unpacked_uw(src0.reg()), GEN_TYPE_B);
           src1 = GenRegister::retype(sel.unpacked_uw(src1.reg()), GEN_TYPE_B);
           sel.MOV(dst, src1);
-          dst.subphysical = 1;
-          dst = dst.offset(dst, 0, typeSize(GEN_TYPE_B));
+          dst = sel.getOffsetReg(dst, 0, typeSize(GEN_TYPE_B));
           sel.MOV(dst, src0);
           break;
         }
@@ -2740,8 +2758,7 @@ namespace gbe
           src0 = sel.unpacked_uw(src0.reg());
           src1 = sel.unpacked_uw(src1.reg());
           sel.MOV(dst, src1);
-          dst.subphysical = 1;
-          dst = dst.offset(dst, 0, typeSize(GEN_TYPE_W));
+          dst = sel.getOffsetReg(dst, 0, typeSize(GEN_TYPE_W));
           sel.MOV(dst, src0);
           break;
         }
@@ -3911,17 +3928,15 @@ namespace gbe
         }
 
         if((!isInt64 || (sel.hasLongType() && multiple != 8)) && index % multiple) {
-          wideReg = GenRegister::offset(wideReg, 0, (index % multiple) * typeSize(wideReg.type));
-          wideReg.subphysical = 1;
+          wideReg = sel.getOffsetReg(wideReg, 0, (index % multiple) * typeSize(wideReg.type));
         }
         if(isInt64 && (multiple == 8 || !sel.hasLongType())) {
-          wideReg.subphysical = 1;
           // Offset to next half
           if((i % multiple) >= multiple/2)
-            wideReg = GenRegister::offset(wideReg, 0, sel.isScalarReg(wideReg.reg()) ? 4 : simdWidth*4);
+            wideReg = sel.getOffsetReg(wideReg, 0, sel.isScalarReg(wideReg.reg()) ? 4 : simdWidth*4);
           // Offset to desired narrow element in wideReg
           if(index % (multiple/2))
-            wideReg = GenRegister::offset(wideReg, 0, (index % (multiple/2)) * typeSize(wideReg.type));
+            wideReg = sel.getOffsetReg(wideReg, 0, (index % (multiple/2)) * typeSize(wideReg.type));
         }
 
         GenRegister xdst = narrowDst ? narrowReg : wideReg;
@@ -3932,13 +3947,11 @@ namespace gbe
         } else if(srcType == TYPE_DOUBLE || dstType == TYPE_DOUBLE) {
           sel.push();
             sel.curr.execWidth = 8;
-            xdst.subphysical = 1;
-            xsrc.subphysical = 1;
             for(int i = 0; i < simdWidth/4; i ++) {
               sel.curr.chooseNib(i);
               sel.MOV(xdst, xsrc);
-              xdst = GenRegister::offset(xdst, 0, 4 * typeSize(getGenType(dstType)));
-              xsrc = GenRegister::offset(xsrc, 0, 4 * typeSize(getGenType(srcType)));
+              xdst = sel.getOffsetReg(xdst, 0, 4 * typeSize(getGenType(dstType)));
+              xsrc = sel.getOffsetReg(xsrc, 0, 4 * typeSize(getGenType(srcType)));
             }
           sel.pop();
         } else
@@ -4689,8 +4702,7 @@ namespace gbe
       sel.MOV(msgs[0], GenRegister::immud(0));
       sel.curr.execWidth = 1;
 
-      GenRegister channelEn = GenRegister::offset(msgs[0], 0, 7*4);
-      channelEn.subphysical = 1;
+      GenRegister channelEn = sel.getOffsetReg(msgs[0], 0, 7*4);
       // Enable all channels.
       sel.MOV(channelEn, GenRegister::immud(0xffff));
       sel.curr.execWidth = 8;
@@ -4806,8 +4818,7 @@ namespace gbe
       GenRegister dst, src;
       dst = sel.selReg(insn.getDst(0), ir::TYPE_U32);
       src = GenRegister::ud1grf(insn.getSrc(0));
-      src.subphysical = 1;
-      src = GenRegister::offset(src, 0, insn.getOffset()*4);
+      src = sel.getOffsetReg(src, 0, insn.getOffset()*4);
 
       sel.push();
         sel.curr.noMask = 1;
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 8c6caac..927dbd5 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -254,6 +254,8 @@ namespace gbe
     bool spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool);
     /*! Indicate if a register is scalar or not */
     bool isScalarReg(const ir::Register &reg) const;
+    /*! is this register a partially written register.*/
+    bool isPartialWrite(const ir::Register &reg) const;
     /*! Create a new selection instruction */
     SelectionInstruction *create(SelectionOpcode, uint32_t dstNum, uint32_t srcNum);
     /*! List of emitted blocks */
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index b104df4..a44cb2f 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -141,7 +141,7 @@ namespace gbe
     /*! Allocate the vectors detected in the instruction selection pass */
     void allocateVector(Selection &selection);
     /*! Allocate the given interval. Return true if success */
-    bool createGenReg(const GenRegInterval &interval);
+    bool createGenReg(const Selection &selection, const GenRegInterval &interval);
     /*! Indicate if the registers are already allocated in vectors */
     bool isAllocated(const SelectionVector *vector) const;
     /*! Reallocate registers if needed to make the registers in the vector
@@ -180,7 +180,7 @@ namespace gbe
     uint32_t reservedReg;
     /*! Current vector to expire */
     uint32_t expiringID;
-    INLINE void insertNewReg(ir::Register reg, uint32_t grfOffset, bool isVector = false);
+    INLINE void insertNewReg(const Selection &selection, ir::Register reg, uint32_t grfOffset, bool isVector = false);
     INLINE bool expireReg(ir::Register reg);
     INLINE bool spillAtInterval(GenRegInterval interval, int size, uint32_t alignment);
     INLINE uint32_t allocateReg(GenRegInterval interval, uint32_t size, uint32_t alignment);
@@ -250,7 +250,7 @@ namespace gbe
     }
   }
 
-  bool GenRegAllocator::Opaque::createGenReg(const GenRegInterval &interval) {
+  bool GenRegAllocator::Opaque::createGenReg(const Selection &selection, const GenRegInterval &interval) {
     using namespace ir;
     const ir::Register reg = interval.reg;
     if (RA.contains(reg) == true)
@@ -262,7 +262,7 @@ namespace gbe
     if (grfOffset == 0) {
       return false;
     }
-    insertNewReg(reg, grfOffset);
+    insertNewReg(selection, reg, grfOffset);
     return true;
   }
 
@@ -713,13 +713,13 @@ namespace gbe
           getRegAttrib(reg, alignment, NULL);
           // check all sub registers aligned correctly
           GBE_ASSERT((grfOffset + subOffset) % alignment == 0 || (grfOffset + subOffset) % GEN_REG_SIZE == 0);
-          insertNewReg(reg, grfOffset + subOffset, true);
+          insertNewReg(selection, reg, grfOffset + subOffset, true);
           ctx.splitBlock(grfOffset, subOffset);  //splitBlock will not split if regID == 0
           subOffset += alignment;
         }
       }
       // Case 2: This is a regular scalar register, allocate it alone
-      else if (this->createGenReg(interval) == false) {
+      else if (this->createGenReg(selection, interval) == false) {
         if (!spillReg(interval))
           return false;
       }
@@ -803,7 +803,10 @@ namespace gbe
 
   // insert a new register with allocated offset,
   // put it to the RA map and the spill map if it could be spilled.
-  INLINE void GenRegAllocator::Opaque::insertNewReg(ir::Register reg, uint32_t grfOffset, bool isVector)
+  INLINE void GenRegAllocator::Opaque::insertNewReg(const Selection &selection,
+                                                    ir::Register reg,
+                                                    uint32_t grfOffset,
+                                                    bool isVector)
   {
      RA.insert(std::make_pair(reg, grfOffset));
 
@@ -819,7 +822,8 @@ namespace gbe
          return;
 
        if ((regSize == ctx.getSimdWidth()/8 * GEN_REG_SIZE && family == ir::FAMILY_DWORD)
-          || (regSize == 2 * ctx.getSimdWidth()/8 * GEN_REG_SIZE && family == ir::FAMILY_QWORD)) {
+          || (regSize == 2 * ctx.getSimdWidth()/8 * GEN_REG_SIZE && family == ir::FAMILY_QWORD)
+          && !selection.isPartialWrite(reg)) {
          GBE_ASSERT(offsetReg.find(grfOffset) == offsetReg.end());
          offsetReg.insert(std::make_pair(grfOffset, reg));
          spillCandidate.insert(intervals[reg]);
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 794498e..ecb7ff9 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -269,6 +269,7 @@ namespace gbe
       GenRegister r = reg;
       r.nr += nr;
       r.subnr += subnr;
+      r.subphysical = 1;
       return r;
     }
 
-- 
1.9.1



More information about the Beignet mailing list