[Beignet] [PATCH 4/5] GBE: fix post scheduling related bug for spill/unspill.

Zhigang Gong zhigang.gong at intel.com
Wed May 21 18:41:51 PDT 2014


spill/unspill instruction touch some registers directly which
are not in dst/src. This breaks the post scheduling. Simply
work around it by add all the reserved registers to the dst
array.

The scratch memory is not correctly indexed and the barrier is
not handled properly.

After this patch, the post scheduling will be enabled by default.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/backend/gen_insn_scheduling.cpp | 16 +++++++++++++---
 backend/src/backend/gen_insn_selection.cpp  | 20 +++++++++++++++++---
 backend/src/backend/gen_reg_allocation.cpp  |  2 +-
 3 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp
index eada722..5d74f00 100644
--- a/backend/src/backend/gen_insn_scheduling.cpp
+++ b/backend/src/backend/gen_insn_scheduling.cpp
@@ -138,6 +138,7 @@ namespace gbe
   enum GenMemory : uint8_t {
     GLOBAL_MEMORY = 0,
     LOCAL_MEMORY,
+    SCRATCH_MEMORY,
     MAX_MEM_SYSTEM
   };
 
@@ -348,7 +349,7 @@ namespace gbe
 
   uint32_t DependencyTracker::getIndex(uint32_t bti) const {
     const uint32_t memDelta = grfNum + MAX_FLAG_REGISTER + MAX_ACC_REGISTER;
-    return bti == 0xfe ? memDelta + LOCAL_MEMORY : memDelta + GLOBAL_MEMORY;
+    return bti == 0xfe ? memDelta + LOCAL_MEMORY : (bti == 0xff ? memDelta + SCRATCH_MEMORY : memDelta + GLOBAL_MEMORY);
   }
 
   void DependencyTracker::updateWrites(ScheduleDAGNode *node) {
@@ -383,6 +384,7 @@ namespace gbe
       this->nodes[index] = node;
     }
 
+    // Track writes in scratch memory
     if(insn.opcode == SEL_OP_SPILL_REG) {
       const uint32_t index = this->getIndex(0xff);
       this->nodes[index] = node;
@@ -548,6 +550,12 @@ namespace gbe
         tracker.addDependency(index, node, WRITE_AFTER_READ);
       }
 
+      // write-after-read in scratch memory
+      if (insn.opcode == SEL_OP_UNSPILL_REG) {
+        const uint32_t index = tracker.getIndex(0xff);
+        tracker.addDependency(index, node, WRITE_AFTER_READ);
+      }
+
       // Consider barriers and wait are reading memory (local and global)
       if (insn.opcode == SEL_OP_BARRIER ||
           insn.opcode == SEL_OP_FENCE ||
@@ -573,7 +581,9 @@ namespace gbe
     // Make labels and branches non-schedulable (i.e. they act as barriers)
     for (int32_t insnID = 0; insnID < insnNum; ++insnID) {
       ScheduleDAGNode *node = tracker.insnNodes[insnID];
-      if (node->insn.isBranch() || node->insn.isLabel() || node->insn.opcode == SEL_OP_EOT || node->insn.opcode == SEL_OP_IF)
+      if (node->insn.isBranch() || node->insn.isLabel()
+          || node->insn.opcode == SEL_OP_EOT || node->insn.opcode == SEL_OP_IF
+          || node->insn.opcode == SEL_OP_BARRIER)
         tracker.makeBarrier(insnID, insnNum);
     }
 
@@ -682,7 +692,7 @@ namespace gbe
     }
   }
 
-  BVAR(OCL_POST_ALLOC_INSN_SCHEDULE, false);
+  BVAR(OCL_POST_ALLOC_INSN_SCHEDULE, true);
   BVAR(OCL_PRE_ALLOC_INSN_SCHEDULE, false);
 
   void schedulePostRegAllocation(GenContext &ctx, Selection &selection) {
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index a253c07..686e065 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -775,19 +775,28 @@ namespace gbe
                       << (uint32_t)poolOffset << std::endl;
           return false;
         }
+        // FIXME, to support post register allocation scheduling,
+        // put all the reserved register to the spill/unspill's destination registers.
+        // This is not the best way. We need to refine the spill/unspill instruction to
+        // only use passed in registers and don't access hard coded offset in the future.
         while(!regSet.empty()) {
           struct RegSlot regSlot = regSet.back();
           regSet.pop_back();
           const GenRegister selReg = insn.src(regSlot.srcID);
           if (!regSlot.isTmpReg) {
           /* For temporary registers, we don't need to unspill. */
-            SelectionInstruction *unspill = this->create(SEL_OP_UNSPILL_REG, 1, 0);
+            SelectionInstruction *unspill = this->create(SEL_OP_UNSPILL_REG,
+                                            1 + (ctx.reservedSpillRegs * 8) / ctx.getSimdWidth(), 0);
             unspill->state = GenInstructionState(simdWidth);
             unspill->state.noMask = 1;
             unspill->dst(0) = GenRegister(GEN_GENERAL_REGISTER_FILE,
                                           registerPool + regSlot.poolOffset, 0,
                                           selReg.type, selReg.vstride,
                                           selReg.width, selReg.hstride);
+            for(uint32_t i = 1; i < 1 + (ctx.reservedSpillRegs * 8) / ctx.getSimdWidth(); i++)
+              unspill->dst(i) = ctx.getSimdWidth() == 8 ?
+                                GenRegister::vec8(GEN_GENERAL_REGISTER_FILE, registerPool + (i - 1), 0 ) :
+                                GenRegister::vec16(GEN_GENERAL_REGISTER_FILE, registerPool + (i - 1) * 2, 0);
             unspill->extra.scratchOffset = regSlot.addr + selReg.quarter * 4 * simdWidth;
             unspill->extra.scratchMsgHeader = registerPool;
             insn.prepend(*unspill);
@@ -823,7 +832,7 @@ namespace gbe
             struct RegSlot regSlot(reg, dstID, poolOffset,
                                    it->second.isTmpReg,
                                    it->second.addr);
-            if(family == ir::FAMILY_QWORD) poolOffset += 2 * simdWidth / 8;
+            if (family == ir::FAMILY_QWORD) poolOffset += 2 * simdWidth / 8;
             else poolOffset += simdWidth / 8;
             regSet.push_back(regSlot);
           }
@@ -842,7 +851,8 @@ namespace gbe
           const GenRegister selReg = insn.dst(regSlot.dstID);
           if(!regSlot.isTmpReg) {
             /* For temporary registers, we don't need to unspill. */
-            SelectionInstruction *spill = this->create(SEL_OP_SPILL_REG, 0, 1);
+            SelectionInstruction *spill = this->create(SEL_OP_SPILL_REG,
+                                          (ctx.reservedSpillRegs * 8) / ctx.getSimdWidth() , 1);
             spill->state  = insn.state;//GenInstructionState(simdWidth);
             spill->state.accWrEnable = 0;
             spill->state.saturate = 0;
@@ -854,6 +864,10 @@ namespace gbe
                                         selReg.width, selReg.hstride);
             spill->extra.scratchOffset = regSlot.addr + selReg.quarter * 4 * simdWidth;
             spill->extra.scratchMsgHeader = registerPool;
+            for(uint32_t i = 0; i < 0 + (ctx.reservedSpillRegs * 8) / ctx.getSimdWidth(); i++)
+              spill->dst(i) = ctx.getSimdWidth() == 8 ?
+                                GenRegister::vec8(GEN_GENERAL_REGISTER_FILE, registerPool + (i), 0 ) :
+                                GenRegister::vec16(GEN_GENERAL_REGISTER_FILE, registerPool + (i) * 2, 0);
             insn.append(*spill);
           }
 
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index b23ab92..f642c2e 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -736,7 +736,7 @@ namespace gbe
     for(uint32_t i = 0; i < regNum; i++) {
       const GenRegInterval * cur = starting[i];
       const GenRegInterval * exp = ending[toExpire];
-      if(exp->maxID < cur->minID) {
+      if (exp->maxID < cur->minID) {
         auto it = spilledRegs.find(exp->reg);
         GBE_ASSERT(it != spilledRegs.end());
         if(it->second.addr != -1) {
-- 
1.8.3.2



More information about the Beignet mailing list