[Beignet] [PATCH V2 2/2] Implement spill/unspill
Ruiling Song
ruiling.song at intel.com
Wed Aug 7 00:07:41 PDT 2013
The current implementation works like below:
I reserve a pool of registers for spill/reload. Currently 6 registers
are reserved to handle SelectionVector with at most 5 elements.
The other one is used as scratch message header register. The register
after header register was used as the payload for scratch write.
To do spill, just iterate the instructions. If the virtual register
was used as src, insert reload instruction before it. If the virtual
register was used as dst, insert spill instruction to write the register
content to scratch memory.
Limitations yet:
64bit not support.
SelectionVector > 5 not handled.
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
backend/src/backend/gen_context.cpp | 34 ++++++++++++
backend/src/backend/gen_context.hpp | 2 +
.../src/backend/gen_insn_gen7_schedule_info.hxx | 2 +
backend/src/backend/gen_insn_scheduling.cpp | 39 +++++++++-----
backend/src/backend/gen_insn_selection.cpp | 57 +++++++++++++++++++-
backend/src/backend/gen_insn_selection.hpp | 6 +++
backend/src/backend/gen_insn_selection.hxx | 2 +
backend/src/backend/gen_reg_allocation.cpp | 56 +++++++++++++++++--
8 files changed, 180 insertions(+), 18 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 29fa1c3..9d681a5 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -542,6 +542,40 @@ namespace gbe
p->pop();
}
+ void GenContext::emitSpillRegInstruction(const SelectionInstruction &insn) {
+ uint32_t simdWidth = p->curr.execWidth;
+ uint32_t scratchOffset = insn.extra.scratchOffset;
+ const uint32_t header = insn.extra.scratchMsgHeader;
+ p->push();
+
+ const GenRegister msg = GenRegister::ud8grf(header, 0);
+ const GenRegister src = ra->genReg(insn.src(0));
+ GenRegister payload = src;
+ payload.nr = header + 1;
+ payload.subnr = 0;
+
+ p->MOV(payload, src);
+ uint32_t regType = insn.src(0).type;
+ uint32_t size = typeSize(regType);
+ assert(size <= 4);
+ uint32_t regNum = (stride(src.hstride)*size*simdWidth) > 32 ? 2 : 1;
+ this->scratchWriteHWord(msg, scratchOffset, regNum, regType);
+ p->pop();
+ }
+
+ void GenContext::emitUnSpillRegInstruction(const SelectionInstruction &insn) {
+ uint32_t scratchOffset = insn.extra.scratchOffset;
+ const GenRegister dst = insn.dst(0);
+ uint32_t regType = dst.type;
+ uint32_t simdWidth = p->curr.execWidth;
+ const uint32_t header = insn.extra.scratchMsgHeader;
+ uint32_t size = typeSize(regType);
+ assert(size <= 4);
+ uint32_t regNum = (stride(dst.hstride)*size*simdWidth) > 32 ? 2 : 1;
+ const GenRegister msg = GenRegister::ud8grf(header, 0);
+ this->scratchReadHWord(GenRegister::retype(dst, GEN_TYPE_UD), msg, scratchOffset, regNum, regType);
+ }
+
// For SIMD8, we allocate 2*elemNum temporary registers from dst(0), and
// then follow the real destination registers.
// For SIMD16, we allocate elemNum temporary registers from dst(0).
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index bcf0dc4..694ae98 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -108,6 +108,8 @@ namespace gbe
void emitByteScatterInstruction(const SelectionInstruction &insn);
void emitSampleInstruction(const SelectionInstruction &insn);
void emitTypedWriteInstruction(const SelectionInstruction &insn);
+ void emitSpillRegInstruction(const SelectionInstruction &insn);
+ void emitUnSpillRegInstruction(const SelectionInstruction &insn);
void emitGetImageInfoInstruction(const SelectionInstruction &insn);
void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index 6f37c3d..da8f2a2 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -20,5 +20,7 @@ DECL_GEN7_SCHEDULE(ByteGather, 80, 1, 1)
DECL_GEN7_SCHEDULE(ByteScatter, 80, 1, 1)
DECL_GEN7_SCHEDULE(Sample, 80, 1, 1)
DECL_GEN7_SCHEDULE(TypedWrite, 80, 1, 1)
+DECL_GEN7_SCHEDULE(SpillReg, 80, 1, 1)
+DECL_GEN7_SCHEDULE(UnSpillReg, 80, 1, 1)
DECL_GEN7_SCHEDULE(GetImageInfo, 20, 4, 2)
DECL_GEN7_SCHEDULE(Atomic, 80, 1, 1)
diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp
index cb990be..0b720b7 100644
--- a/backend/src/backend/gen_insn_scheduling.cpp
+++ b/backend/src/backend/gen_insn_scheduling.cpp
@@ -283,19 +283,24 @@ namespace gbe
uint32_t DependencyTracker::getIndex(GenRegister reg) const {
// Non GRF physical register
if (reg.physical) {
- GBE_ASSERT (reg.file == GEN_ARCHITECTURE_REGISTER_FILE);
- const uint32_t file = reg.nr & 0xf0;
- const uint32_t nr = reg.nr & 0x0f;
- if (file == GEN_ARF_FLAG) {
- const uint32_t subnr = reg.subnr / sizeof(uint16_t);
- GBE_ASSERT(nr < MAX_FLAG_REGISTER && (subnr == 0 || subnr == 1));
- return grfNum + 2*nr + subnr;
- } else if (file == GEN_ARF_ACCUMULATOR) {
- GBE_ASSERT(nr < MAX_ACC_REGISTER);
- return grfNum + MAX_FLAG_REGISTER + nr;
+ //GBE_ASSERT (reg.file == GEN_ARCHITECTURE_REGISTER_FILE);
+ if(reg.file == GEN_ARCHITECTURE_REGISTER_FILE) {
+ const uint32_t file = reg.nr & 0xf0;
+ const uint32_t nr = reg.nr & 0x0f;
+ if (file == GEN_ARF_FLAG) {
+ const uint32_t subnr = reg.subnr / sizeof(uint16_t);
+ GBE_ASSERT(nr < MAX_FLAG_REGISTER && (subnr == 0 || subnr == 1));
+ return grfNum + 2*nr + subnr;
+ } else if (file == GEN_ARF_ACCUMULATOR) {
+ GBE_ASSERT(nr < MAX_ACC_REGISTER);
+ return grfNum + MAX_FLAG_REGISTER + nr;
+ } else {
+ NOT_SUPPORTED;
+ return 0;
+ }
} else {
- NOT_SUPPORTED;
- return 0;
+ const uint32_t simdWidth = scheduler.ctx.getSimdWidth();
+ return simdWidth == 8 ? reg.nr : reg.nr / 2;
}
}
// We directly manipulate physical GRFs here
@@ -344,6 +349,10 @@ namespace gbe
this->nodes[index] = node;
}
+ if(insn.opcode == SEL_OP_SPILL_REG) {
+ const uint32_t index = this->getIndex(0xff);
+ this->nodes[index] = node;
+ }
// Consider barriers and wait write to memory
if (insn.opcode == SEL_OP_BARRIER ||
insn.opcode == SEL_OP_FENCE ||
@@ -424,6 +433,11 @@ namespace gbe
const uint32_t index = tracker.getIndex(insn.extra.function);
tracker.addDependency(node, index);
}
+ //read-after-write of scratch memory
+ if (insn.opcode == SEL_OP_UNSPILL_REG) {
+ const uint32_t index = tracker.getIndex(0xff);
+ tracker.addDependency(node, index);
+ }
// Consider barriers and wait are reading memory (local and global)
if (insn.opcode == SEL_OP_BARRIER ||
@@ -453,6 +467,7 @@ namespace gbe
tracker.addDependency(node, index);
}
+
// Consider barriers and wait are writing memory (local and global)
if (insn.opcode == SEL_OP_BARRIER ||
insn.opcode == SEL_OP_FENCE ||
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 7e9402d..3610051 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -315,6 +315,8 @@ namespace gbe
INLINE ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID);
/*! Implement public class */
INLINE ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID);
+ /*! spill a register (insert spill/unspill instructions) */
+ INLINE void spillReg(ir::Register reg, uint32_t registerPool);
/*! Implement public class */
INLINE uint32_t getRegNum(void) const { return file.regNum(); }
/*! Implements public interface */
@@ -617,6 +619,57 @@ namespace gbe
return vector;
}
+ void Selection::Opaque::spillReg(ir::Register spilledReg, uint32_t registerPool) {
+ assert(registerPool != 0);
+ const uint32_t simdWidth = ctx.getSimdWidth();
+ const uint32_t dstStart = registerPool + 1;
+ const uint32_t srcStart = registerPool + 1;
+ uint32_t ptr = ctx.allocateScratchMem(typeSize(GEN_TYPE_D)*simdWidth);
+
+ for (auto &block : blockList)
+ for (auto &insn : block.insnList) {
+ const uint32_t srcNum = insn.srcNum, dstNum = insn.dstNum;
+
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+ const GenRegister selReg = insn.src(srcID);
+ const ir::Register reg = selReg.reg();
+ if(selReg.file == GEN_GENERAL_REGISTER_FILE && reg == spilledReg) {
+ GBE_ASSERT(srcID < 5);
+ SelectionInstruction *unspill = this->create(SEL_OP_UNSPILL_REG, 1, 0);
+ unspill->state = GenInstructionState(simdWidth);
+ unspill->dst(0) = GenRegister(GEN_GENERAL_REGISTER_FILE, srcStart+srcID, 0,
+ selReg.type, selReg.vstride, selReg.width, selReg.hstride);
+ GenRegister src = insn.src(srcID);
+ // change nr/subnr, keep other register settings
+ src.nr = srcStart+srcID; src.subnr=0; src.physical=1;
+ insn.src(srcID) = src;
+ unspill->extra.scratchOffset = ptr;
+ unspill->extra.scratchMsgHeader = registerPool;
+ insn.prepend(*unspill);
+ }
+ }
+
+ for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
+ const GenRegister selReg = insn.dst(dstID);
+ const ir::Register reg = selReg.reg();
+ if(selReg.file == GEN_GENERAL_REGISTER_FILE && reg == spilledReg) {
+ GBE_ASSERT(dstID < 5);
+ SelectionInstruction *spill = this->create(SEL_OP_SPILL_REG, 0, 1);
+ spill->state = GenInstructionState(simdWidth);
+ spill->src(0) =GenRegister(GEN_GENERAL_REGISTER_FILE, dstStart + dstID, 0,
+ selReg.type, selReg.vstride, selReg.width, selReg.hstride);
+ GenRegister dst = insn.dst(dstID);
+ // change nr/subnr, keep other register settings
+ dst.physical =1; dst.nr = dstStart+dstID; dst.subnr = 0;
+ insn.dst(dstID)= dst;
+ spill->extra.scratchOffset = ptr;
+ spill->extra.scratchMsgHeader = registerPool;
+ insn.append(*spill);
+ }
+ }
+ }
+ }
+
ir::Register Selection::Opaque::replaceSrc(SelectionInstruction *insn, uint32_t regID) {
SelectionBlock *block = insn->parent;
const uint32_t simdWidth = ctx.getSimdWidth();
@@ -820,7 +873,6 @@ namespace gbe
dstVector->regNum = elemNum;
dstVector->isSrc = 0;
dstVector->reg = &insn->dst(0);
-
// Source cannot be scalar (yet)
srcVector->regNum = 1;
srcVector->isSrc = 1;
@@ -1188,6 +1240,9 @@ namespace gbe
ir::Register Selection::replaceDst(SelectionInstruction *insn, uint32_t regID) {
return this->opaque->replaceDst(insn, regID);
}
+ void Selection::spillReg(ir::Register reg, uint32_t registerPool) {
+ this->opaque->spillReg(reg, registerPool);
+ }
SelectionInstruction *Selection::create(SelectionOpcode opcode, uint32_t dstNum, uint32_t srcNum) {
return this->opaque->create(opcode, dstNum, srcNum);
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 5ae6e42..79b73e2 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -107,6 +107,10 @@ namespace gbe
/*! offset (0 to 7) */
uint16_t offset:5;
};
+ struct {
+ uint16_t scratchOffset;
+ uint16_t scratchMsgHeader;
+ };
} extra;
/*! Gen opcode */
uint8_t opcode;
@@ -197,6 +201,8 @@ namespace gbe
ir::Register replaceSrc(SelectionInstruction *insn, uint32_t regID);
/*! Replace a destination to the returned temporary register */
ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID);
+ /*! spill a register (insert spill/unspill instructions) */
+ void spillReg(ir::Register reg, uint32_t registerPool);
/*! Create a new selection instruction */
SelectionInstruction *create(SelectionOpcode, uint32_t dstNum, uint32_t srcNum);
/*! List of emitted blocks */
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 7664c8f..495978f 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -48,6 +48,8 @@ DECL_SELECTION_IR(BYTE_SCATTER, ByteScatterInstruction)
DECL_SELECTION_IR(SAMPLE, SampleInstruction)
DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction)
+DECL_SELECTION_IR(SPILL_REG, SpillRegInstruction)
+DECL_SELECTION_IR(UNSPILL_REG, UnSpillRegInstruction)
DECL_SELECTION_IR(MUL_HI, TernaryInstruction)
DECL_SELECTION_IR(FBH, UnaryInstruction)
DECL_SELECTION_IR(FBL, UnaryInstruction)
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 4ba03ea..ccbc0da 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -31,6 +31,8 @@
#include <algorithm>
#include <climits>
+#define RESERVED_REG_NUM_FOR_SPILL 6
+
namespace gbe
{
/////////////////////////////////////////////////////////////////////////////
@@ -94,6 +96,10 @@ namespace gbe
vector<GenRegInterval*> starting;
/*! Intervals sorting based on ending point positions */
vector<GenRegInterval*> ending;
+ /*! registers that are spilled */
+ set<ir::Register> spilled;
+ /* reserved registers for register spill/reload */
+ uint32_t reservedReg;
/*! Current vector to expire */
uint32_t expiringID;
/*! Use custom allocator */
@@ -259,6 +265,11 @@ namespace gbe
continue;
}
+ //ignore register that already spilled
+ if(spilled.contains(reg)) {
+ this->expiringID++;
+ continue;
+ }
// Ignore booleans that were allocated with flags
// if (ctx.getRegisterFamily(reg) == ir::FAMILY_BOOL && !grfBooleans.contains(reg)) {
if (ctx.sel->getRegisterFamily(reg) == ir::FAMILY_BOOL) {
@@ -473,6 +484,9 @@ namespace gbe
auto it = vectorMap.find(reg);
if (it != vectorMap.end()) {
const SelectionVector *vector = it->second.first;
+ // all the reg in the SelectionVector are spilled
+ if(spilled.contains(vector->reg[0].reg()))
+ continue;
const uint32_t simdWidth = ctx.getSimdWidth();
const ir::RegisterData regData = ctx.sel->getRegisterData(reg);
@@ -481,10 +495,28 @@ namespace gbe
const uint32_t alignment = simdWidth*typeSize;
const uint32_t size = vector->regNum * alignment;
+
uint32_t grfOffset;
while ((grfOffset = ctx.allocate(size, alignment)) == 0) {
const bool success = this->expireGRF(interval);
- if (success == false) return false;
+ if (success == false) {
+ // if no spill support, just return false, else simply spill the register
+ if(reservedReg == 0) return false;
+ break;
+ }
+ }
+ if(grfOffset == 0) {
+ // spill all the registers in the SelectionVector
+ // the tricky here is I need to use reservedReg+1 as scratch write payload.
+ // so, i need to write the first register to scratch memory first.
+ // the spillReg() will just append scratch write insn after the def. To spill
+ // the first register, need to call spillReg() last for the vector->reg[0]
+ GBE_ASSERT(vector->regNum < RESERVED_REG_NUM_FOR_SPILL);
+ for(int i = vector->regNum-1; i >= 0; i--) {
+ spilled.insert(vector->reg[i].reg());
+ selection.spillReg(vector->reg[i].reg(), reservedReg);
+ }
+ continue;
}
for (uint32_t regID = 0; regID < vector->regNum; ++regID, grfOffset += alignment) {
const ir::Register reg = vector->reg[regID].reg();
@@ -494,18 +526,25 @@ namespace gbe
}
}
// Case 2: This is a regular scalar register, allocate it alone
- else if (this->createGenReg(interval) == false)
- return false;
+ else if (this->createGenReg(interval) == false) {
+ if(reservedReg == 0) return false;
+ spilled.insert(reg);
+ selection.spillReg(reg, reservedReg);
+ }
}
return true;
}
-
INLINE bool GenRegAllocator::Opaque::allocate(Selection &selection) {
using namespace ir;
const Kernel *kernel = ctx.getKernel();
const Function &fn = ctx.getFunction();
GBE_ASSERT(fn.getProfile() == PROFILE_OCL);
-
+ if (ctx.getSimdWidth() == 8) {
+ reservedReg = ctx.allocate(RESERVED_REG_NUM_FOR_SPILL * GEN_REG_SIZE, GEN_REG_SIZE);
+ reservedReg /= GEN_REG_SIZE;
+ } else {
+ reservedReg = 0;
+ }
// Allocate all the vectors first since they need to be contiguous
this->allocateVector(selection);
// schedulePreRegAllocation(ctx, selection);
@@ -690,6 +729,10 @@ namespace gbe
int subreg = offst % 8;
std::cout << "%" << vReg << " g" << reg << "." << subreg << "D" << std::endl;
}
+ std::set<ir::Register>::iterator is;
+ std::cout << "## spilled registers:" << std::endl;
+ for(is = spilled.begin(); is != spilled.end(); is++)
+ std::cout << (int)*is << std::endl;
std::cout << std::endl;
}
@@ -704,6 +747,9 @@ namespace gbe
INLINE GenRegister GenRegAllocator::Opaque::genReg(const GenRegister ®) {
if (reg.file == GEN_GENERAL_REGISTER_FILE) {
+ if(reg.physical == 1) {
+ return reg;
+ }
GBE_ASSERT(RA.contains(reg.reg()) != false);
const uint32_t grfOffset = RA.find(reg.reg())->second;
const GenRegister dst = setGenReg(reg, grfOffset);
--
1.7.9.5
More information about the Beignet
mailing list