[Beignet] [PATCH V2] GBE: Support 64Bit register spill.
Ruiling Song
ruiling.song at intel.com
Sun Feb 9 23:33:43 PST 2014
Register spill/fill now supports both DWORD and QWORD registers.
v2:
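Only increment poolOffset by 1 when a QWORD register is encountered while poolOffset is still 1, because a QWORD register spill cannot share the scratch message payload register.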
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
backend/src/backend/gen_context.cpp | 44 ++++++++++++++++++++-----
backend/src/backend/gen_insn_selection.cpp | 49 ++++++++++++++++++----------
backend/src/backend/gen_reg_allocation.cpp | 17 ++++++----
3 files changed, 79 insertions(+), 31 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index d72b19b..489ce5b 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -1662,14 +1662,25 @@ namespace gbe
GenRegister payload = src;
payload.nr = header + 1;
payload.subnr = 0;
+
GBE_ASSERT(src.subnr == 0);
- if (payload.nr != src.nr)
- p->MOV(payload, src);
uint32_t regType = insn.src(0).type;
uint32_t size = typeSize(regType);
- assert(size <= 4);
- uint32_t regNum = (stride(src.hstride)*size*simdWidth) > 32 ? 2 : 1;
- this->scratchWrite(msg, scratchOffset, regNum, regType, GEN_SCRATCH_CHANNEL_MODE_DWORD);
+ uint32_t regSize = stride(src.hstride)*size;
+ GBE_ASSERT(regSize == 4 || regSize == 8);
+ if(regSize == 4) {
+ if (payload.nr != src.nr)
+ p->MOV(payload, src);
+ uint32_t regNum = (regSize*simdWidth) > 32 ? 2 : 1;
+ this->scratchWrite(msg, scratchOffset, regNum, GEN_TYPE_UD, GEN_SCRATCH_CHANNEL_MODE_DWORD);
+ }
+ else { //size == 8
+ loadBottomHalf(payload, src);
+ uint32_t regNum = (regSize/2*simdWidth) > 32 ? 2 : 1;
+ this->scratchWrite(msg, scratchOffset, regNum, GEN_TYPE_UD, GEN_SCRATCH_CHANNEL_MODE_DWORD);
+ loadTopHalf(payload, src);
+ this->scratchWrite(msg, scratchOffset + 4*simdWidth, regNum, GEN_TYPE_UD, GEN_SCRATCH_CHANNEL_MODE_DWORD);
+ }
p->pop();
}
@@ -1680,10 +1691,27 @@ namespace gbe
uint32_t simdWidth = p->curr.execWidth;
const uint32_t header = insn.extra.scratchMsgHeader;
uint32_t size = typeSize(regType);
- assert(size <= 4);
- uint32_t regNum = (stride(dst.hstride)*size*simdWidth) > 32 ? 2 : 1;
+ uint32_t regSize = stride(dst.hstride)*size;
+
+
const GenRegister msg = GenRegister::ud8grf(header, 0);
- this->scratchRead(GenRegister::retype(dst, GEN_TYPE_UD), msg, scratchOffset, regNum, regType, GEN_SCRATCH_CHANNEL_MODE_DWORD);
+ GenRegister payload = msg;
+ payload.nr = header + 1;
+
+
+ p->push();
+ assert(regSize == 4 || regSize == 8);
+ if(regSize == 4) {
+ uint32_t regNum = (regSize*simdWidth) > 32 ? 2 : 1;
+ this->scratchRead( GenRegister::retype(dst, GEN_TYPE_UD)/*GenRegister::ud8grf(dst.nr, dst.subnr)*/, msg, scratchOffset, regNum, GEN_TYPE_UD, GEN_SCRATCH_CHANNEL_MODE_DWORD);
+ } else {
+ uint32_t regNum = (regSize/2*simdWidth) > 32 ? 2 : 1;
+ this->scratchRead(payload, msg, scratchOffset, regNum, GEN_TYPE_UD, GEN_SCRATCH_CHANNEL_MODE_DWORD);
+ storeBottomHalf(dst, payload);
+ this->scratchRead(payload, msg, scratchOffset + 4*simdWidth, regNum, GEN_TYPE_UD, GEN_SCRATCH_CHANNEL_MODE_DWORD);
+ storeTopHalf(dst, payload);
+ }
+ p->pop();
}
// For SIMD8, we allocate 2*elemNum temporary registers from dst(0), and
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 54e5ebe..f2f6bf0 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -680,8 +680,6 @@ namespace gbe
bool Selection::Opaque::spillRegs(const SpilledRegs &spilledRegs,
uint32_t registerPool) {
GBE_ASSERT(registerPool != 0);
- const uint32_t dstStart = registerPool + 1;
- const uint32_t srcStart = registerPool + 1;
for (auto &block : blockList)
for (auto &insn : block.insnList) {
@@ -693,17 +691,19 @@ namespace gbe
const uint32_t srcNum = insn.srcNum, dstNum = insn.dstNum;
struct RegSlot {
RegSlot(ir::Register _reg, uint8_t _srcID,
- bool _isTmp, uint32_t _addr)
- : reg(_reg), srcID(_srcID), isTmpReg(_isTmp), addr(_addr)
+ uint8_t _poolOffset, bool _isTmp, uint32_t _addr)
+ : reg(_reg), srcID(_srcID), poolOffset(_poolOffset), isTmpReg(_isTmp), addr(_addr)
{};
ir::Register reg;
union {
uint8_t srcID;
uint8_t dstID;
};
+ uint8_t poolOffset;
bool isTmpReg;
int32_t addr;
};
+ uint8_t poolOffset = 1; // keep one for scratch message header
vector <struct RegSlot> regSet;
for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
const GenRegister selReg = insn.src(srcID);
@@ -712,18 +712,27 @@ namespace gbe
if(it != spilledRegs.end()
&& selReg.file == GEN_GENERAL_REGISTER_FILE
&& selReg.physical == 0) {
- struct RegSlot regSlot(reg, srcID,
+ ir::RegisterFamily family = getRegisterFamily(reg);
+ if(family == ir::FAMILY_QWORD && poolOffset == 1) {
+ poolOffset += 1; // qword register spill could not share the scratch message payload register
+ }
+ struct RegSlot regSlot(reg, srcID, poolOffset,
it->second.isTmpReg,
it->second.addr);
+ if(family == ir::FAMILY_QWORD) {
+ poolOffset += 2;
+ } else {
+ poolOffset += 1;
+ }
regSet.push_back(regSlot);
}
}
- if (regSet.size() > 5)
+ if (poolOffset > 6) {
+ std::cerr << "Instruction (#" << (uint32_t)insn.opcode << ") src too large pooloffset " << (uint32_t)poolOffset << std::endl;
return false;
-
+ }
while(!regSet.empty()) {
- uint32_t scratchID = regSet.size() - 1;
struct RegSlot regSlot = regSet.back();
regSet.pop_back();
const GenRegister selReg = insn.src(regSlot.srcID);
@@ -732,7 +741,7 @@ namespace gbe
SelectionInstruction *unspill = this->create(SEL_OP_UNSPILL_REG, 1, 0);
unspill->state = GenInstructionState(ctx.getSimdWidth());
unspill->dst(0) = GenRegister(GEN_GENERAL_REGISTER_FILE,
- srcStart + scratchID, 0,
+ registerPool + regSlot.poolOffset, 0,
selReg.type, selReg.vstride,
selReg.width, selReg.hstride);
unspill->extra.scratchOffset = regSlot.addr;
@@ -742,7 +751,7 @@ namespace gbe
GenRegister src = insn.src(regSlot.srcID);
// change nr/subnr, keep other register settings
- src.nr = srcStart + scratchID; src.subnr = 0; src.physical = 1;
+ src.nr = registerPool + regSlot.poolOffset; src.subnr = 0; src.physical = 1;
insn.src(regSlot.srcID) = src;
};
@@ -757,6 +766,7 @@ namespace gbe
data.
*/
+ poolOffset = 1; // one for scratch message header
for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
const GenRegister selReg = insn.dst(dstID);
const ir::Register reg = selReg.reg();
@@ -764,18 +774,24 @@ namespace gbe
if(it != spilledRegs.end()
&& selReg.file == GEN_GENERAL_REGISTER_FILE
&& selReg.physical == 0) {
- struct RegSlot regSlot(reg, dstID,
+ ir::RegisterFamily family = getRegisterFamily(reg);
+ if(family == ir::FAMILY_QWORD && poolOffset == 1) {
+ poolOffset += 1; // qword register spill could not share the scratch message payload register
+ }
+ struct RegSlot regSlot(reg, dstID, poolOffset,
it->second.isTmpReg,
it->second.addr);
+ if(family == ir::FAMILY_QWORD) poolOffset +=2;
+ else poolOffset += 1;
regSet.push_back(regSlot);
}
}
- if (regSet.size() > 5)
+ if (poolOffset > 6){
+ std::cerr << "Instruction (#" << (uint32_t)insn.opcode << ") dst too large pooloffset " << (uint32_t)poolOffset << std::endl;
return false;
-
+ }
while(!regSet.empty()) {
- uint32_t scratchID = regSet.size() - 1;
struct RegSlot regSlot = regSet.back();
regSet.pop_back();
const GenRegister selReg = insn.dst(regSlot.dstID);
@@ -784,7 +800,7 @@ namespace gbe
SelectionInstruction *spill = this->create(SEL_OP_SPILL_REG, 0, 1);
spill->state = GenInstructionState(ctx.getSimdWidth());
spill->src(0) = GenRegister(GEN_GENERAL_REGISTER_FILE,
- dstStart + scratchID, 0,
+ registerPool + regSlot.poolOffset, 0,
selReg.type, selReg.vstride,
selReg.width, selReg.hstride);
spill->extra.scratchOffset = regSlot.addr;
@@ -794,9 +810,8 @@ namespace gbe
GenRegister dst = insn.dst(regSlot.dstID);
// change nr/subnr, keep other register settings
- dst.physical =1; dst.nr = dstStart + scratchID; dst.subnr = 0;
+ dst.physical =1; dst.nr = registerPool + regSlot.poolOffset; dst.subnr = 0;
insn.dst(regSlot.dstID)= dst;
- scratchID++;
}
}
return true;
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 2aafdb1..b94b809 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -234,7 +234,7 @@ namespace gbe
uint32_t grfOffset = allocateReg(interval, regSize, regSize);
if (grfOffset == 0) {
/* this register is going to be spilled. */
- GBE_ASSERT(!(reservedReg && family != ir::FAMILY_DWORD));
+ GBE_ASSERT(!(reservedReg && family != ir::FAMILY_DWORD && family != ir::FAMILY_QWORD));
return false;
}
insertNewReg(reg, grfOffset);
@@ -617,7 +617,8 @@ namespace gbe
ir::RegisterFamily family;
getRegAttrib(reg, regSize, &family);
- if (regSize == GEN_REG_SIZE && family == ir::FAMILY_DWORD /*&& !isVector*/) {
+ if ((regSize == GEN_REG_SIZE && family == ir::FAMILY_DWORD)
+ || (regSize == 2*GEN_REG_SIZE && family == ir::FAMILY_QWORD)) {
GBE_ASSERT(offsetReg.find(grfOffset) == offsetReg.end());
offsetReg.insert(std::make_pair(grfOffset, reg));
spillCandidate.insert(intervals[reg]);
@@ -639,7 +640,8 @@ namespace gbe
if (!spillTag.isTmpReg) {
// FIXME, we can optimize scratch allocation according to
// the interval information.
- spillTag.addr = ctx.allocateScratchMem(typeSize(GEN_TYPE_D)
+ ir::RegisterFamily family = ctx.sel->getRegisterFamily(interval.reg);
+ spillTag.addr = ctx.allocateScratchMem(getFamilySize(family)
* ctx.getSimdWidth());
} else
spillTag.addr = -1;
@@ -682,6 +684,7 @@ namespace gbe
auto vectorIt = vectorMap.find(reg);
bool isVector = vectorIt != vectorMap.end();
bool needRestart = false;
+ ir::RegisterFamily family = ctx.sel->getRegisterFamily(reg);
if (isVector
&& (vectorCanSpill(vectorIt->second.first))) {
const SelectionVector *vector = vectorIt->second.first;
@@ -690,11 +693,12 @@ namespace gbe
== spilledRegs.end());
spillSet.insert(vector->reg[id].reg());
reg = vector->reg[id].reg();
- size -= GEN_REG_SIZE;
+ family = ctx.sel->getRegisterFamily(reg);
+ size -= family == ir::FAMILY_QWORD ? 2*GEN_REG_SIZE : GEN_REG_SIZE;
}
} else if (!isVector) {
spillSet.insert(reg);
- size -= GEN_REG_SIZE;
+ size -= family == ir::FAMILY_QWORD ? 2*GEN_REG_SIZE : GEN_REG_SIZE;
} else
needRestart = true; // is a vector which could not be spilled.
@@ -702,7 +706,8 @@ namespace gbe
break;
if (!needRestart) {
uint32_t offset = RA.find(reg)->second;
- auto nextRegIt = offsetReg.find(offset + GEN_REG_SIZE);
+ uint32_t nextOffset = (family == ir::FAMILY_QWORD) ? (offset + 2*GEN_REG_SIZE) : (offset + GEN_REG_SIZE);
+ auto nextRegIt = offsetReg.find(nextOffset);
if (nextRegIt != offsetReg.end())
reg = nextRegIt->second;
else
--
1.7.9.5
More information about the Beignet
mailing list