[Beignet] [PATCH 2/2] GBE: Enable register spilling for SIMD16.
Song, Ruiling
ruiling.song at intel.com
Fri Apr 11 01:06:41 PDT 2014
This patch looks good to me.
Thanks for enabling this feature.
Thanks!
Ruiling
-----Original Message-----
From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Zhigang Gong
Sent: Thursday, April 10, 2014 12:41 PM
To: beignet at lists.freedesktop.org
Cc: Zhigang Gong
Subject: [Beignet] [PATCH 2/2] GBE: Enable register spilling for SIMD16.
From: Zhigang Gong <zhigang.gong at gmail.com>
Enable register spilling for SIMD16 mode. Introduce a new environment variable, OCL_SIMD16_SPILL_THRESHOLD, to control the SIMD16 register-spilling threshold. The default value is 16, which means that when more than 16 registers are spilled, Beignet falls back to SIMD8.
Signed-off-by: Zhigang Gong <zhigang.gong at gmail.com>
---
backend/src/backend/gen_insn_selection.cpp | 23 ++++++-----
backend/src/backend/gen_reg_allocation.cpp | 61 ++++++++++++++++++++++++------
2 files changed, 64 insertions(+), 20 deletions(-)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index fea0329..820fbad 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -697,6 +697,7 @@ namespace gbe
if(insn.opcode == SEL_OP_SPILL_REG
|| insn.opcode == SEL_OP_UNSPILL_REG)
continue;
+ const int simdWidth = insn.state.execWidth;
const uint32_t srcNum = insn.srcNum, dstNum = insn.dstNum;
struct RegSlot {
@@ -730,9 +731,9 @@ namespace gbe
it->second.isTmpReg,
it->second.addr);
if(family == ir::FAMILY_QWORD) {
- poolOffset += 2;
+ poolOffset += 2 * simdWidth / 8;
} else {
- poolOffset += 1;
+ poolOffset += simdWidth / 8;
}
regSet.push_back(regSlot);
}
@@ -749,12 +750,13 @@ namespace gbe
if (!regSlot.isTmpReg) {
/* For temporary registers, we don't need to unspill. */
SelectionInstruction *unspill = this->create(SEL_OP_UNSPILL_REG, 1, 0);
- unspill->state = GenInstructionState(ctx.getSimdWidth());
+ unspill->state = GenInstructionState(simdWidth);
+ unspill->state.noMask = 1;
unspill->dst(0) = GenRegister(GEN_GENERAL_REGISTER_FILE,
registerPool + regSlot.poolOffset, 0,
selReg.type, selReg.vstride,
selReg.width, selReg.hstride);
- unspill->extra.scratchOffset = regSlot.addr;
+ unspill->extra.scratchOffset = regSlot.addr +
+ selReg.quarter * 4 * simdWidth;
unspill->extra.scratchMsgHeader = registerPool;
insn.prepend(*unspill);
}
@@ -789,8 +791,8 @@ namespace gbe
struct RegSlot regSlot(reg, dstID, poolOffset,
it->second.isTmpReg,
it->second.addr);
- if(family == ir::FAMILY_QWORD) poolOffset +=2;
- else poolOffset += 1;
+ if(family == ir::FAMILY_QWORD) poolOffset += 2 * simdWidth / 8;
+ else poolOffset += simdWidth / 8;
regSet.push_back(regSlot);
}
}
@@ -806,12 +808,16 @@ namespace gbe
if(!regSlot.isTmpReg) {
/* For temporary registers, we don't need to unspill. */
SelectionInstruction *spill = this->create(SEL_OP_SPILL_REG, 0, 1);
- spill->state = GenInstructionState(ctx.getSimdWidth());
+ spill->state = insn.state;//GenInstructionState(simdWidth);
+ spill->state.accWrEnable = 0;
+ spill->state.saturate = 0;
+ if (insn.opcode == SEL_OP_SEL)
+ spill->state.predicate = GEN_PREDICATE_NONE;
spill->src(0) = GenRegister(GEN_GENERAL_REGISTER_FILE,
registerPool + regSlot.poolOffset, 0,
selReg.type, selReg.vstride,
selReg.width, selReg.hstride);
- spill->extra.scratchOffset = regSlot.addr;
+ spill->extra.scratchOffset = regSlot.addr + selReg.quarter
+ * 4 * simdWidth;
spill->extra.scratchMsgHeader = registerPool;
insn.append(*spill);
}
@@ -2238,7 +2244,6 @@ namespace gbe
}
sel.pop();
-
// All children are marked as root
markAllChildren(dag);
return true;
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 937f5b2..42bb8a6 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -28,6 +28,7 @@
#include "backend/gen_register.hpp"
#include "backend/program.hpp"
#include "sys/exception.hpp"
+#include "sys/cvar.hpp"
#include <algorithm>
#include <climits>
#include <iostream>
@@ -594,6 +595,7 @@ namespace gbe
}
}
+ IVAR(OCL_SIMD16_SPILL_THRESHOLD, 0, 16, 256);
bool GenRegAllocator::Opaque::allocateGRFs(Selection &selection) {
// Perform the linear scan allocator
const uint32_t regNum = ctx.sel->getRegNum();
@@ -648,8 +650,16 @@ namespace gbe
}
if (!spilledRegs.empty()) {
GBE_ASSERT(reservedReg != 0);
+ if (ctx.getSimdWidth() == 16) {
+ if (spilledRegs.size() > (unsigned int)OCL_SIMD16_SPILL_THRESHOLD) {
+ if (GBE_DEBUG)
+ std::cerr << "WARN: exceed simd 16 spill threshold ("
+ << spilledRegs.size() << ">" << OCL_SIMD16_SPILL_THRESHOLD
+ << ")" << std::endl;
+ return false;
+ }
+ }
allocateScratchForSpilled();
-
bool success = selection.spillRegs(spilledRegs, reservedReg);
if (!success) {
std::cerr << "Fail to spill registers." << std::endl;
@@ -728,9 +738,14 @@ namespace gbe
uint32_t regSize;
ir::RegisterFamily family;
getRegAttrib(reg, regSize, &family);
-
- if ((regSize == GEN_REG_SIZE && family == ir::FAMILY_DWORD)
- || (regSize == 2*GEN_REG_SIZE && family == ir::FAMILY_QWORD)) {
+ // At simd16 mode, we may introduce some simd8 registers in the instruction selection stage.
+ // To spill those simd8 temporary registers will introduce unnecessary complexity. We just simply
+ // avoid to spill those temporary registers here.
+ if (ctx.getSimdWidth() == 16 && reg.value() >= ctx.getFunction().getRegisterFile().regNum())
+ return;
+
+ if ((regSize == ctx.getSimdWidth()/8 * GEN_REG_SIZE && family == ir::FAMILY_DWORD)
+ || (regSize == 2 * ctx.getSimdWidth()/8 * GEN_REG_SIZE &&
+ family == ir::FAMILY_QWORD)) {
GBE_ASSERT(offsetReg.find(grfOffset) == offsetReg.end());
offsetReg.insert(std::make_pair(grfOffset, reg));
spillCandidate.insert(intervals[reg]);
@@ -747,6 +762,10 @@ namespace gbe
bool isAllocated) {
if (reservedReg == 0)
return false;
+
+ if (interval.reg.value() >= ctx.getFunction().getRegisterFile().regNum() &&
+ ctx.getSimdWidth() == 16)
+ return false;
SpillRegTag spillTag;
spillTag.isTmpReg = interval.maxID == interval.minID;
spillTag.addr = -1;
@@ -762,9 +781,12 @@ namespace gbe
return true;
}
+ // Check whether a vector which is allocated can be spilled out.
+ // If a partial of a vector is expired, the vector will be unspillable, currently.
+ // FIXME we may need to fix those unspillable vector in the future.
INLINE bool GenRegAllocator::Opaque::vectorCanSpill(SelectionVector *vector) {
for(uint32_t id = 0; id < vector->regNum; id++)
- if (spillCandidate.find(intervals[(ir::Register)(vector->reg[id]).value.reg])
+ if (spillCandidate.find(intervals[(ir::Register)(vector->reg[id].value.reg)])
== spillCandidate.end())
return false;
return true;
@@ -779,8 +801,12 @@ namespace gbe
// If there is no spill candidate or current register is spillable and current register's
// endpoint is after all the spillCandidate register's endpoint we return false. The
// caller will spill current register.
+ // At simd16 mode, we will always try to spill here rather than return to the caller.
+ // The reason is that the caller may have a vector to allocate, and some element may be
+ // temporary registers which could not be spilled.
if (it == spillCandidate.end()
- || (it->getMaxID() <= interval.maxID && alignment == GEN_REG_SIZE))
+ || (ctx.getSimdWidth() == 8 && (it->getMaxID() <= interval.maxID
+ && alignment == ctx.getSimdWidth()/8 * GEN_REG_SIZE)))
return false;
ir::Register reg = it->getReg();
@@ -800,11 +826,13 @@ namespace gbe
spillSet.insert(vector->reg[id].reg());
reg = vector->reg[id].reg();
family = ctx.sel->getRegisterFamily(reg);
- size -= family == ir::FAMILY_QWORD ? 2*GEN_REG_SIZE : GEN_REG_SIZE;
+ size -= family == ir::FAMILY_QWORD ? 2 * GEN_REG_SIZE * ctx.getSimdWidth()/8
+ : GEN_REG_SIZE *
+ ctx.getSimdWidth()/8;
}
} else if (!isVector) {
spillSet.insert(reg);
- size -= family == ir::FAMILY_QWORD ? 2*GEN_REG_SIZE : GEN_REG_SIZE;
+ size -= family == ir::FAMILY_QWORD ? 2 * GEN_REG_SIZE * ctx.getSimdWidth()/8
+ : GEN_REG_SIZE *
+ ctx.getSimdWidth()/8;
} else
needRestart = true; // is a vector which could not be spilled.
@@ -812,7 +840,8 @@ namespace gbe
break;
if (!needRestart) {
uint32_t offset = RA.find(reg)->second;
- uint32_t nextOffset = (family == ir::FAMILY_QWORD) ? (offset + 2*GEN_REG_SIZE) : (offset + GEN_REG_SIZE);
+ uint32_t nextOffset = (family == ir::FAMILY_QWORD) ? (offset + 2 * GEN_REG_SIZE * ctx.getSimdWidth() / 8)
+ : (offset +
+ GEN_REG_SIZE * ctx.getSimdWidth() / 8);
auto nextRegIt = offsetReg.find(nextOffset);
if (nextRegIt != offsetReg.end())
reg = nextRegIt->second;
@@ -821,9 +850,18 @@ namespace gbe
}
if (needRestart) {
+#if 0
+ // FIXME, we should enable this code block in the future.
+ // If the spill set is not zero and we need a restart, we can
+ // simply return to try to allocate the registers at first.
+ // As some vectors which have expired elements may be marked as
+ // unspillable vector.
+ if (spillSet.size() > 0)
+ break;
+#endif
+ it++;
// next register is not in spill candidate.
// let's move to next candidate and start over.
- it++;
if (it == spillCandidate.end())
return false;
reg = it->getReg();
@@ -857,7 +895,8 @@ namespace gbe
reservedReg = ctx.allocate(RESERVED_REG_NUM_FOR_SPILL * GEN_REG_SIZE, GEN_REG_SIZE);
reservedReg /= GEN_REG_SIZE;
} else {
- reservedReg = 0;
+ reservedReg = ctx.allocate(RESERVED_REG_NUM_FOR_SPILL * GEN_REG_SIZE, GEN_REG_SIZE);
+ reservedReg /= GEN_REG_SIZE;
}
// schedulePreRegAllocation(ctx, selection);
--
1.8.3.2
_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list