[Beignet] [PATCH 1/4] GBE: analyze vector interence to save some extra MOV if possible.
Zhigang Gong
zhigang.gong at intel.com
Mon Nov 3 00:24:31 PST 2014
Some instructions, say send, requires contiguous vector. If one
register is used by two different vectors, then we have to make
a copy to MOV the register from one vector to another.
But if we can check all the different registers in the two vectors,
and make sure there is no liveness intereference. Then we can
simply let those different registers in the same slot share the
same physical register space without any extra MOVs. The side
effect is that may increase a little bit of register pressure.
We can set a liveness threshold to limit this side effet to a
acceptable level.
Tested with opencv benchmark, and observed some test cases
could reduce about 0.8% of the totall instructions.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
backend/src/backend/gen_reg_allocation.cpp | 83 ++++++++++++++++++++++++++----
1 file changed, 73 insertions(+), 10 deletions(-)
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index a57edb3..9facfca 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -143,7 +143,7 @@ namespace gbe
/*! Allocate the given interval. Return true if success */
bool createGenReg(const GenRegInterval &interval);
/*! Indicate if the registers are already allocated in vectors */
- bool isAllocated(const SelectionVector *vector) const;
+ bool isAllocated(const SelectionVector *vector);
/*! Reallocate registers if needed to make the registers in the vector
* contigous in memory
*/
@@ -156,6 +156,8 @@ namespace gbe
map<uint32_t, ir::Register> offsetReg;
/*! Provides the position of each register in a vector */
map<ir::Register, VectorLocation> vectorMap;
+ /*! a vector element register map to another vector's element register, share the same physical register.*/
+ map<ir::Register, ir::Register> proxyRegMap;
/*! All vectors used in the selection */
vector<SelectionVector*> vectors;
/*! The set of booleans that will go to GRF (cannot be kept into flags) */
@@ -262,11 +264,18 @@ namespace gbe
return true;
}
- bool GenRegAllocator::Opaque::isAllocated(const SelectionVector *vector) const {
+ bool GenRegAllocator::Opaque::isAllocated(const SelectionVector *vector) {
const ir::Register first = vector->reg[0].reg();
const auto it = vectorMap.find(first);
// If the first register is not allocated we are done
+ // FIXME, even if first register is not allocated, we still have some chance to
+ // reuse a previous allocated vector.
+ //
+ // {%10, %12, %13, %14} allocated vector
+ // {%22, %12, %13, %14} current vector.
+ // If %22 and %10 have no interference with each other, we can make a proxy pair
+ // here and save 3 extra MOV here.
if (it == vectorMap.end())
return false;
@@ -280,19 +289,52 @@ namespace gbe
// Now check that all the registers in the already allocated vector match
// the current vector
+ map<ir::Register, ir::Register> tmpProxyRegMap;
for (uint32_t regID = 1; regID < vector->regNum; ++regID) {
const ir::Register from = vector->reg[regID].reg();
const ir::Register to = other->reg[regID + otherFirst].reg();
- if (from != to)
+ if (from == to)
+ continue;
+ // If this different register was already allocated in another vector,
+ // we just ignore it as it make things too complex and has little chance
+ // to get a win.
+ if (vectorMap.find(from) != vectorMap.end() ||
+ ctx.sel->getRegisterFamily(to) != ctx.sel->getRegisterFamily(from) ||
+ ctx.sel->isScalarReg(from))
+ return false;
+ if (((intervals[from].maxID < intervals[to].minID &&
+ intervals[from].minID < intervals[to].minID) &&
+ intervals[to].minID - intervals[from].maxID < 200) ||
+ ((intervals[from].maxID > intervals[to].maxID &&
+ intervals[from].minID > intervals[to].maxID &&
+ intervals[from].minID - intervals[to].maxID < 200))) {
+ tmpProxyRegMap.insert(std::make_pair(from, to));
+ } else
return false;
}
+ for(auto &proxyIt : tmpProxyRegMap) {
+ const ir::Register from = proxyIt.first;
+ const ir::Register to = proxyIt.second;
+ // extent to's liveness to cover from's liveness, and set from as unused.
+ // {%10, %11, %12, %14} v0
+ // {%10, %11, %12, %15} v1
+ // If %14 and %15 have no intererence, we can reuse the first vector to
+ // represent the second one, and can save 3 MOVs for this case.
+ // %15 will be mapped to %14 and will not participate the register allocation.
+ intervals[to].maxID = std::max(intervals[from].maxID, intervals[to].maxID);
+ intervals[to].minID = std::min(intervals[from].minID, intervals[to].minID);
+ intervals[from].maxID = -INT_MAX;
+ intervals[from].minID = INT_MAX;
+ proxyRegMap.insert(std::make_pair(from, to));
+ }
return true;
}
void GenRegAllocator::Opaque::coalesce(Selection &selection, SelectionVector *vector) {
for (uint32_t regID = 0; regID < vector->regNum; ++regID) {
const ir::Register reg = vector->reg[regID].reg();
- const auto it = this->vectorMap.find(reg);
+ const auto it = this->vectorMap.contains(reg) ? this->vectorMap.find(reg) :
+ this->vectorMap.find(proxyRegMap.find(reg)->second);
// case 1: the register is not already in a vector, so it can stay in this
// vector. Note that local IDs are *non-scalar* special registers but will
// require a MOV anyway since pre-allocated in the CURBE
@@ -308,10 +350,6 @@ namespace gbe
}
// case 2: the register is already in another vector, so we need to move
// it to a temporary register.
- // TODO: we can do better than that if we analyze the liveness of the
- // already allocated registers in the vector. If there is no inteference
- // and the order is maintained, we can reuse the previous vector and avoid
- // the MOVs
else {
ir::Register tmp;
ir::Type type = getIRType(vector->reg[regID].type);
@@ -665,6 +703,7 @@ namespace gbe
const ir::Register reg = interval.reg;
if (interval.maxID == -INT_MAX)
continue; // Unused register
+ GBE_ASSERT(!proxyRegMap.contains(reg));
if (RA.contains(reg))
continue; // already allocated
@@ -741,6 +780,7 @@ namespace gbe
this->ending.resize(regNum);
uint32_t regID = 0;
for(auto it = spilledRegs.begin(); it != spilledRegs.end(); ++it) {
+ GBE_ASSERT(!proxyRegMap.contains(it->first));
this->starting[regID] = this->ending[regID] = &intervals[it->first];
regID++;
}
@@ -768,7 +808,12 @@ namespace gbe
ir::RegisterFamily family = ctx.sel->getRegisterFamily(cur->reg);
it->second.addr = ctx.allocateScratchMem(getFamilySize(family)
* ctx.getSimdWidth());
+ }
+ for(auto &it : proxyRegMap) {
+ if (spilledRegs.contains(it.second)) {
+ spilledRegs.insert(std::make_pair(it.first, spilledRegs.find(it.second)->second));
}
+ }
}
INLINE bool GenRegAllocator::Opaque::expireReg(ir::Register reg)
@@ -796,6 +841,7 @@ namespace gbe
// put it to the RA map and the spill map if it could be spilled.
INLINE void GenRegAllocator::Opaque::insertNewReg(ir::Register reg, uint32_t grfOffset, bool isVector)
{
+ GBE_ASSERT(!proxyRegMap.contains(reg));
RA.insert(std::make_pair(reg, grfOffset));
if (reservedReg != 0) {
@@ -1147,6 +1193,21 @@ namespace gbe
<< " -> " << setw(8) << this->intervals[(uint)vReg].maxID
<< "]" << endl;
}
+ cout << "## proxy registers" << endl;
+ for (auto &it : proxyRegMap) {
+ ir::Register from = (ir::Register)it.first;
+ ir::Register to = (ir::Register)it.second;
+ ir::RegisterFamily toFamily, fromFamily;
+ uint32_t regSize0, regSize1;
+ getRegAttrib(to, regSize0, &toFamily);
+ getRegAttrib(from, regSize1, &fromFamily);
+ cout << "%" << setiosflags(ios::left) << setw(8) << from
+ << setiosflags(ios::left) << ir::getFamilyName(toFamily)
+ << " ----> "
+ << "%" << setiosflags(ios::left) << setw(8) << to
+ << setiosflags(ios::left) << ir::getFamilyName(fromFamily)
+ << endl;
+ }
if (!spilledRegs.empty())
cout << "## spilled registers: " << spilledRegs.size() << endl;
for(auto it = spilledRegs.begin(); it != spilledRegs.end(); it++) {
@@ -1179,8 +1240,10 @@ namespace gbe
if(reg.physical == 1) {
return reg;
}
- GBE_ASSERT(RA.contains(reg.reg()) != false);
- const uint32_t grfOffset = RA.find(reg.reg())->second;
+ GBE_ASSERT(RA.contains(reg.reg()) != false || RA.contains(proxyRegMap.find(reg.reg())->second));
+ const auto &it = RA.contains(reg.reg()) ? RA.find(reg.reg()):
+ RA.find(proxyRegMap.find(reg.reg())->second);
+ const uint32_t grfOffset = it->second;
const uint32_t suboffset = reg.subphysical ? reg.subnr : 0;
const GenRegister dst = setGenReg(reg, grfOffset + suboffset);
if (reg.quarter != 0)
--
1.8.3.2
More information about the Beignet
mailing list