[Beignet] [PATCH 3/5] GBE: don't treat btiUtil as a curbe payload register.
Zhigang Gong
zhigang.gong at intel.com
Sun Sep 13 23:19:34 PDT 2015
btiUtil should be just a normal temporary register that is only
alive for those specific load/store instructions that use a
mixed BTI.
Although btiUtil only takes one DW of register space, in
practice it may waste an entire 32-byte register, as it has
a very long live range.
This patch fixes this issue completely.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
backend/src/backend/gen8_context.cpp | 10 +-
backend/src/backend/gen_context.cpp | 47 +++++----
backend/src/backend/gen_context.hpp | 4 +-
backend/src/backend/gen_insn_selection.cpp | 156 +++++++++++++++++------------
backend/src/backend/gen_reg_allocation.cpp | 2 -
backend/src/backend/program.h | 1 -
backend/src/ir/profile.cpp | 4 +-
backend/src/ir/profile.hpp | 3 +-
8 files changed, 128 insertions(+), 99 deletions(-)
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index b497ee5..7e51963 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -854,9 +854,10 @@ namespace gbe
p->UNTYPED_READ(dst, src, bti, 2*elemNum);
} else {
const GenRegister tmp = ra->genReg(insn.dst(2*elemNum));
+ const GenRegister btiTmp = ra->genReg(insn.dst(2*elemNum + 1));
unsigned desc = p->generateUntypedReadMessageDesc(0, 2*elemNum);
- unsigned jip0 = beforeMessage(insn, bti, tmp, desc);
+ unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
//predicated load
p->push();
@@ -864,7 +865,7 @@ namespace gbe
p->curr.useFlag(insn.state.flag, insn.state.subFlag);
p->UNTYPED_READ(dst, src, GenRegister::retype(GenRegister::addr1(0), GEN_TYPE_UD), 2*elemNum);
p->pop();
- afterMessage(insn, bti, tmp, jip0);
+ afterMessage(insn, bti, tmp, btiTmp, jip0);
}
for (uint32_t elemID = 0; elemID < elemNum; elemID++) {
@@ -893,9 +894,10 @@ namespace gbe
p->UNTYPED_WRITE(addr, bti, elemNum*2);
} else {
const GenRegister tmp = ra->genReg(insn.dst(elemNum));
+ const GenRegister btiTmp = ra->genReg(insn.dst(elemNum + 1));
unsigned desc = p->generateUntypedWriteMessageDesc(0, elemNum*2);
- unsigned jip0 = beforeMessage(insn, bti, tmp, desc);
+ unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
//predicated load
p->push();
@@ -903,7 +905,7 @@ namespace gbe
p->curr.useFlag(insn.state.flag, insn.state.subFlag);
p->UNTYPED_WRITE(addr, GenRegister::addr1(0), elemNum*2);
p->pop();
- afterMessage(insn, bti, tmp, jip0);
+ afterMessage(insn, bti, tmp, btiTmp, jip0);
}
}
void Gen8Context::emitPackLongInstruction(const SelectionInstruction &insn) {
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index ae02fbe..5980db2 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -1769,16 +1769,17 @@ namespace gbe
p->ATOMIC(dst, function, src, bti, srcNum);
} else {
GenRegister flagTemp = ra->genReg(insn.dst(1));
+ GenRegister btiTmp = ra->genReg(insn.dst(2));
unsigned desc = p->generateAtomicMessageDesc(function, 0, srcNum);
- unsigned jip0 = beforeMessage(insn, bti, flagTemp, desc);
+ unsigned jip0 = beforeMessage(insn, bti, flagTemp, btiTmp, desc);
p->push();
p->curr.predicate = GEN_PREDICATE_NORMAL;
p->curr.useFlag(insn.state.flag, insn.state.subFlag);
p->ATOMIC(dst, function, src, GenRegister::addr1(0), srcNum);
p->pop();
- afterMessage(insn, bti, flagTemp, jip0);
+ afterMessage(insn, bti, flagTemp, btiTmp, jip0);
}
}
@@ -1920,9 +1921,10 @@ namespace gbe
p->UNTYPED_READ(dst, src, bti, elemNum);
} else {
const GenRegister tmp = ra->genReg(insn.dst(elemNum));
+ const GenRegister btiTmp = ra->genReg(insn.dst(elemNum + 1));
unsigned desc = p->generateUntypedReadMessageDesc(0, elemNum);
- unsigned jip0 = beforeMessage(insn, bti, tmp, desc);
+ unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
//predicated load
p->push();
@@ -1930,17 +1932,17 @@ namespace gbe
p->curr.useFlag(insn.state.flag, insn.state.subFlag);
p->UNTYPED_READ(dst, src, GenRegister::retype(GenRegister::addr1(0), GEN_TYPE_UD), elemNum);
p->pop();
- afterMessage(insn, bti, tmp, jip0);
+ afterMessage(insn, bti, tmp, btiTmp, jip0);
}
}
- unsigned GenContext::beforeMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister tmp, unsigned desc) {
+ unsigned GenContext::beforeMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister tmp, GenRegister btiTmp, unsigned desc) {
const GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag);
setFlag(flagReg, GenRegister::immuw(0));
p->CMP(GEN_CONDITIONAL_NZ, flagReg, GenRegister::immuw(1));
- GenRegister btiUD = ra->genReg(GenRegister::ud1grf(ir::ocl::btiUtil));
- GenRegister btiUW = ra->genReg(GenRegister::uw1grf(ir::ocl::btiUtil));
- GenRegister btiUB = ra->genReg(GenRegister::ub1grf(ir::ocl::btiUtil));
+ GenRegister btiUD = GenRegister::retype(btiTmp, GEN_TYPE_UD);
+ GenRegister btiUW = GenRegister::retype(btiTmp, GEN_TYPE_UW);
+ GenRegister btiUB = GenRegister::retype(btiTmp, GEN_TYPE_UB);
unsigned jip0 = p->n_instruction();
p->push();
p->curr.execWidth = 1;
@@ -1963,8 +1965,8 @@ namespace gbe
p->pop();
return jip0;
}
- void GenContext::afterMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister tmp, unsigned jip0) {
- const GenRegister btiUD = ra->genReg(GenRegister::ud1grf(ir::ocl::btiUtil));
+ void GenContext::afterMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister tmp, GenRegister btiTmp, unsigned jip0) {
+ const GenRegister btiUD = GenRegister::retype(btiTmp, GEN_TYPE_UD);
//restore flag
setFlag(GenRegister::flag(insn.state.flag, insn.state.subFlag), tmp);
// get active channel
@@ -1988,9 +1990,10 @@ namespace gbe
p->UNTYPED_READ(dst, src, bti, elemNum);
} else {
const GenRegister tmp = ra->genReg(insn.dst(elemNum));
+ const GenRegister btiTmp = ra->genReg(insn.dst(elemNum + 1));
unsigned desc = p->generateUntypedReadMessageDesc(0, elemNum);
- unsigned jip0 = beforeMessage(insn, bti, tmp, desc);
+ unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
//predicated load
p->push();
@@ -1998,7 +2001,7 @@ namespace gbe
p->curr.useFlag(insn.state.flag, insn.state.subFlag);
p->UNTYPED_READ(dst, src, GenRegister::retype(GenRegister::addr1(0), GEN_TYPE_UD), elemNum);
p->pop();
- afterMessage(insn, bti, tmp, jip0);
+ afterMessage(insn, bti, tmp, btiTmp, jip0);
}
}
@@ -2011,9 +2014,10 @@ namespace gbe
p->UNTYPED_WRITE(src, bti, elemNum*2);
} else {
const GenRegister tmp = ra->genReg(insn.dst(0));
+ const GenRegister btiTmp = ra->genReg(insn.dst(1));
unsigned desc = p->generateUntypedWriteMessageDesc(0, elemNum*2);
- unsigned jip0 = beforeMessage(insn, bti, tmp, desc);
+ unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
//predicated load
p->push();
@@ -2021,7 +2025,7 @@ namespace gbe
p->curr.useFlag(insn.state.flag, insn.state.subFlag);
p->UNTYPED_WRITE(src, GenRegister::addr1(0), elemNum*2);
p->pop();
- afterMessage(insn, bti, tmp, jip0);
+ afterMessage(insn, bti, tmp, btiTmp, jip0);
}
}
@@ -2033,9 +2037,10 @@ namespace gbe
p->UNTYPED_WRITE(src, bti, elemNum);
} else {
const GenRegister tmp = ra->genReg(insn.dst(0));
+ const GenRegister btiTmp = ra->genReg(insn.dst(1));
unsigned desc = p->generateUntypedWriteMessageDesc(0, elemNum);
- unsigned jip0 = beforeMessage(insn, bti, tmp, desc);
+ unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
//predicated load
p->push();
@@ -2043,7 +2048,7 @@ namespace gbe
p->curr.useFlag(insn.state.flag, insn.state.subFlag);
p->UNTYPED_WRITE(src, GenRegister::addr1(0), elemNum);
p->pop();
- afterMessage(insn, bti, tmp, jip0);
+ afterMessage(insn, bti, tmp, btiTmp, jip0);
}
}
@@ -2057,9 +2062,10 @@ namespace gbe
p->BYTE_GATHER(dst, src, bti, elemSize);
} else {
const GenRegister tmp = ra->genReg(insn.dst(1));
+ const GenRegister btiTmp = ra->genReg(insn.dst(2));
unsigned desc = p->generateByteGatherMessageDesc(0, elemSize);
- unsigned jip0 = beforeMessage(insn, bti, tmp, desc);
+ unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
//predicated load
p->push();
@@ -2067,7 +2073,7 @@ namespace gbe
p->curr.useFlag(insn.state.flag, insn.state.subFlag);
p->BYTE_GATHER(dst, src, GenRegister::addr1(0), elemSize);
p->pop();
- afterMessage(insn, bti, tmp, jip0);
+ afterMessage(insn, bti, tmp, btiTmp, jip0);
}
}
@@ -2080,9 +2086,10 @@ namespace gbe
p->BYTE_SCATTER(src, bti, elemSize);
} else {
const GenRegister tmp = ra->genReg(insn.dst(0));
+ const GenRegister btiTmp = ra->genReg(insn.dst(1));
unsigned desc = p->generateByteScatterMessageDesc(0, elemSize);
- unsigned jip0 = beforeMessage(insn, bti, tmp, desc);
+ unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
//predicated load
p->push();
@@ -2090,7 +2097,7 @@ namespace gbe
p->curr.useFlag(insn.state.flag, insn.state.subFlag);
p->BYTE_SCATTER(src, GenRegister::addr1(0), elemSize);
p->pop();
- afterMessage(insn, bti, tmp, jip0);
+ afterMessage(insn, bti, tmp, btiTmp, jip0);
}
}
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index b03097e..155b68e 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -175,8 +175,8 @@ namespace gbe
virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn);
void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
- unsigned beforeMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister flagTemp, unsigned desc);
- void afterMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister flagTemp, unsigned jip0);
+ unsigned beforeMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister flagTemp, GenRegister btiTmp, unsigned desc);
+ void afterMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister flagTemp, GenRegister btiTmp, unsigned jip0);
/*! Implements base class */
virtual Kernel *allocateKernel(void);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 57dbec9..d258beb 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -620,19 +620,19 @@ namespace gbe
/*! Wait instruction (used for the barrier) */
void WAIT(void);
/*! Atomic instruction */
- void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg src1, Reg src2, GenRegister bti, GenRegister *flagTemp);
+ void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg src1, Reg src2, GenRegister bti, vector<GenRegister> temps);
/*! Read 64 bits float/int array */
- void READ64(Reg addr, const GenRegister *dst, const GenRegister *tmp, uint32_t elemNum, const GenRegister bti, bool native_long, GenRegister *flagTemp);
+ void READ64(Reg addr, const GenRegister *dst, const GenRegister *tmp, uint32_t elemNum, const GenRegister bti, bool native_long, vector<GenRegister> temps);
/*! Write 64 bits float/int array */
- void WRITE64(Reg addr, const GenRegister *src, const GenRegister *tmp, uint32_t srcNum, GenRegister bti, bool native_long, GenRegister *flagTemp);
+ void WRITE64(Reg addr, const GenRegister *src, const GenRegister *tmp, uint32_t srcNum, GenRegister bti, bool native_long, vector<GenRegister> temps);
/*! Untyped read (up to 4 elements) */
- void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, GenRegister bti, GenRegister *flagTemp);
+ void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, GenRegister bti, vector<GenRegister> temps);
/*! Untyped write (up to 4 elements) */
- void UNTYPED_WRITE(Reg addr, const GenRegister *src, uint32_t elemNum, GenRegister bti, GenRegister *flagTemp);
+ void UNTYPED_WRITE(Reg addr, const GenRegister *src, uint32_t elemNum, GenRegister bti, vector<GenRegister> temps);
/*! Byte gather (for unaligned bytes, shorts and ints) */
- void BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, GenRegister bti, GenRegister *flagTemp);
+ void BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, GenRegister bti, vector<GenRegister> temps);
/*! Byte scatter (for unaligned bytes, shorts and ints) */
- void BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, GenRegister bti, GenRegister *flagTemp);
+ void BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, GenRegister bti, vector <GenRegister> temps);
/*! DWord scatter (for constant cache read) */
void DWORD_GATHER(Reg dst, Reg addr, uint32_t bti);
/*! Unpack the uint to charN */
@@ -736,6 +736,15 @@ namespace gbe
GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
}
+ INLINE vector<GenRegister> getBTITemps(const ir::BTI &bti) {
+ vector<GenRegister> temps;
+ if (!bti.isConst) {
+ temps.push_back(selReg(reg(ir::FAMILY_WORD, true), ir::TYPE_U16));
+ temps.push_back(selReg(reg(ir::FAMILY_DWORD, true), ir::TYPE_U32));
+ }
+ return temps;
+ }
+
/*! Use custom allocators */
GBE_CLASS(Opaque);
friend class SelectionBlock;
@@ -1228,9 +1237,10 @@ namespace gbe
}
void Selection::Opaque::ATOMIC(Reg dst, uint32_t function,
- uint32_t srcNum, Reg src0,
- Reg src1, Reg src2, GenRegister bti, GenRegister *flagTemp) {
- unsigned dstNum = flagTemp == NULL ? 1 : 2;
+ uint32_t srcNum, Reg src0,
+ Reg src1, Reg src2, GenRegister bti,
+ vector<GenRegister> temps) {
+ unsigned dstNum = 1 + temps.size();
SelectionInstruction *insn = this->appendInsn(SEL_OP_ATOMIC, dstNum, srcNum + 1);
if (bti.file != GEN_IMMEDIATE_VALUE) {
@@ -1239,7 +1249,10 @@ namespace gbe
}
insn->dst(0) = dst;
- if(flagTemp) insn->dst(1) = *flagTemp;
+ if(temps.size()) {
+ insn->dst(1) = temps[0];
+ insn->dst(2) = temps[1];
+ }
insn->src(0) = src0;
if(srcNum > 1) insn->src(1) = src1;
@@ -1265,14 +1278,14 @@ namespace gbe
uint32_t elemNum,
const GenRegister bti,
bool native_long,
- GenRegister *flagTemp)
+ vector<GenRegister> temps)
{
SelectionInstruction *insn = NULL;
SelectionVector *srcVector = NULL;
SelectionVector *dstVector = NULL;
if (!native_long) {
- unsigned dstNum = flagTemp == NULL ? elemNum : elemNum+1;
+ unsigned dstNum = elemNum + temps.size();
insn = this->appendInsn(SEL_OP_READ64, dstNum, 2);
srcVector = this->appendVector();
dstVector = this->appendVector();
@@ -1281,10 +1294,12 @@ namespace gbe
insn->dst(elemID) = dst[elemID];
// flagTemp don't need to be put in SelectionVector
- if (flagTemp)
- insn->dst(elemNum) = *flagTemp;
+ if (temps.size()) {
+ insn->dst(elemNum) = temps[0];
+ insn->dst(elemNum + 1) = temps[1];
+ }
} else {
- unsigned dstNum = flagTemp == NULL ? elemNum*2 : elemNum*2+1;
+ unsigned dstNum = elemNum*2 + temps.size();
insn = this->appendInsn(SEL_OP_READ64, dstNum, 2);
srcVector = this->appendVector();
dstVector = this->appendVector();
@@ -1296,8 +1311,10 @@ namespace gbe
insn->dst(elemID + elemNum) = dst[elemID];
// flagTemp don't need to be put in SelectionVector
- if (flagTemp)
- insn->dst(2*elemNum) = *flagTemp;
+ if (temps.size()) {
+ insn->dst(2*elemNum) = temps[0];
+ insn->dst(2*elemNum + 1) = temps[1];
+ }
}
if (bti.file != GEN_IMMEDIATE_VALUE) {
@@ -1325,9 +1342,9 @@ namespace gbe
const GenRegister *dst,
uint32_t elemNum,
GenRegister bti,
- GenRegister *flagTemp)
+ vector<GenRegister> temps)
{
- unsigned dstNum = flagTemp == NULL ? elemNum : elemNum+1;
+ unsigned dstNum = elemNum + temps.size();
SelectionInstruction *insn = this->appendInsn(SEL_OP_UNTYPED_READ, dstNum, 2);
SelectionVector *srcVector = this->appendVector();
SelectionVector *dstVector = this->appendVector();
@@ -1336,8 +1353,10 @@ namespace gbe
// Regular instruction to encode
for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
insn->dst(elemID) = dst[elemID];
- if (flagTemp)
- insn->dst(elemNum) = *flagTemp;
+ if (temps.size()) {
+ insn->dst(elemNum) = temps[0];
+ insn->dst(elemNum + 1) = temps[1];
+ }
insn->src(0) = addr;
insn->src(1) = bti;
@@ -1366,13 +1385,13 @@ namespace gbe
uint32_t srcNum,
GenRegister bti,
bool native_long,
- GenRegister *flagTemp)
+ vector<GenRegister> temps)
{
SelectionVector *vector = NULL;
SelectionInstruction *insn = NULL;
if (!native_long) {
- unsigned dstNum = flagTemp == NULL ? 0 : 1;
+ unsigned dstNum = temps.size();
insn = this->appendInsn(SEL_OP_WRITE64, dstNum, srcNum + 2);
vector = this->appendVector();
// Register layout:
@@ -1383,8 +1402,10 @@ namespace gbe
insn->src(elemID + 1) = src[elemID];
insn->src(srcNum+1) = bti;
- if (flagTemp)
- insn->dst(0) = *flagTemp;
+ if (temps.size()) {
+ insn->dst(0) = temps[0];
+ insn->dst(1) = temps[1];
+ }
insn->extra.elem = srcNum;
vector->regNum = srcNum + 1;
@@ -1392,7 +1413,7 @@ namespace gbe
vector->reg = &insn->src(0);
vector->isSrc = 1;
} else { // handle the native long case
- unsigned dstNum = flagTemp == NULL ? srcNum : srcNum+1;
+ unsigned dstNum = srcNum + temps.size();
// Register layout:
// dst: srcNum, (flagTemp)
// src: srcNum, addr, srcNum, bti.
@@ -1412,8 +1433,10 @@ namespace gbe
for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
insn->dst(elemID) = tmp[0];
- if (flagTemp)
- insn->dst(srcNum) = *flagTemp;
+ if (temps.size()) {
+ insn->dst(srcNum) = temps[0];
+ insn->dst(srcNum + 1) = temps[1];
+ }
insn->extra.elem = srcNum;
vector->regNum = srcNum + 1;
@@ -1432,10 +1455,11 @@ namespace gbe
const GenRegister *src,
uint32_t elemNum,
GenRegister bti,
- GenRegister *flagTemp)
+ vector<GenRegister> temps)
{
- unsigned dstNum = flagTemp == NULL ? 0 : 1;
- SelectionInstruction *insn = this->appendInsn(SEL_OP_UNTYPED_WRITE, dstNum, elemNum+2);
+ unsigned dstNum = temps.size();
+ unsigned srcNum = elemNum + 2 + temps.size();
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_UNTYPED_WRITE, dstNum, srcNum);
SelectionVector *vector = this->appendVector();
if (bti.file != GEN_IMMEDIATE_VALUE) {
@@ -1443,14 +1467,17 @@ namespace gbe
insn->state.subFlag = 1;
}
- if (flagTemp) insn->dst(0) = *flagTemp;
// Regular instruction to encode
insn->src(0) = addr;
for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
insn->src(elemID+1) = src[elemID];
insn->src(elemNum+1) = bti;
- if (flagTemp)
- insn->src(elemNum+2) = *flagTemp;
+ if (temps.size()) {
+ insn->dst(0) = temps[0];
+ insn->dst(1) = temps[1];
+ insn->src(elemNum + 2) = temps[0];
+ insn->src(elemNum + 3) = temps[1];
+ }
insn->extra.elem = elemNum;
// Sends require contiguous allocation for the sources
@@ -1460,8 +1487,11 @@ namespace gbe
vector->isSrc = 1;
}
- void Selection::Opaque::BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, GenRegister bti, GenRegister *flagTemp) {
- unsigned dstNum = flagTemp == NULL ? 1 : 2;
+ void Selection::Opaque::BYTE_GATHER(Reg dst, Reg addr,
+ uint32_t elemSize,
+ GenRegister bti,
+ vector<GenRegister> temps) {
+ unsigned dstNum = 1 + temps.size();
SelectionInstruction *insn = this->appendInsn(SEL_OP_BYTE_GATHER, dstNum, 2);
SelectionVector *srcVector = this->appendVector();
SelectionVector *dstVector = this->appendVector();
@@ -1477,8 +1507,10 @@ namespace gbe
insn->src(0) = addr;
insn->src(1) = bti;
insn->dst(0) = dst;
- if (flagTemp)
- insn->dst(1) = *flagTemp;
+ if (temps.size()) {
+ insn->dst(1) = temps[0];
+ insn->dst(2) = temps[1];
+ }
insn->extra.elem = elemSize;
@@ -1494,8 +1526,9 @@ namespace gbe
srcVector->reg = &insn->src(0);
}
- void Selection::Opaque::BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, GenRegister bti, GenRegister *flagTemp) {
- unsigned dstNum = flagTemp == NULL ? 0 : 1;
+ void Selection::Opaque::BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize,
+ GenRegister bti, vector<GenRegister> temps) {
+ unsigned dstNum = temps.size();
SelectionInstruction *insn = this->appendInsn(SEL_OP_BYTE_SCATTER, dstNum, 3);
SelectionVector *vector = this->appendVector();
@@ -1504,8 +1537,10 @@ namespace gbe
insn->state.subFlag = 1;
}
- if (flagTemp)
- insn->dst(0) = *flagTemp;
+ if (temps.size()) {
+ insn->dst(0) = temps[0];
+ insn->dst(1) = temps[1];
+ }
// Instruction to encode
insn->src(0) = addr;
insn->src(1) = src;
@@ -3321,8 +3356,7 @@ namespace gbe
//GenRegister temp = getRelativeAddress(sel, addr, sel.selReg(bti.base, ir::TYPE_U32));
GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) : sel.selReg(bti.reg, ir::TYPE_U32);
- GenRegister tmp = sel.selReg(sel.reg(ir::FAMILY_WORD, true), ir::TYPE_U16);
- sel.UNTYPED_READ(addr, dst.data(), valueNum, b, bti.isConst ? NULL : &tmp);
+ sel.UNTYPED_READ(addr, dst.data(), valueNum, b, sel.getBTITemps(bti));
}
void emitUntypedRead(Selection::Opaque &sel,
@@ -3383,7 +3417,6 @@ namespace gbe
GBE_ASSERT(bti.isConst == 1);
vector<GenRegister> dst(valueNum);
GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) : sel.selReg(bti.reg, ir::TYPE_U32);
- GenRegister tmpFlag = sel.selReg(sel.reg(ir::FAMILY_WORD, true), ir::TYPE_U16);
for ( uint32_t dstID = 0; dstID < valueNum; ++dstID)
dst[dstID] = sel.selReg(insn.getValue(dstID), ir::TYPE_U64);
@@ -3393,9 +3426,9 @@ namespace gbe
tmp[valueID] = GenRegister::retype(sel.selReg(sel.reg(ir::FAMILY_QWORD), ir::TYPE_U64), GEN_TYPE_UL);
}
- sel.READ64(addr, dst.data(), tmp.data(), valueNum, b, true, bti.isConst ? NULL : &tmpFlag);
+ sel.READ64(addr, dst.data(), tmp.data(), valueNum, b, true, sel.getBTITemps(bti));
} else {
- sel.READ64(addr, dst.data(), NULL, valueNum, b, false, bti.isConst ? NULL : &tmpFlag);
+ sel.READ64(addr, dst.data(), NULL, valueNum, b, false, sel.getBTITemps(bti));
}
}
@@ -3412,7 +3445,6 @@ namespace gbe
GenRegister tmpData = sel.selReg(tmpReg, ir::TYPE_U32);
GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) : sel.selReg(bti.reg, ir::TYPE_U32);
- GenRegister tmpFlag = sel.selReg(sel.reg(ir::FAMILY_WORD, true), ir::TYPE_U16);
// Get dword aligned addr
sel.push();
@@ -3425,7 +3457,7 @@ namespace gbe
sel.push();
if (isUniform)
sel.curr.noMask = 1;
- sel.UNTYPED_READ(tmpAddr, &tmpData, 1, b, bti.isConst ? NULL : &tmpFlag);
+ sel.UNTYPED_READ(tmpAddr, &tmpData, 1, b, sel.getBTITemps(bti));
if (isUniform)
sel.curr.execWidth = 1;
@@ -3593,14 +3625,13 @@ namespace gbe
readByteAsDWord(sel, elemSize, address, value, isUniform, bti);
else {
GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) : sel.selReg(bti.reg, ir::TYPE_U32);
- GenRegister tmpFlag = sel.selReg(sel.reg(ir::FAMILY_WORD, true), ir::TYPE_U16);
// We need a temporary register if we read bytes or words
Register dst = sel.reg(FAMILY_DWORD, isUniform);
sel.push();
if (isUniform)
sel.curr.noMask = 1;
- sel.BYTE_GATHER(sel.selReg(dst, ir::TYPE_U32), address, elemSize, b, bti.isConst ? NULL : & tmpFlag);
+ sel.BYTE_GATHER(sel.selReg(dst, ir::TYPE_U32), address, elemSize, b, sel.getBTITemps(bti));
sel.pop();
sel.push();
@@ -3691,6 +3722,7 @@ namespace gbe
return true;
}
};
+
class StoreInstructionPattern : public SelectionPattern
{
public:
@@ -3705,13 +3737,12 @@ namespace gbe
{
using namespace ir;
const uint32_t valueNum = insn.getValueNum();
- vector<GenRegister> value(valueNum);
+ vector<GenRegister> value(valueNum), tmps;
GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) : sel.selReg(bti.reg, ir::TYPE_U32);
for (uint32_t valueID = 0; valueID < valueNum; ++valueID)
value[valueID] = GenRegister::retype(sel.selReg(insn.getValue(valueID)), GEN_TYPE_UD);
- GenRegister tmp = sel.selReg(sel.reg(FAMILY_WORD, true), ir::TYPE_U16);
- sel.UNTYPED_WRITE(address, value.data(), valueNum, b, bti.isConst? NULL : &tmp);
+ sel.UNTYPED_WRITE(address, value.data(), valueNum, b, sel.getBTITemps(bti));
}
void emitWrite64(Selection::Opaque &sel,
@@ -3729,16 +3760,14 @@ namespace gbe
for (uint32_t valueID = 0; valueID < valueNum; ++valueID)
src[valueID] = sel.selReg(insn.getValue(valueID), ir::TYPE_U64);
- GenRegister tmpFlag = sel.selReg(sel.reg(FAMILY_WORD, true), ir::TYPE_U16);
-
if (sel.hasLongType()) {
vector<GenRegister> tmp(valueNum);
for (uint32_t valueID = 0; valueID < valueNum; ++valueID) {
tmp[valueID] = GenRegister::retype(sel.selReg(sel.reg(ir::FAMILY_QWORD), ir::TYPE_U64), GEN_TYPE_UL);
}
- sel.WRITE64(address, src.data(), tmp.data(), valueNum, b, true, bti.isConst? NULL : &tmpFlag);
+ sel.WRITE64(address, src.data(), tmp.data(), valueNum, b, true, sel.getBTITemps(bti));
} else {
- sel.WRITE64(address, src.data(), NULL, valueNum, b, false, bti.isConst? NULL : &tmpFlag);
+ sel.WRITE64(address, src.data(), NULL, valueNum, b, false, sel.getBTITemps(bti));
}
}
@@ -3753,7 +3782,6 @@ namespace gbe
uint32_t valueNum = insn.getValueNum();
GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) : sel.selReg(bti.reg, ir::TYPE_U32);
- GenRegister tmpFlag = sel.selReg(sel.reg(FAMILY_WORD, true), ir::TYPE_U16);
if(valueNum > 1) {
const uint32_t typeSize = getFamilySize(getFamily(insn.getValueType()));
vector<GenRegister> value(valueNum);
@@ -3773,7 +3801,7 @@ namespace gbe
sel.PACK_BYTE(tmp[i], value.data() + i * 4/typeSize, typeSize, 4/typeSize);
}
- sel.UNTYPED_WRITE(address, tmp.data(), tmpRegNum, b, bti.isConst ? NULL : &tmpFlag);
+ sel.UNTYPED_WRITE(address, tmp.data(), tmpRegNum, b, sel.getBTITemps(bti));
} else {
const GenRegister value = sel.selReg(insn.getValue(0));
GBE_ASSERT(insn.getValueNum() == 1);
@@ -3790,7 +3818,7 @@ namespace gbe
else if (elemSize == GEN_BYTE_SCATTER_BYTE)
sel.MOV(tmp, GenRegister::retype(value, GEN_TYPE_UB));
sel.pop();
- sel.BYTE_SCATTER(address, tmp, elemSize, b, bti.isConst ? NULL : &tmpFlag);
+ sel.BYTE_SCATTER(address, tmp, elemSize, b, sel.getBTITemps(bti));
}
}
@@ -4558,11 +4586,9 @@ namespace gbe
if(srcNum > 2) src1 = sel.selReg(insn.getSrc(2), TYPE_U32);
if(srcNum > 3) src2 = sel.selReg(insn.getSrc(3), TYPE_U32);
- GenRegister flagTemp = sel.selReg(sel.reg(FAMILY_WORD, true), TYPE_U16);
-
GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp;
- sel.ATOMIC(dst, genAtomicOp, opNum, src0, src1, src2, bti, b.isConst ? NULL : &flagTemp);
+ sel.ATOMIC(dst, genAtomicOp, opNum, src0, src1, src2, bti, sel.getBTITemps(b));
// for fixed bti, don't generate the useless loadi
if (insn.isFixedBTI())
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index c3d5e29..4430ca5 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -1039,8 +1039,6 @@ namespace gbe
// FIXME stack buffer is not used, we may need to remove it in the furture.
if (curbeType == GBE_CURBE_EXTRA_ARGUMENT && subType == GBE_STACK_BUFFER)
intervals[regID].maxID = 1;
- if (curbeType == GBE_CURBE_BTI_UTIL)
- intervals[regID].maxID = INT_MAX;
}
}
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index 0ba0bd5..0758820 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -100,7 +100,6 @@ enum gbe_curbe_type {
GBE_CURBE_THREAD_NUM,
GBE_CURBE_ZERO,
GBE_CURBE_ONE,
- GBE_CURBE_BTI_UTIL,
GBE_GEN_REG,
};
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index 75522eb..484e82d 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -43,8 +43,7 @@ namespace ir {
"zero", "one",
"retVal",
"printf_buffer_pointer", "printf_index_buffer_pointer",
- "dwblockip",
- "bti_utility"
+ "dwblockip"
};
#if GBE_DEBUG
@@ -87,7 +86,6 @@ namespace ir {
DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1, GBE_CURBE_PRINTF_BUF_POINTER);
DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1, GBE_CURBE_PRINTF_INDEX_POINTER);
DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0, GBE_CURBE_DW_BLOCK_IP);
- DECL_NEW_REG(FAMILY_DWORD, btiUtil, 1, GBE_CURBE_BTI_UTIL);
}
#undef DECL_NEW_REG
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
index b3f2a21..a8445c4 100644
--- a/backend/src/ir/profile.hpp
+++ b/backend/src/ir/profile.hpp
@@ -71,8 +71,7 @@ namespace ir {
static const Register printfbptr = Register(27); // printf buffer address .
static const Register printfiptr = Register(28); // printf index buffer address.
static const Register dwblockip = Register(29); // blockip
- static const Register btiUtil = Register(30); // used for mixed pointer as bti utility.
- static const uint32_t regNum = 31; // number of special registers
+ static const uint32_t regNum = 30; // number of special registers
extern const char *specialRegMean[]; // special register name.
} /* namespace ocl */
--
1.9.1
More information about the Beignet
mailing list