[Beignet] [PATCH 5/5] GBE: use S16 vector to represent bool.
Zhigang Gong
zhigang.gong at intel.com
Thu Mar 13 18:44:00 PDT 2014
The original purpose of using a flag or an S16 scalar to represent
a bool data type was to save register usage. But that brings too
much complexity when trying to handle it correctly in every possible
case. The consequence is that we have to take too much care over bool
handling in many places in the instruction selection stage. We
never even handled all the cases correctly. The hardest part is
that we can't touch just part of the bits in an S16 scalar register.
There is no instruction to support that. So if a bool comes from
another BB, or even if the bool is from the same BB but there is
a backward JMP and the bool is still a possible livein register,
we need to emit extra instructions to keep the inactive lanes'
bits at their original values.
I changed to using an S16 vector to represent the bool type, and all
the complicated cases are gone. The only big side effect is
the increased register consumption. But considering that a real
application will not have many bools active concurrently, this
may not be a big issue.
I measured the performance impact using luxmark, and only
observed a 2%-3% performance regression. Some easy
performance optimization opportunities remain, such as reducing
the unnecessary MOVs between flag and bool within the same
block. I think this performance regression should not be a
big deal. In particular, this change will make the following if/endif
optimization a little bit easier.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
backend/src/backend/gen_context.cpp | 10 +-
backend/src/backend/gen_encoder.cpp | 9 +-
backend/src/backend/gen_encoder.hpp | 2 +-
backend/src/backend/gen_insn_selection.cpp | 236 +++++++++++------------------
backend/src/backend/gen_reg_allocation.cpp | 18 +--
5 files changed, 106 insertions(+), 169 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 51c6c97..88d4866 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -1032,6 +1032,7 @@ namespace gbe
GenRegister tmp0 = ra->genReg(insn.dst(0));
GenRegister tmp1 = ra->genReg(insn.dst(1));
GenRegister tmp2 = ra->genReg(insn.dst(2));
+ GenRegister dst = ra->genReg(insn.dst(3));
tmp0.type = (src0.type == GEN_TYPE_L) ? GEN_TYPE_D : GEN_TYPE_UD;
tmp1.type = (src1.type == GEN_TYPE_L) ? GEN_TYPE_D : GEN_TYPE_UD;
int flag = p->curr.flag, subFlag = p->curr.subFlag;
@@ -1106,6 +1107,12 @@ namespace gbe
p->AND(f1, f1, f4);
p->MOV(GenRegister::flag(flag, subFlag), f1);
p->pop();
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->MOV(dst, GenRegister::immd(0));
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->MOV(dst, GenRegister::immd(-1));
+ p->pop();
}
void GenContext::emitI64SATADDInstruction(const SelectionInstruction &insn) {
@@ -1589,8 +1596,9 @@ namespace gbe
void GenContext::emitCompareInstruction(const SelectionInstruction &insn) {
const GenRegister src0 = ra->genReg(insn.src(0));
const GenRegister src1 = ra->genReg(insn.src(1));
+ const GenRegister dst = ra->genReg(insn.dst(0));
if (insn.opcode == SEL_OP_CMP)
- p->CMP(insn.extra.function, src0, src1);
+ p->CMP(insn.extra.function, src0, src1, dst);
else {
GBE_ASSERT(insn.opcode == SEL_OP_SEL_CMP);
const GenRegister dst = ra->genReg(insn.dst(0));
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 0664d77..9853a56 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -1088,12 +1088,13 @@ namespace gbe
}
}
- void GenEncoder::CMP(uint32_t conditional, GenRegister src0, GenRegister src1) {
+ void GenEncoder::CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst) {
if (needToSplitCmp(this, src0, src1) == false) {
GenInstruction *insn = this->next(GEN_OPCODE_CMP);
this->setHeader(insn);
insn->header.destreg_or_condmod = conditional;
- this->setDst(insn, GenRegister::null());
+ insn->header.thread_control = GEN_THREAD_SWITCH;
+ this->setDst(insn, dst);
this->setSrc0(insn, src0);
this->setSrc1(insn, src1);
} else {
@@ -1105,7 +1106,7 @@ namespace gbe
insnQ1->header.quarter_control = GEN_COMPRESSION_Q1;
insnQ1->header.execution_size = GEN_WIDTH_8;
insnQ1->header.destreg_or_condmod = conditional;
- this->setDst(insnQ1, GenRegister::null());
+ this->setDst(insnQ1, dst);
this->setSrc0(insnQ1, src0);
this->setSrc1(insnQ1, src1);
@@ -1115,7 +1116,7 @@ namespace gbe
insnQ2->header.quarter_control = GEN_COMPRESSION_Q2;
insnQ2->header.execution_size = GEN_WIDTH_8;
insnQ2->header.destreg_or_condmod = conditional;
- this->setDst(insnQ2, GenRegister::null());
+ this->setDst(insnQ2, GenRegister::Qn(dst, 1));
this->setSrc0(insnQ2, GenRegister::Qn(src0, 1));
this->setSrc1(insnQ2, GenRegister::Qn(src1, 1));
}
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 094a5c2..8d9a497 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -135,7 +135,7 @@ namespace gbe
/*! Jump indexed instruction */
void JMPI(GenRegister src);
/*! Compare instructions */
- void CMP(uint32_t conditional, GenRegister src0, GenRegister src1);
+ void CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst = GenRegister::null());
/*! Select with embedded compare (like sel.le ...) */
void SEL_CMP(uint32_t conditional, GenRegister dst, GenRegister src0, GenRegister src1);
/*! EOT is used to finish GPGPU threads */
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index b9d4e23..7555d10 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -388,9 +388,9 @@ namespace gbe
/*! Create a new register in the register file and append it in the
* temporary list of the current block
*/
- INLINE ir::Register reg(ir::RegisterFamily family) {
+ INLINE ir::Register reg(ir::RegisterFamily family, bool scalar = false) {
GBE_ASSERT(block != NULL);
- const ir::Register reg = file.append(family);
+ const ir::Register reg = file.append(family, scalar);
block->append(reg);
return reg;
}
@@ -525,7 +525,7 @@ namespace gbe
/*! Shift a 64-bit integer */
void I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[7]);
/*! Compare 64-bit integer */
- void I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3]);
+ void I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3], Reg dst = GenRegister::null());
/*! Saturated addition of 64-bit integer */
void I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]);
/*! Saturated subtraction of 64-bit integer */
@@ -539,7 +539,7 @@ namespace gbe
/*! Jump indexed instruction */
void JMPI(Reg src, ir::LabelIndex target);
/*! Compare instructions */
- void CMP(uint32_t conditional, Reg src0, Reg src1);
+ void CMP(uint32_t conditional, Reg src0, Reg src1, Reg dst = GenRegister::null());
/*! Select instruction with embedded comparison */
void SEL_CMP(uint32_t conditional, Reg dst, Reg src0, Reg src1);
/* Constant buffer move instruction */
@@ -895,10 +895,7 @@ namespace gbe
bool Selection::Opaque::isScalarOrBool(ir::Register reg) const {
if (isScalarReg(reg))
return true;
- else {
- const ir::RegisterFamily family = file.get(reg).family;
- return family == ir::FAMILY_BOOL;
- }
+ return false;
}
#define SEL_REG(SIMD16, SIMD8, SIMD1) \
@@ -918,7 +915,7 @@ namespace gbe
const RegisterData data = file.get(reg);
const RegisterFamily family = data.family;
switch (family) {
- case FAMILY_BOOL: SEL_REG(uw1grf, uw1grf, uw1grf); break;
+ case FAMILY_BOOL: SEL_REG(uw16grf, uw8grf, uw1grf); break;
case FAMILY_WORD: SEL_REG(uw16grf, uw8grf, uw1grf); break;
case FAMILY_BYTE: SEL_REG(ub16grf, ub8grf, ub1grf); break;
case FAMILY_DWORD: SEL_REG(f16grf, f8grf, f1grf); break;
@@ -963,10 +960,11 @@ namespace gbe
insn->index = uint16_t(index);
}
- void Selection::Opaque::CMP(uint32_t conditional, Reg src0, Reg src1) {
- SelectionInstruction *insn = this->appendInsn(SEL_OP_CMP, 0, 2);
+ void Selection::Opaque::CMP(uint32_t conditional, Reg src0, Reg src1, Reg dst) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_CMP, 1, 2);
insn->src(0) = src0;
insn->src(1) = src1;
+ insn->dst(0) = dst;
insn->extra.function = conditional;
}
@@ -1246,12 +1244,13 @@ namespace gbe
insn->src(2) = src2;
}
- void Selection::Opaque::I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3]) {
- SelectionInstruction *insn = this->appendInsn(SEL_OP_I64CMP, 3, 2);
+ void Selection::Opaque::I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3], Reg dst) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64CMP, 4, 2);
insn->src(0) = src0;
insn->src(1) = src1;
for(int i=0; i<3; i++)
insn->dst(i) = tmp[i];
+ insn->dst(3) = dst;
insn->extra.function = conditional;
}
@@ -1667,25 +1666,7 @@ namespace gbe
}
break;
case ir::OP_MOV:
- if(insn.getType() == ir::TYPE_BOOL) {
- GenRegister flagReg;
- uint32_t predicate = sel.curr.predicate;
- sel.push();
- sel.curr.execWidth = 1;
- sel.curr.predicate = GEN_PREDICATE_NONE;
- sel.curr.noMask = 1;
- if(predicate == GEN_PREDICATE_NONE)
- sel.MOV(dst, src);
- else {
- if(sel.curr.physicalFlag)
- flagReg = GenRegister::flag(sel.curr.flag, sel.curr.subFlag);
- else
- flagReg = sel.selReg(ir::Register(sel.curr.flagIndex), ir::TYPE_U16);
-
- sel.AND(dst, flagReg, src);
- }
- sel.pop();
- } else if (dst.isdf()) {
+ if (dst.isdf()) {
ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
sel.MOV_DF(dst, src, sel.selReg(r));
} else
@@ -1770,7 +1751,7 @@ namespace gbe
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
tmp[i].type = GEN_TYPE_UD;
}
- tmp[13] = sel.selReg(sel.reg(FAMILY_BOOL));
+ tmp[13] = sel.selReg(sel.reg(FAMILY_BOOL, true));
if(op == OP_DIV)
sel.I64DIV(dst, src0, src1, tmp);
else
@@ -1859,7 +1840,7 @@ namespace gbe
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
tmp[i].type = GEN_TYPE_UD;
}
- tmp[5] = sel.selReg(sel.reg(FAMILY_BOOL));
+ tmp[5] = sel.selReg(sel.reg(FAMILY_BOOL, true));
sel.I64SATADD(dst, src0, src1, tmp);
break;
}
@@ -1900,7 +1881,7 @@ namespace gbe
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
tmp[i].type = GEN_TYPE_UD;
}
- tmp[5] = sel.selReg(sel.reg(FAMILY_BOOL));
+ tmp[5] = sel.selReg(sel.reg(FAMILY_BOOL, true));
sel.I64SATSUB(dst, src0, src1, tmp);
break;
}
@@ -1914,7 +1895,7 @@ namespace gbe
GenRegister tmp[7];
for(int i = 0; i < 6; i ++)
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
- tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL));
+ tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL, true));
sel.I64SHL(dst, src0, src1, tmp);
} else
sel.SHL(dst, src0, src1);
@@ -1924,7 +1905,7 @@ namespace gbe
GenRegister tmp[7];
for(int i = 0; i < 6; i ++)
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
- tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL));
+ tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL, true));
sel.I64SHR(dst, src0, src1, tmp);
} else
sel.SHR(dst, src0, src1);
@@ -1934,7 +1915,7 @@ namespace gbe
GenRegister tmp[7];
for(int i = 0; i < 6; i ++)
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
- tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL));
+ tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL, true));
sel.I64ASR(dst, src0, src1, tmp);
} else
sel.ASR(dst, src0, src1);
@@ -1951,7 +1932,7 @@ namespace gbe
temp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
temp[i].type = GEN_TYPE_UD;
}
- temp[9] = sel.selReg(sel.reg(FAMILY_BOOL));
+ temp[9] = sel.selReg(sel.reg(FAMILY_BOOL, true));
sel.I64_MUL_HI(dst, src0, src1, temp);
break;
}
@@ -2316,26 +2297,13 @@ namespace gbe
sel.push();
if (sel.isScalarOrBool(insn.getDst(0)) == true) {
sel.curr.execWidth = 1;
- if(type == TYPE_BOOL) {
- if(imm.data.b) {
- if(sel.curr.predicate == GEN_PREDICATE_NONE)
- flagReg = GenRegister::immuw(0xffff);
- else {
- if(sel.curr.physicalFlag)
- flagReg = GenRegister::flag(sel.curr.flag, sel.curr.subFlag);
- else
- flagReg = sel.selReg(Register(sel.curr.flagIndex), TYPE_U16);
- }
- } else
- flagReg = GenRegister::immuw(0x0);
- }
sel.curr.predicate = GEN_PREDICATE_NONE;
sel.curr.noMask = 1;
}
switch (type) {
case TYPE_BOOL:
- sel.MOV(dst, flagReg);
+ sel.MOV(dst, imm.data.b ? GenRegister::immuw(0xffff) : GenRegister::immuw(0));
break;
case TYPE_U32:
case TYPE_S32:
@@ -2367,24 +2335,22 @@ namespace gbe
using namespace ir;
const ir::Register reg = sel.reg(FAMILY_DWORD);
const GenRegister barrierMask = sel.selReg(ocl::barriermask, TYPE_BOOL);
- const GenRegister tempFlag = sel.selReg(sel.reg(FAMILY_BOOL), TYPE_BOOL);
- const GenRegister flagReg = GenRegister::flag(0, 0);
const uint32_t params = insn.getParameters();
sel.push();
sel.curr.predicate = GEN_PREDICATE_NONE;
sel.curr.noMask = 1;
sel.curr.execWidth = 1;
- sel.OR(barrierMask, flagReg, barrierMask);
- sel.MOV(tempFlag, barrierMask);
+ sel.OR(barrierMask, GenRegister::flag(0, 0), barrierMask);
+ sel.MOV(GenRegister::flag(1, 1), barrierMask);
sel.pop();
// A barrier is OK to start the thread synchronization *and* SLM fence
sel.push();
- //sel.curr.predicate = GEN_PREDICATE_NONE;
- sel.curr.flagIndex = (uint16_t)tempFlag.value.reg;
- sel.curr.physicalFlag = 0;
- sel.BARRIER(GenRegister::ud8grf(reg), sel.selReg(sel.reg(FAMILY_DWORD)), params);
+ //sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.flag = 1;
+ sel.curr.subFlag = 1;
+ sel.BARRIER(GenRegister::ud8grf(reg), sel.selReg(sel.reg(FAMILY_DWORD)), params);
sel.pop();
return true;
}
@@ -2690,33 +2656,12 @@ namespace gbe
const Opcode opcode = insn.getOpcode();
const Type type = insn.getType();
const Register dst = insn.getDst(0);
- Register tmpDst;
+ GenRegister tmpDst;
- const ir::BasicBlock *insnBlock = insn.getParent();
- const ir::Liveness &liveness = sel.ctx.getLiveness();
- const ir::Liveness::UEVar &livein = liveness.getLiveIn(insnBlock);
- if (!livein.contains(dst))
- tmpDst = dst;
+ if (type == TYPE_BOOL || type == TYPE_U16 || type == TYPE_S16)
+ tmpDst = sel.selReg(sel.reg(FAMILY_WORD), TYPE_BOOL);
else
- tmpDst = sel.reg(FAMILY_BOOL);
-
- // Limit the compare to the active lanes. Use the same compare as for f0.0
- sel.push();
- const LabelIndex label = insn.getParent()->getLabelIndex();
- const GenRegister blockip = sel.selReg(ocl::blockip, TYPE_U16);
- const GenRegister labelReg = GenRegister::immuw(label);
-
- sel.curr.predicate = GEN_PREDICATE_NONE;
- sel.curr.physicalFlag = 0;
- sel.curr.flagIndex = uint16_t(tmpDst);
- if (tmpDst != dst) {
- sel.CMP(GEN_CONDITIONAL_G, blockip, labelReg);
- sel.curr.execWidth = 1;
- sel.AND(sel.selReg(dst, TYPE_BOOL), sel.selReg(dst, TYPE_BOOL), sel.selReg(tmpDst, TYPE_BOOL));
- sel.XOR(sel.selReg(tmpDst, TYPE_BOOL), sel.selReg(tmpDst, TYPE_BOOL), GenRegister::immuw(0xFFFF));
- } else
- sel.CMP(GEN_CONDITIONAL_LE, blockip, labelReg);
- sel.pop();
+ tmpDst = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_S32);
// Look for immediate values for the right source
GenRegister src0, src1;
@@ -2740,26 +2685,38 @@ namespace gbe
}
sel.push();
- sel.curr.physicalFlag = 0;
- sel.curr.flagIndex = uint16_t(tmpDst);
+ sel.curr.flag = 1;
+ sel.curr.subFlag = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
if (type == TYPE_S64 || type == TYPE_U64) {
GenRegister tmp[3];
for(int i=0; i<3; i++)
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
- sel.I64CMP(getGenCompare(opcode), src0, src1, tmp);
+ sel.push();
+ sel.curr.execWidth = 1;
+ sel.curr.noMask = 1;
+ sel.MOV(GenRegister::flag(1, 1), GenRegister::flag(0, 0));
+ sel.pop();
+ sel.curr.predicate = GEN_PREDICATE_NORMAL;
+ sel.I64CMP(getGenCompare(opcode), src0, src1, tmp, tmpDst);
} else if(opcode == OP_ORD) {
- sel.CMP(GEN_CONDITIONAL_EQ, src0, src0);
- sel.CMP(GEN_CONDITIONAL_EQ, src1, src1);
+ sel.push();
+ sel.curr.execWidth = 1;
+ sel.curr.noMask = 1;
+ sel.MOV(GenRegister::flag(1, 1), GenRegister::flag(0, 0));
+ sel.pop();
+ sel.curr.predicate = GEN_PREDICATE_NORMAL;
+
+ sel.CMP(GEN_CONDITIONAL_EQ, src0, src0, tmpDst);
+ sel.CMP(GEN_CONDITIONAL_EQ, src1, src1, tmpDst);
} else
- sel.CMP(getGenCompare(opcode), src0, src1);
+ sel.CMP(getGenCompare(opcode), src0, src1, tmpDst);
sel.pop();
- if (tmpDst != dst) {
- sel.push();
- sel.curr.predicate = GEN_PREDICATE_NONE;
- sel.curr.execWidth = 1;
- sel.OR(sel.selReg(dst, TYPE_U16), sel.selReg(dst, TYPE_U16), sel.selReg(tmpDst, TYPE_U16));
- sel.pop();
- }
+
+ if (!(type == TYPE_BOOL || type == TYPE_U16 || type == TYPE_S16))
+ sel.MOV(sel.selReg(dst, TYPE_U16), GenRegister::unpacked_uw((ir::Register)tmpDst.value.reg));
+ else
+ sel.MOV(sel.selReg(dst, TYPE_U16), tmpDst);
return true;
}
};
@@ -2893,7 +2850,7 @@ namespace gbe
for(int i=0; i<6; i++) {
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
}
- tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL), TYPE_BOOL);
+ tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL, true), TYPE_BOOL);
sel.CONVI64_TO_F(dst, src, tmp);
} else if (dst.isdf()) {
ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
@@ -2905,7 +2862,7 @@ namespace gbe
GenRegister tmp[3];
tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_FLOAT);
- tmp[2] = sel.selReg(sel.reg(FAMILY_BOOL), TYPE_BOOL);
+ tmp[2] = sel.selReg(sel.reg(FAMILY_BOOL, true), TYPE_BOOL);
sel.CONVF_TO_I64(dst, src, tmp);
break;
}
@@ -2993,11 +2950,13 @@ namespace gbe
const uint32_t simdWidth = sel.ctx.getSimdWidth();
const Register pred = insn.getPredicate();
sel.push();
- sel.curr.predicate = GEN_PREDICATE_NORMAL;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
sel.curr.execWidth = simdWidth;
- sel.curr.physicalFlag = 0;
- sel.curr.flagIndex = uint16_t(pred);
+ sel.curr.flag = 1;
+ sel.curr.subFlag = 1;
+ sel.CMP(GEN_CONDITIONAL_NEQ, sel.selReg(pred, TYPE_U16), GenRegister::immuw(0));
sel.curr.noMask = 0;
+ sel.curr.predicate = GEN_PREDICATE_NORMAL;
if(type == ir::TYPE_S64 || type == ir::TYPE_U64)
sel.SEL_INT64(tmp, src0, src1);
else
@@ -3027,7 +2986,7 @@ namespace gbe
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
tmp[i].type = GEN_TYPE_UD;
}
- tmp[9] = sel.selReg(sel.reg(FAMILY_BOOL));
+ tmp[9] = sel.selReg(sel.reg(FAMILY_BOOL, true));
sel.I64MADSAT(dst, src0, src1, src2, tmp);
break;
}
@@ -3230,37 +3189,6 @@ namespace gbe
DECL_PATTERN(BranchInstruction)
{
- // Get active pred.
- const ir::Register getActivePred(Selection::Opaque &sel,
- const ir::BranchInstruction &insn,
- const ir::Register pred) const
- {
- using namespace ir;
- GenRegister flagReg;
- Register activePred;
- const ir::BasicBlock *insnBlock = insn.getParent();
- const ir::Liveness &liveness = sel.ctx.getLiveness();
- const ir::Liveness::UEVar &livein = liveness.getLiveIn(insnBlock);
-
- /* If the pred is not in the livein set, then this pred should be defined
- in this block and we don't need to validate it. */
- if (!livein.contains(pred))
- return pred;
-
- activePred = sel.reg(FAMILY_BOOL);
- sel.push();
- sel.curr.predicate = GEN_PREDICATE_NONE;
- sel.curr.execWidth = 1;
- sel.curr.noMask = 1;
- if(sel.curr.physicalFlag)
- flagReg = GenRegister::flag(sel.curr.flag, sel.curr.subFlag);
- else
- flagReg = sel.selReg(ir::Register(sel.curr.flagIndex), ir::TYPE_U16);
- sel.AND(sel.selReg(activePred, TYPE_U16), flagReg, sel.selReg(pred, TYPE_U16));
- sel.pop();
- return activePred;
- }
-
void emitForwardBranch(Selection::Opaque &sel,
const ir::BranchInstruction &insn,
ir::LabelIndex dst,
@@ -3278,15 +3206,20 @@ namespace gbe
if (insn.isPredicated() == true) {
const Register pred = insn.getPredicateIndex();
- const Register activePred = getActivePred(sel, insn, pred);
- // Update the PcIPs
sel.push();
// we don't need to set next label to the pcip
// as if there is no backward jump latter, then obviously everything will work fine.
// If there is backward jump latter, then all the pcip will be updated correctly there.
- sel.curr.physicalFlag = 0;
- sel.curr.flagIndex = uint16_t(activePred);
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ sel.curr.execWidth = 1;
+ sel.MOV(GenRegister::flag(1, 1), GenRegister::flag(0, 0));
+ sel.pop();
+ sel.push();
+ sel.curr.flag = 1;
+ sel.curr.subFlag = 1;
+ sel.CMP(GEN_CONDITIONAL_NEQ, sel.selReg(pred, TYPE_U16), GenRegister::immuw(0));
sel.MOV(ip, GenRegister::immuw(uint16_t(dst)));
sel.pop();
@@ -3299,7 +3232,8 @@ namespace gbe
// will check those bits as well.
sel.push();
- sel.curr.flag = 0;
+ //sel.curr.physicalFlag = 0;
+ sel.curr.flag = 1;
sel.curr.subFlag = 1;
sel.curr.predicate = GEN_PREDICATE_NONE;
sel.CMP(GEN_CONDITIONAL_G, ip, GenRegister::immuw(nextLabel));
@@ -3354,27 +3288,33 @@ namespace gbe
if (insn.isPredicated() == true) {
const Register pred = insn.getPredicateIndex();
- const Register activePred = getActivePred(sel, insn, pred);
// Update the PcIPs for all the branches. Just put the IPs of the next
- // block. Next instruction will properly reupdate the IPs of the lanes
+ // block. Next instruction will properly update the IPs of the lanes
// that actually take the branch
const LabelIndex next = bb.getNextBlock()->getLabelIndex();
sel.MOV(ip, GenRegister::immuw(uint16_t(next)));
sel.push();
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ sel.curr.execWidth = 1;
+ sel.MOV(GenRegister::flag(1, 1), GenRegister::flag(0, 0));
+ sel.pop();
+ sel.push();
+ sel.curr.flag = 1;
+ sel.curr.subFlag = 1;
+ sel.CMP(GEN_CONDITIONAL_NEQ, sel.selReg(pred, TYPE_U16), GenRegister::immuw(0));
// Re-update the PcIPs for the branches that takes the backward jump
- sel.curr.physicalFlag = 0;
- sel.curr.flagIndex = uint16_t(activePred);
sel.MOV(ip, GenRegister::immuw(uint16_t(dst)));
- // We clear all the inactive channel to 0 as the GEN_PREDICATE_ALIGN1_ANY8/16
- // will check those bits as well.
+ // We clear all the inactive channel to 0 as the GEN_PREDICATE_ALIGN1_ANY8/16
+ // will check those bits as well.
sel.curr.predicate = GEN_PREDICATE_NONE;
sel.curr.execWidth = 1;
sel.curr.noMask = 1;
GenRegister emaskReg = GenRegister::uw1grf(ocl::emask);
- sel.AND(sel.selReg(activePred, TYPE_U16), sel.selReg(activePred, TYPE_U16), emaskReg);
+ sel.AND(GenRegister::flag(0, 1), GenRegister::flag(0, 1), emaskReg);
// Branch to the jump target
if (simdWidth == 8)
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 9dfe7c6..2935f36 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -223,7 +223,6 @@ namespace gbe
const ir::Register reg = interval.reg;
if (RA.contains(reg) == true)
return true; // already allocated
- GBE_ASSERT(ctx.sel->isScalarReg(reg) == false);
uint32_t regSize;
ir::RegisterFamily family;
getRegAttrib(reg, regSize, &family);
@@ -298,7 +297,6 @@ namespace gbe
intervals.push_back(tmp);
intervals[tmp].minID = vector->insn->ID;
intervals[tmp].maxID = vector->insn->ID;
- //printf("tmp reg %d minID %d \n", tmp.value(), vector->insn->ID);
}
}
}
@@ -354,11 +352,6 @@ namespace gbe
this->expiringID++;
continue;
}
- // Ignore booleans that were allocated with flags
- if (ctx.sel->getRegisterFamily(reg) == ir::FAMILY_BOOL && !grfBooleans.contains(reg)) {
- this->expiringID++;
- continue;
- }
if (toExpire->maxID >= limit.minID)
break;
@@ -537,12 +530,8 @@ namespace gbe
if (RA.contains(reg))
continue; // already allocated
- if (ctx.sel->getRegisterFamily(reg) == ir::FAMILY_BOOL && !grfBooleans.contains(reg))
- continue;
-
// Case 1: the register belongs to a vector, allocate all the registers in
// one piece
- //printf("prepare to allocate reg %d \n", reg.value());
auto it = vectorMap.find(reg);
if (it != vectorMap.end()) {
const SelectionVector *vector = it->second.first;
@@ -550,7 +539,6 @@ namespace gbe
if(spilledRegs.find(vector->reg[0].reg())
!= spilledRegs.end())
continue;
- //printf("vector %p \n", vector);
uint32_t alignment;
ir::RegisterFamily family;
@@ -568,7 +556,6 @@ namespace gbe
}
for (uint32_t regID = 0; regID < vector->regNum; ++regID) {
const ir::Register reg = vector->reg[regID].reg();
- //printf("allocate regID %d reg %d RA.contains? %d family %d regFamily %d\n", regID, reg.value(), RA.contains(reg), family, ctx.sel->getRegisterData(reg).family);
GBE_ASSERT(RA.contains(reg) == false
&& ctx.sel->getRegisterData(reg).family == family);
insertNewReg(reg, grfOffset + alignment * regID, true);
@@ -845,6 +832,7 @@ namespace gbe
}
// Flag registers can only go to src[0]
+#if 0
const SelectionOpcode opcode = SelectionOpcode(insn.opcode);
if (opcode == SEL_OP_AND || opcode == SEL_OP_OR || opcode == SEL_OP_XOR
|| opcode == SEL_OP_I64AND || opcode == SEL_OP_I64OR || opcode == SEL_OP_I64XOR) {
@@ -854,7 +842,7 @@ namespace gbe
grfBooleans.insert(reg);
}
}
-
+#endif
// OK, a flag is used as a predicate or a conditional modifier
if (insn.state.physicalFlag == 0) {
const ir::Register reg = ir::Register(insn.state.flagIndex);
@@ -911,7 +899,7 @@ namespace gbe
}
// First we try to put all booleans registers into flags
- this->allocateFlags(selection);
+ //this->allocateFlags(selection);
// Allocate all the GRFs now (regular register and boolean that are not in
// flag registers)
--
1.8.3.2
More information about the Beignet
mailing list