[Beignet] [PATCH 1/2] Backend: Refine bool register patch and allocation
Xiuli Pan
xiuli.pan at intel.com
Wed May 3 08:24:06 UTC 2017
From: Pan Xiuli <xiuli.pan at intel.com>
Bool values can live purely in flag registers, but some operations need a GRF
register to be involved. So we add two kinds of helper registers, BOOL_BIT and
BOOL_UW, to handle live-out bool values and bool operations.
Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
backend/src/backend/gen_insn_selection.cpp | 43 +++-
.../src/backend/gen_insn_selection_optimize.cpp | 4 +
backend/src/backend/gen_reg_allocation.cpp | 151 +++++-------
backend/src/backend/gen_register.hpp | 2 +-
backend/src/ir/context.hpp | 2 +-
backend/src/ir/function.hpp | 3 +
backend/src/ir/instruction.cpp | 11 +-
backend/src/ir/instruction.hpp | 2 +
backend/src/ir/register.cpp | 2 +
backend/src/ir/register.hpp | 6 +-
backend/src/llvm/llvm_gen_backend.cpp | 257 +++++++++++++++++++++
11 files changed, 370 insertions(+), 113 deletions(-)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 41ef7b8..aefae5e 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1256,6 +1256,8 @@ namespace gbe
SEL_REG(ul16grf, ul8grf, ul1grf);
}
break;
+ case FAMILY_BOOL_BIT: SEL_REG(uw1grf, uw1grf, uw1grf); break;
+ case FAMILY_BOOL_UW: SEL_REG(uw16grf, uw8grf, uw1grf); break;
default: NOT_SUPPORTED;
}
GBE_ASSERT(false);
@@ -3105,13 +3107,25 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
{
sel.push();
auto dag = sel.regDAG[insn.getDst(0)];
- if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL &&
- dag->isUsed) {
- sel.curr.physicalFlag = 0;
- sel.curr.flagIndex = insn.getDst(0).value();
- sel.curr.modFlag = 1;
- }
- sel.MOV(dst, src);
+ // BOOL now is flag register, we need handle these situtaion carefully
+ if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL) {
+ sel.curr.execWidth = 1;
+ sel.curr.noMask = 1;
+ sel.curr.physicalFlag = 0;
+ sel.curr.flagIndex = insn.getDst(0).value();
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.modFlag = 1;
+ }
+ else if (sel.getRegisterFamily(insn.getSrc(0)) == ir::FAMILY_BOOL ||
+ sel.getRegisterFamily(insn.getSrc(0)) == ir::FAMILY_BOOL_BIT)
+ {
+ sel.curr.noMask = 1;
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.modFlag = 1;
+ }
+
+ sel.MOV(dst, src);
sel.pop();
}
break;
@@ -3384,8 +3398,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
bool inverse = false;
sel.getSrcGenRegImm(dag, src0, src1, type, inverse);
// Output the binary instruction
- if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL &&
- dag.isUsed) {
+ if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL ) {
GBE_ASSERT(insn.getOpcode() == OP_AND ||
insn.getOpcode() == OP_OR ||
insn.getOpcode() == OP_XOR);
@@ -3394,6 +3407,16 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
sel.curr.modFlag = 1;
}
+ if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL_BIT ) {
+ GBE_ASSERT(insn.getOpcode() == OP_AND ||
+ insn.getOpcode() == OP_OR ||
+ insn.getOpcode() == OP_XOR);
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ sel.curr.modFlag = 1;
+ }
+
switch (opcode) {
case OP_ADD:
if ((type == Type::TYPE_U64 || type == Type::TYPE_S64) && !sel.hasLongType()) {
@@ -5393,8 +5416,6 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
const ir::Liveness &liveness = sel.ctx.getLiveness();
const ir::Liveness::LiveOut &liveOut = liveness.getLiveOut(curr);
bool needStoreBool = false;
- if (liveOut.contains(dst) || dag.computeBool)
- needStoreBool = true;
// why we set the tmpDst to null?
// because for the listed type compare instruction could not
diff --git a/backend/src/backend/gen_insn_selection_optimize.cpp b/backend/src/backend/gen_insn_selection_optimize.cpp
index d2e0fb9..d60ed41 100644
--- a/backend/src/backend/gen_insn_selection_optimize.cpp
+++ b/backend/src/backend/gen_insn_selection_optimize.cpp
@@ -162,6 +162,10 @@ namespace gbe
assert(insn.opcode == SEL_OP_MOV);
const GenRegister& src = insn.src(0);
const GenRegister& dst = insn.dst(0);
+
+ if ( dst.file == GEN_GENERAL_REGISTER_FILE && ctx.sel->getRegisterFamily(dst.reg()) == ir::FAMILY_BOOL)
+ return;
+
if (src.type != dst.type || src.file != dst.file)
return;
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 9183a24..193e75c 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -87,14 +87,16 @@ namespace gbe
INLINE void getRegAttrib(ir::Register reg, uint32_t &regSize, ir::RegisterFamily *regFamily = NULL) const {
// Note that byte vector registers use two bytes per byte (and can be
// interleaved)
- static const size_t familyVectorSize[] = {2,2,2,4,8,16,32};
- static const size_t familyScalarSize[] = {2,2,2,4,8,16,32};
+ static const size_t familyVectorSize[] = {2,2,2,4,8,16,32,4,2,2};
+ static const size_t familyScalarSize[] = {2,2,2,4,8,16,32,4,2,2};
using namespace ir;
const bool isScalar = ctx.sel->isScalarReg(reg);
const RegisterData regData = ctx.sel->getRegisterData(reg);
const RegisterFamily family = regData.family;
if (family == ir::FAMILY_REG)
regSize = 32;
+ else if (family == ir::FAMILY_BOOL_BIT || family == ir::FAMILY_BOOL)
+ regSize = 2;
else {
const uint32_t typeSize = isScalar ? familyScalarSize[family] : familyVectorSize[family];
regSize = isScalar ? typeSize : ctx.getSimdWidth() * typeSize;
@@ -480,8 +482,8 @@ namespace gbe
insn.opcode == SEL_OP_XOR))
#define IS_SCALAR_FLAG(insn) selection.isScalarReg(ir::Register(insn.state.flagIndex))
- #define GET_FLAG_REG(insn) GenRegister::uwxgrf(IS_SCALAR_FLAG(insn) ? 1 : 8,\
- ir::Register(insn.state.flagIndex));
+
+ #define GET_FLAG_REG(insn) GenRegister::uw1grf(ir::Register(insn.state.flagIndex));
#define IS_TEMP_FLAG(insn) (insn.state.flag == 0 && insn.state.subFlag == 1)
#define NEED_DST_GRF_TYPE_FIX(ty) \
(ty == GEN_TYPE_F || \
@@ -489,39 +491,6 @@ namespace gbe
ty == GEN_TYPE_DF || \
ty == GEN_TYPE_UL || \
ty == GEN_TYPE_L)
- // Flag is a virtual flag, this function is to validate the virtual flag
- // to a physical flag. It is used to validate both temporary flag and the
- // non-temporary flag registers.
- // We track the last temporary validate register, if it's the same as
- // current, we can avoid the revalidation.
- void GenRegAllocator::Opaque::validateFlag(Selection &selection,
- SelectionInstruction &insn) {
- GBE_ASSERT(insn.state.physicalFlag == 1);
- if (!IS_TEMP_FLAG(insn) && validatedFlags.find(insn.state.flagIndex) != validatedFlags.end())
- return;
- else if (IS_TEMP_FLAG(insn) && validTempFlagReg == insn.state.flagIndex)
- return;
- SelectionInstruction *cmp0 = selection.create(SEL_OP_CMP, 1, 2);
- cmp0->state = GenInstructionState(ctx.getSimdWidth());
- cmp0->state.flag = insn.state.flag;
- cmp0->state.subFlag = insn.state.subFlag;
- if (IS_SCALAR_FLAG(insn))
- cmp0->state.noMask = 1;
- cmp0->src(0) = GET_FLAG_REG(insn);
- cmp0->src(1) = GenRegister::immuw(0);
- cmp0->dst(0) = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW);
- cmp0->extra.function = GEN_CONDITIONAL_NEQ;
- insn.prepend(*cmp0);
- if (!IS_TEMP_FLAG(insn))
- validatedFlags.insert(insn.state.flagIndex);
- else {
- if (insn.state.modFlag == 0)
- validTempFlagReg = insn.state.flagIndex;
- else
- validTempFlagReg = 0;
- }
- }
-
void GenRegAllocator::Opaque::allocateFlags(Selection &selection) {
// Previously, we have a global flag allocation implemntation.
@@ -653,53 +622,61 @@ namespace gbe
// Patch the predicate now. Note that only compares actually modify it (it
// is called a "conditional modifier"). The other instructions just read
// it
- if (insn.state.physicalFlag == 0) {
- // SEL.bool instruction, the dst register should be stored in GRF
- // the pred flag is used by flag register
- if (insn.opcode == SEL_OP_SEL) {
- ir::Register dst = insn.dst(0).reg();
- if (ctx.sel->getRegisterFamily(dst) == ir::FAMILY_BOOL &&
- allocatedFlags.find(dst) != allocatedFlags.end())
- allocatedFlags.erase(dst);
+ if (IS_IMPLICITLY_MOD_FLAG(insn))
+ {
+ // Rewrite BOOL-family GRF operands that own an allocated flag so they address the flag register directly.
+ const uint32_t srcNum = insn.srcNum, dstNum = insn.dstNum;
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+ const GenRegister &selReg = insn.src(srcID);
+ const ir::Register reg = selReg.reg();
+ if (selReg.file != GEN_GENERAL_REGISTER_FILE)
+ continue;
+ if (ctx.sel->getRegisterFamily(reg) == ir::FAMILY_BOOL) // no trailing ';' here: the family check must guard the block below
+ {
+ auto it = allocatedFlags.find(reg);
+ if (it != allocatedFlags.end())
+ insn.src(srcID) = GenRegister::flag(it->second / 2 ,it->second & 1); // same flag/subFlag split as used for insn.state below
+ }
+ }
+ for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {
+ const GenRegister &selReg = insn.dst(dstID);
+ const ir::Register reg = selReg.reg();
+ if (selReg.file != GEN_GENERAL_REGISTER_FILE)
+ continue;
+ if (ctx.sel->getRegisterFamily(reg) == ir::FAMILY_BOOL) // no trailing ';' here: the family check must guard the block below
+ {
+ auto it = allocatedFlags.find(reg);
+ if (it != allocatedFlags.end())
+ insn.dst(dstID) = GenRegister::flag(it->second / 2 ,it->second & 1); // same flag/subFlag split as used for insn.state below
+ }
+ }
+ }
+
+ if (insn.state.physicalFlag == 0) {
auto it = allocatedFlags.find(ir::Register(insn.state.flagIndex));
if (it != allocatedFlags.end()) {
insn.state.physicalFlag = 1;
insn.state.flag = it->second / 2;
insn.state.subFlag = it->second & 1;
- // modFlag is for the LOADI/MOV/AND/OR/XOR instructions which will modify a
- // flag register. We set the condition for them to save one instruction if possible.
- if (IS_IMPLICITLY_MOD_FLAG(insn)) {
- // If this is a modFlag on a scalar bool, we need to remove it
- // from the allocated flags map. Then latter, the user could
- // validate the flag from the scalar value correctly.
- // The reason is we can not predicate the active channel when we
- // need to use this flag.
- if (IS_SCALAR_FLAG(insn)) {
- allocatedFlags.erase(ir::Register(insn.state.flagIndex));
- continue;
- }
- insn.extra.function = GEN_CONDITIONAL_NEQ;
- }
- // If this is an external bool, we need to validate it if it is not validated yet.
- if ((insn.state.externFlag &&
- insn.state.predicate != GEN_PREDICATE_NONE))
- validateFlag(selection, insn);
} else {
insn.state.physicalFlag = 1;
insn.state.flag = 0;
insn.state.subFlag = 1;
- // If this is for MOV/AND/OR/... we don't need to waste an extra instruction
- // to generate the flag here, just continue to next instruction. And the validTempFlagReg
- // will not be destroyed.
- if (IS_IMPLICITLY_MOD_FLAG(insn))
- continue;
// This bool doesn't have a deadicated flag, we use temporary flag here.
// each time we need to validate it from the grf register.
if (insn.state.predicate != GEN_PREDICATE_NONE)
- validateFlag(selection, insn);
+ {
+ SelectionInstruction *movf = selection.create(SEL_OP_MOV, 1, 1);
+ movf->state = GenInstructionState(1);
+ movf->state.noMask = 1;
+ movf->state.predicate = GEN_PREDICATE_NONE;
+ movf->state.execWidth = 1;
+ movf->src(0) = GenRegister::uw1grf(ir::Register(insn.state.flagIndex));
+ movf->dst(0) = GenRegister::flag(insn.state.flag,insn.state.subFlag);
+ insn.prepend(*movf);
+ }
}
if (insn.opcode == SEL_OP_CMP &&
(flagBooleans.contains(insn.dst(0).reg()) ||
@@ -729,35 +706,15 @@ namespace gbe
// register.
if (insn.state.flagGen == 1 &&
!flagBooleans.contains((ir::Register)(insn.state.flagIndex))) {
- SelectionInstruction *sel0 = selection.create(SEL_OP_SEL, 1, 2);
- uint32_t simdWidth;
- simdWidth = IS_SCALAR_FLAG(insn) ? 1 : ctx.getSimdWidth();
-
- sel0->state = GenInstructionState(simdWidth);
- if (IS_SCALAR_FLAG(insn))
- sel0->state.noMask = 1;
- sel0->state.flag = insn.state.flag;
- sel0->state.subFlag = insn.state.subFlag;
- sel0->state.predicate = GEN_PREDICATE_NORMAL;
- sel0->src(0) = GenRegister::uw1grf(ir::ocl::one);
- sel0->src(1) = GenRegister::uw1grf(ir::ocl::zero);
- sel0->dst(0) = GET_FLAG_REG(insn);
- liveInSet01.insert(insn.parent->bb);
- insn.append(*sel0);
- // We use the zero one after the liveness analysis, we have to update
- // the liveness data manually here.
- GenRegInterval &interval0 = intervals[ir::ocl::zero];
- GenRegInterval &interval1 = intervals[ir::ocl::one];
- interval0.minID = std::min(interval0.minID, (int32_t)insn.ID);
- interval0.maxID = std::max(interval0.maxID, (int32_t)insn.ID);
- interval1.minID = std::min(interval1.minID, (int32_t)insn.ID);
- interval1.maxID = std::max(interval1.maxID, (int32_t)insn.ID);
+
+ SelectionInstruction *movg = selection.create(SEL_OP_MOV, 1, 1);
+ movg->state = GenInstructionState(1);
+ movg->state.predicate = GEN_PREDICATE_NONE;
+ movg->state.noMask = 1;
+ movg->src(0) = GenRegister::flag(insn.state.flag, insn.state.subFlag);
+ movg->dst(0) = GenRegister::uw1grf(ir::Register(insn.state.flagIndex));
+ insn.append(*movg);
}
- } else {
- // If the instruction use the temporary flag register manually,
- // we should invalidate the temp flag reg here.
- if (insn.state.flag == 0 && insn.state.subFlag == 1)
- validTempFlagReg = 0;
}
}
}
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 6c73f5e..da58805 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -120,7 +120,7 @@ namespace gbe
this->noMask = 0;
this->flag = 0;
this->subFlag = 0;
- this->grfFlag = 1;
+ this->grfFlag = 0;
this->externFlag = 0;
this->modFlag = 0;
this->flagGen = 0;
diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp
index 877d639..6945bfe 100644
--- a/backend/src/ir/context.hpp
+++ b/backend/src/ir/context.hpp
@@ -142,7 +142,7 @@ namespace ir {
/*! Append a new tuple */
template <typename... Args> INLINE Tuple tuple(Args...args) {
GBE_ASSERTM(fn != NULL, "No function currently defined");
- return fn->file.appendTuple(args...);
+ return fn->makeTuple(args...);
}
/*! Make a tuple from an array of register */
INLINE Tuple arrayTuple(const Register *reg, uint32_t regNum) {
diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
index 64d9727..ce7412b 100644
--- a/backend/src/ir/function.hpp
+++ b/backend/src/ir/function.hpp
@@ -563,6 +563,9 @@ namespace ir {
INLINE bool setUseDeviceEnqueue(bool useDeviceEnqueue) {
return this->useDeviceEnqueue = useDeviceEnqueue;
}
+ template <typename... Args> INLINE Tuple makeTuple(Args...args) { // Build a tuple from args in this function's register file
+ return this->file.appendTuple(args...); // all arguments are forwarded unchanged to appendTuple
+ }
private:
friend class Context; //!< Can freely modify a function
std::string name; //!< Function name
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index a9156ff..7525138 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -2290,7 +2290,7 @@ END_FUNCTION(Instruction, Register)
#if GBE_DEBUG
const RegisterData oldData = this->getDstData(dstID);
const RegisterData newData = fn.getRegisterData(reg);
- GBE_ASSERT(oldData.family == newData.family);
+ GBE_ASSERT(oldData.family == newData.family || oldData.family == FAMILY_BOOL || oldData.family == FAMILY_BOOL_BIT);
#endif /* GBE_DEBUG */
const Opcode op = this->getOpcode();
switch (op) {
@@ -2339,6 +2339,15 @@ END_FUNCTION(Instruction, Register)
*new_ins = insn;
}
+ void Instruction::insertbefore(Instruction *next, Instruction ** new_ins) { // Insert an instruction built from *this before next
+ Function &fn = next->getFunction();
+ Instruction *insn = fn.newInstruction(*this); // the instruction that gets linked in is created by next's function from *this
+ insn->parent = next->parent; // the new instruction gets the same parent as next
+ prepend(insn, next);
+ if (new_ins) // optional out-parameter: report the newly inserted instruction to the caller
+ *new_ins = insn;
+ }
+
bool Instruction::hasSideEffect(void) const {
return opcode == OP_STORE ||
opcode == OP_TYPED_WRITE ||
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 8685dd4..9dd775b 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -209,6 +209,8 @@ namespace ir {
void remove(void);
/* Insert the instruction after the previous one. */
void insert(Instruction *prev, Instruction ** new_ins = NULL);
+ /* Insert the instruction before the next one. */
+ void insertbefore(Instruction *next, Instruction ** new_ins = NULL);
void setDBGInfo(DebugInfo in) { DBGInfo = in; }
/*! Indicates if the instruction belongs to instruction type T. Typically, T
* can be BinaryInstruction, UnaryInstruction, LoadInstruction and so on
diff --git a/backend/src/ir/register.cpp b/backend/src/ir/register.cpp
index 1e78722..ec59e61 100644
--- a/backend/src/ir/register.cpp
+++ b/backend/src/ir/register.cpp
@@ -38,6 +38,8 @@ namespace ir {
case FAMILY_OWORD: return out << "oword";
case FAMILY_HWORD: return out << "hword";
case FAMILY_REG: return out << "reg";
+ case FAMILY_BOOL_BIT: return out << "boolbit";
+ case FAMILY_BOOL_UW: return out << "booluw";
};
return out;
}
diff --git a/backend/src/ir/register.hpp b/backend/src/ir/register.hpp
index 09af24e..e2194dd 100644
--- a/backend/src/ir/register.hpp
+++ b/backend/src/ir/register.hpp
@@ -48,11 +48,13 @@ namespace ir {
FAMILY_QWORD = 4,
FAMILY_OWORD = 5,
FAMILY_HWORD = 6,
- FAMILY_REG = 7
+ FAMILY_REG = 7,
+ FAMILY_BOOL_BIT = 8,
+ FAMILY_BOOL_UW = 9
};
INLINE char getFamilyName(RegisterFamily family) {
- static char registerFamilyName[] = {'b', 'B', 'W', 'D', 'Q', 'O', 'H', 'R'};
+ static char registerFamilyName[] = {'b', 'B', 'W', 'D', 'Q', 'O', 'H', 'R', 'T', 'U'};
return registerFamilyName[family];
}
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 9954021..c8e29c5 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -705,6 +705,8 @@ namespace gbe
map <ir::Register, ir::Register> &redundantPhiCopyMap);
/*! Will try to remove redundants LOADI in basic blocks */
void removeLOADIs(const ir::Liveness &liveness, ir::Function &fn);
+ /*! Will fix bool values live out basic blocks */
+ void fixBools(const ir::Liveness &liveness, ir::Function &fn);
/*! To avoid lost copy, we need two values for PHI. This function create a
* fake value for the copy (basically ptr+1)
*/
@@ -2919,6 +2921,260 @@ namespace gbe
});
}
+ // Return the helper register recorded for reg in map, creating one on first use.
+ // A newly created register has the requested family (BOOL_BIT by default) and is stored in map.
+ INLINE ir::Register getRegFromMap(ir::Function &fn, map<ir::Register, ir::Register> &map, ir::Register reg, ir::RegisterFamily family = ir::FAMILY_BOOL_BIT)
+ {
+ const auto found = map.find(reg);
+ // Fast path: reg already has a helper register.
+ if (found != map.end())
+ return found->second;
+ // First request for reg: allocate a helper register and remember the pairing.
+ const ir::Register created = fn.newRegister(family);
+ map.insert(std::make_pair(reg, created));
+ return created;
+ }
+
+ void GenWriter::fixBools(const ir::Liveness &liveness, ir::Function &fn)
+ {
+ // Rewrite bool handling in fn with two kinds of helper registers, BOOL_BIT and BOOL_UW; liveness tells which bools outlive their block.
+ // BOOL_BIT uses one bit per channel's flag, thus the same size as a flag register.
+ // BOOL_UW uses one UW per channel's flag, thus the same as a UINT16 register.
+ // We track these helper registers (keyed by the bool register they stand for) as well as the PHI bool registers.
+ set<ir::Register> boolRegs; // NOTE: declared but never read or written in this function
+ set<ir::Register> boolPHIs;
+ map<ir::Register, ir::Register> boolBits;
+ map<ir::Register, ir::Register> boolUWs;
+
+ // Traverse all blocks and insert helper registers to help handle bool operation.
+ fn.foreachBlock([&](ir::BasicBlock &bb)
+ {
+ // Liveinfo helps us to know if the bool value outlives the block
+ const ir::Liveness::BlockInfo &info = liveness.getBlockInfo(&bb);
+
+ // Top bottom traversal to handle all of the bool registers
+ bb.foreach([&](ir::Instruction &insn)
+ {
+ bool isChanged = false;
+ ir::Opcode op = insn.getOpcode();
+
+ // Handle compare that generate the bool values
+ if (insn.isMemberOf<ir::CompareInstruction>()) {
+ ir::Register reg = insn.getDst(0);
+ if (info.inLiveOut(reg))
+ {
+ GBE_ASSERT(fn.getRegisterFamily(reg) == ir::FAMILY_BOOL);
+ ir::Register boolBit = fn.newRegister(ir::FAMILY_BOOL_BIT);
+ ir::Instruction mov = ir::MOV(ir::TYPE_U16, boolBit, reg);
+ mov.insert(&insn);
+ boolBits.insert(std::make_pair(reg, boolBit));
+ }
+ }
+
+ // Replace the bool calculate with helper register.
+ if (op == ir::OP_OR || op == ir::OP_AND ||
+ op == ir::OP_XOR)
+ {
+ ir::Register dst = insn.getDst(0);
+ ir::Register src0 = insn.getSrc(0);
+ ir::Register src1 = insn.getSrc(1);
+ if(fn.getRegisterFamily(dst) == ir::FAMILY_BOOL)
+ {
+ // In simple case, we could use BOOL_BIT to calculate bool values,
+ // but if the bool value is a PHI value thus it will be changed in
+ // different block and not all channel can be changed thus we need a
+ // UW register to enable channel mask.
+ // TODO: Now we use BOOL_UW to handle all cases for there will be
+ // some error when there are too many layers of IF branch.
+ if (boolPHIs.find(dst) != boolPHIs.end() || 1 ||
+ boolPHIs.find(src0) != boolPHIs.end() ||
+ boolPHIs.find(src1) != boolPHIs.end())
+ {
+ auto it0 = boolUWs.find(src0);
+ ir::Register boolUW0;
+ if (it0 != boolUWs.end())
+ boolUW0 = it0->second;
+ else
+ {
+ boolUW0 = fn.newRegister(ir::FAMILY_BOOL_UW);
+ boolUWs.insert(std::make_pair(src0, boolUW0));
+ ir::Register flag = src0;
+ auto it = boolBits.find(src0);
+ if (it != boolBits.end())
+ {
+ ir::Register boolBit = it->second;
+ ir::Instruction mov = ir::MOV(ir::TYPE_U16, flag , boolBit);
+ mov.insertbefore(&insn);
+ }
+ ir::Register zeroReg = ir::Register(ir::ocl::zero);
+ ir::Register oneReg = ir::Register(ir::ocl::one);
+ const ir::Tuple index = fn.makeTuple(flag, oneReg ,zeroReg);
+ ir::Instruction sel = ir::SEL(ir::TYPE_U16, boolUW0 ,index);
+ sel.insertbefore(&insn);
+ }
+
+ auto it1 = boolUWs.find(src1);
+ ir::Register boolUW1;
+ if (it1 != boolUWs.end())
+ boolUW1 = it1->second;
+ else
+ {
+ boolUW1 = fn.newRegister(ir::FAMILY_BOOL_UW);
+ boolUWs.insert(std::make_pair(src1, boolUW1));
+ ir::Register flag = src1;
+ auto it = boolBits.find(src1);
+ if (it != boolBits.end())
+ {
+ ir::Register boolBit = it->second;
+ ir::Instruction mov = ir::MOV(ir::TYPE_U16, flag , boolBit);
+ mov.insertbefore(&insn);
+ }
+ ir::Register zeroReg = ir::Register(ir::ocl::zero);
+ ir::Register oneReg = ir::Register(ir::ocl::one);
+ const ir::Tuple index = fn.makeTuple(flag, oneReg ,zeroReg);
+ ir::Instruction sel = ir::SEL(ir::TYPE_U16, boolUW1 ,index);
+ sel.insertbefore(&insn);
+ }
+
+ auto it = boolUWs.find(dst);
+ ir::Register boolUW;
+ if (it != boolUWs.end())
+ boolUW = it->second;
+ else
+ {
+ boolUW = fn.newRegister(ir::FAMILY_BOOL_UW);
+ boolUWs.insert(std::make_pair(dst, boolUW));
+ }
+
+ ir::Register boolBit= getRegFromMap(fn, boolBits, dst);
+ ir::Instruction mov = ir::MOV(ir::TYPE_U16, boolBit, dst);
+ mov.insert(&insn);
+ // USE CMP to get UW ==> flag
+ ir::Register zeroReg = ir::Register(ir::ocl::zero);
+ ir::Instruction cmp = ir::NE(ir::TYPE_U16, dst, boolUW ,zeroReg);
+ cmp.insert(&insn);
+
+ if (op == ir::OP_OR)
+ {
+ ir::Instruction newinsn = ir::OR(ir::TYPE_U16, boolUW, boolUW0, boolUW1);
+ newinsn.replace(&insn);
+ }
+ else if (op == ir::OP_AND)
+ {
+ ir::Instruction newinsn = ir::AND(ir::TYPE_U16, boolUW, boolUW0, boolUW1);
+ newinsn.replace(&insn);
+ }
+ else if (op == ir::OP_XOR)
+ {
+ ir::Instruction newinsn = ir::XOR(ir::TYPE_U16, boolUW, boolUW0, boolUW1);
+ newinsn.replace(&insn);
+ }
+ else
+ GBE_ASSERT(0 && "UNSOPPORTED");
+
+ }
+ else
+ {
+ auto it = boolBits.find(dst);
+ ir::Register boolBit;
+ if (it != boolBits.end())
+ boolBit = it->second;
+ else
+ boolBit = fn.newRegister(ir::FAMILY_BOOL_BIT);
+ boolBits.insert(std::make_pair(dst, boolBit));
+
+ auto it0 = boolBits.find(src0);
+ ir::Register boolBit0;
+ if (it0 != boolBits.end()) // check src0's own iterator, not dst's, before dereferencing it0
+ boolBit0 = it0->second;
+ else
+ {
+ boolBit0 = fn.newRegister(ir::FAMILY_BOOL_BIT);
+ ir::Instruction mov = ir::MOV(ir::TYPE_U16, boolBit0, src0);
+ mov.insertbefore(&insn);
+ boolBits.insert(std::make_pair(src0, boolBit0));
+ }
+
+ auto it1 = boolBits.find(src1);
+ ir::Register boolBit1;
+ if (it1 != boolBits.end()) // check src1's own iterator, not dst's, before dereferencing it1
+ boolBit1 = it1->second;
+ else
+ {
+ boolBit1 = fn.newRegister(ir::FAMILY_BOOL_BIT);
+ ir::Instruction mov = ir::MOV(ir::TYPE_U16, boolBit1, src1);
+ mov.insertbefore(&insn);
+ boolBits.insert(std::make_pair(src1, boolBit1));
+ }
+
+ if (op == ir::OP_OR)
+ {
+ ir::Instruction newinsn = ir::OR(ir::TYPE_U16, boolBit, boolBit0, boolBit1);
+ newinsn.replace(&insn);
+ }
+ else if (op == ir::OP_AND)
+ {
+ ir::Instruction newinsn = ir::AND(ir::TYPE_U16, boolBit, boolBit0, boolBit1);
+ newinsn.replace(&insn);
+ }
+ else if (op == ir::OP_XOR)
+ {
+ ir::Instruction newinsn = ir::XOR(ir::TYPE_U16, boolBit, boolBit0, boolBit1);
+ newinsn.replace(&insn);
+ }
+ else
+ GBE_ASSERT(0 && "UNSOPPORTED");
+ }
+ isChanged = true;
+ }
+ }
+
+ if (insn.getOpcode() == ir::OP_MOV)
+ {
+ ir::Register dst = insn.getDst(0);
+ ir::Register src = insn.getSrc(0);
+ if(fn.getRegisterFamily(dst) == ir::FAMILY_BOOL &&
+ fn.getRegisterFamily(src) == ir::FAMILY_BOOL)
+ {
+ boolPHIs.insert(dst);
+ ir::Register boolBit= getRegFromMap(fn, boolBits, dst);
+ ir::Register boolBit0= getRegFromMap(fn, boolBits, src);
+ ir::Instruction newinsn = ir::MOV(ir::TYPE_U16, boolBit, boolBit0);
+ newinsn.replace(&insn);
+ isChanged = true;
+ }
+ }
+
+ if (insn.getOpcode() == ir::OP_LOADI)
+ {
+ ir::Register reg = insn.getDst(0);
+ if(fn.getRegisterFamily(reg) == ir::FAMILY_BOOL) {
+ ir::Register boolBit = getRegFromMap(fn, boolBits, reg);
+ replaceDst(&insn, reg, boolBit);
+ }
+ }
+
+ // Convert BOOL_BIT into BOOL
+ for (uint32_t i = 0; i < insn.getSrcNum(); ++i)
+ {
+ if (isChanged) break;
+ ir::Register reg = insn.getSrc(i);
+ if (fn.getRegisterFamily(reg) != ir::FAMILY_BOOL)
+ continue;
+ if (!(insn.getOpcode() == ir::OP_BRA || insn.getOpcode() == ir::OP_SEL))
+ continue;
+ auto it = boolBits.find(reg);
+ if (it == boolBits.end())
+ break;
+ ir::Register boolBit = it->second;
+ ir::Instruction mov = ir::MOV(ir::TYPE_U16, reg, boolBit);
+ mov.insertbefore(&insn);
+ }
+ });
+ });
+ }
+
+
BVAR(OCL_OPTIMIZE_PHI_MOVES, true);
BVAR(OCL_OPTIMIZE_LOADI, true);
@@ -3221,6 +3477,7 @@ namespace gbe
this->postPhiCopyOptimization(liveness, fn, replaceMap, redundantPhiCopyMap);
this->removeMOVs(liveness, fn);
}
+ this->fixBools(liveness, fn);
}
void GenWriter::regAllocateReturnInst(ReturnInst &I) {}
--
2.7.4
More information about the Beignet
mailing list