[Beignet] [PATCH 1/2] GBE: Further optimize phiNode elimination.
Ruiling Song
ruiling.song at intel.com
Fri Jul 22 07:28:28 UTC 2016
Take the example below:
bb2:
mov x, x-copy
add x2, x, 1
mul x3, x2, 2
mov x-copy, x3
This is a self-loop block. In the previous implementation we tried our
best to coalesce phi/phiCopy/phiCopySrc. But in this example we can do more:
x2 can also be coalesced with x, x-copy and x3. In real-world code there may
be many registers like x2 lying in the use/def chain, so we add some checks
to see whether the whole use/def chain between phi and phiCopySrc can be
coalesced. Several careful checks are placed on this path; some of them may
be too strict, so the logic will need further refinement.
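If all these checks pass, every register in the chain is rewritten in terms of
x-copy. Conceptually (this is a sketch of the intended result, not the literal
output of the pass) the block above then becomes:
bb2:
mov x-copy, x-copy
add x-copy, x-copy, 1
mul x-copy, x-copy, 2
mov x-copy, x-copy
with only two trivial self-copies left, which are easy to eliminate.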
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
backend/src/ir/instruction.cpp | 18 ++++++
backend/src/ir/instruction.hpp | 11 ++++
backend/src/llvm/llvm_gen_backend.cpp | 101 ++++++++++++++++++++++++++++++++--
3 files changed, 124 insertions(+), 6 deletions(-)
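For readers who want the shape of the check before reading the diff, here is a
much-simplified, self-contained C++ sketch of the chain walk. ToyInsn, usesAny()
and findReplaceChain() are made-up names used purely for illustration; the real
implementation below works on ir::Instruction/ir::BasicBlock and additionally
requires the register family to stay the same along the chain.

// Simplified sketch of the chain walk; not the real GBE API.
#include <cstdint>
#include <set>
#include <vector>

typedef uint32_t Reg;

struct ToyInsn {
  Reg dst;                 // single destination register
  std::vector<Reg> srcs;   // source registers
  bool native;             // maps 1:1 to a native instruction (add/mul/mov/mad)
  bool uses(Reg r) const {
    for (Reg s : srcs)
      if (s == r) return true;
    return false;
  }
};

static bool usesAny(const ToyInsn &insn, const std::vector<Reg> &regs) {
  for (Reg r : regs)
    if (insn.uses(r)) return true;
  return false;
}

// Walk the block from its start up to the phiCopy definition (searchEnd)
// and collect the registers that can all be mapped onto phiCopy.
std::vector<Reg> findReplaceChain(const std::vector<ToyInsn> &block,
                                  size_t searchEnd, Reg phi,
                                  const std::set<Reg> &liveOut) {
  std::vector<Reg> chain;
  Reg target = phi;
  size_t i = 0;
  for (; i < searchEnd; ++i) {
    const ToyInsn &insn = block[i];
    // a chain register is used by some unrelated instruction: give up
    if (usesAny(insn, chain)) { chain.clear(); break; }
    if (!insn.uses(target)) continue;
    // only walk across simple native instructions whose input value
    // does not live out of the block
    if (!insn.native || liveOut.count(target)) { chain.clear(); break; }
    chain.push_back(target);
    target = insn.dst;
  }
  // the chain registers must not be used again later in the block
  for (; i < block.size(); ++i)
    if (usesAny(block[i], chain)) { chain.clear(); break; }
  return chain;
}

If the returned chain is non-empty, every register in it can be renamed to
phiCopy, which is what the patch does through replaceDst()/replaceSrc().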
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index ed64580..a5bf89f 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -2292,6 +2292,24 @@ END_FUNCTION(Instruction, Register)
if (new_ins)
*new_ins = insn;
}
+ bool Instruction::isNativeInstruction() const {
+ if (BinaryInstruction::isClassOf(*this)) {
+ //const BinaryInstruction * const insn = cast<BinaryInstruction *>(this);
+ const BinaryInstruction &insn = cast<BinaryInstruction>(*this);
+ if (insn.getType() == ir::TYPE_U32 ||
+ insn.getType() == ir::TYPE_S32 ||
+ insn.getType() == ir::TYPE_FLOAT) {
+ if (opcode == OP_MUL || opcode == OP_ADD || opcode == OP_MOV)
+ return true;
+ }
+ }
+
+ if (TernaryInstruction::isClassOf(*this)) {
+ if (opcode == OP_MAD)
+ return true;
+ }
+ return false;
+ }
bool Instruction::hasSideEffect(void) const {
return opcode == OP_STORE ||
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index b2b0b49..f710b5e 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -184,10 +184,21 @@ namespace ir {
RegisterData getDstData(uint32_t ID = 0u) const;
/*! Get the register of the given destination */
RegisterData getSrcData(uint32_t ID = 0u) const;
+ /*! Whether the instruction directly maps to a native instruction.
+ * This is used to determine whether we can do aggressive
+ * register coalescing for source and destination. */
+ bool isNativeInstruction() const;
/*! Set a register in src srcID */
void setSrc(uint32_t srcID, Register reg);
/*! Set a register in dst dstID */
void setDst(uint32_t dstID, Register reg);
+ bool use(Register reg) const {
+ for (uint32_t i = 0; i < getSrcNum(); i++) {
+ if (getSrc(i) == reg)
+ return true;
+ }
+ return false;
+ }
/*! Is there any side effect in the memory sub-system? */
bool hasSideEffect(void) const;
/*! Get / set the parent basic block */
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 41cb783..7d78163 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2272,6 +2272,64 @@ namespace gbe
});
}
+ bool useRegisters(const ir::Instruction *inst, std::vector<ir::Register> &regs) {
+ if (regs.empty()) return false;
+ for (auto & r : regs) {
+ if (inst->use(r))
+ return true;
+ }
+ return false;
+ }
+ void findPossibleReplaceChain(ir::Liveness &liveness, ir::Function &fn,
+ std::vector<ir::Register> &replaceChain,
+ ir::Register phi, ir::Register phiCopy,
+ const ir::BasicBlock *bb, ir::BasicBlock::const_iterator searchEnd) {
+ ir::BasicBlock::const_iterator iter = bb->begin();
+ ir::Register targetReg = phi;
+ const ir::Liveness::LiveOut &out = liveness.getLiveOut(bb);
+
+ for ( ; iter != searchEnd; ++iter) {
+ const ir::Instruction *inst = iter.node();
+ if (useRegisters(inst, replaceChain)) {
+ replaceChain.clear();
+ // a register in the chain is used by another instruction, stop here
+ break;
+ }
+ if (inst->use(targetReg)) {
+ if (!inst->isNativeInstruction()) {
+ replaceChain.clear();
+ // we need to stop further coalescing.
+ // In fact this case needs more careful consideration;
+ // for now just break and clear the replaceChain.
+ break;
+ }
+
+ const ir::RegisterData oldData = fn.getRegisterData(targetReg);
+ const ir::RegisterData newData = fn.getRegisterData(inst->getDst(0));
+ if(oldData.family != newData.family) {
+ replaceChain.clear();
+ break;
+ }
+ // the value must not be live out of the block
+ if (out.contains(targetReg)) {
+ replaceChain.clear();
+ break;
+ }
+
+ replaceChain.push_back(targetReg);
+ targetReg = inst->getDst(0);
+ }
+ }
+ // check that the registers in replaceChain are not used again before bb->end()
+ if (!replaceChain.empty()) {
+ for (; iter != bb->end(); ++iter) {
+ if (useRegisters(iter.node(), replaceChain)) {
+ replaceChain.clear();
+ }
+ }
+ }
+ }
+
void GenWriter::optimizePhiCopy(ir::Liveness &liveness, ir::Function &fn,
map<ir::Register, ir::Register> &replaceMap,
map<ir::Register, ir::Register> &redundantPhiCopyMap)
@@ -2292,7 +2350,7 @@ namespace gbe
const ir::DefSet *phiCopyDef = dag->getRegDef(phiCopy);
const ir::UseSet *phiUse = dag->getRegUse(phi);
const DefSet *phiDef = dag->getRegDef(phi);
- bool isOpt = true;
+ bool coalescePhiPhiCopy = true;
// FIXME, I find under some situation, the phiDef maybe null, seems a bug when building FunctionDAg.
// need fix it there.
@@ -2307,7 +2365,7 @@ namespace gbe
// phi & phiCopy are both alive at the endpoint of bb,
// thus can not be optimized.
if (out.contains(phi)) {
- isOpt = false;
+ coalescePhiPhiCopy = false;
break;
}
@@ -2333,7 +2391,17 @@ namespace gbe
// add x2, x, 1
// mov x-copy, x2
// obviously x2, x-copy and x2 can be mapped to same virtual register
-
+ // The situation may be more complex than this example, like
+ // bb2:
+ // mov x, x-copy
+ // add x2, x, 1
+ // mul x3, x2, 2
+ // mov x-copy, x3
+ // That is why we add findPossibleReplaceChain() to handle such cases,
+ // i.e. to replace all registers in the chain (x2, x3) by x-copy.
+ // Note the new 'ReplaceChain' handling is not merged with the previous
+ // phiCopy/phiCopySrc coalescing: the 'ReplaceChain' pattern is much harder
+ // to match, while phiCopy/phiCopySrc is easy to meet. This needs further refinement.
ir::BasicBlock::const_iterator iter = ir::BasicBlock::const_iterator(phiCopySrcDefInsn);
ir::BasicBlock::const_iterator iterE = bb->end();
@@ -2351,6 +2419,12 @@ namespace gbe
}
++iter;
}
+
+ // Go through the basic block to find the chain of [phi, ... phiCopy]
+ std::vector<ir::Register> replaceChain;
+ findPossibleReplaceChain(liveness, fn, replaceChain, phi, phiCopy, bb,
+ ir::BasicBlock::const_iterator(phiCopyDefInsn));
+
if (!phiPhiCopySrcInterfere) {
replaceSrc(const_cast<Instruction *>(phiCopyDefInsn), phiCopySrc, phiCopy);
@@ -2364,6 +2438,21 @@ namespace gbe
replaceSrc(const_cast<Instruction *>(phiSrcUseInsn), phiCopySrc, phiCopy);
}
replaceMap.insert(std::make_pair(phiCopySrc, phiCopy));
+
+ if (!replaceChain.empty()) {
+ for (auto &r : replaceChain) {
+ const ir::DefSet *d = dag->getRegDef(r);
+ const ir::UseSet *u = dag->getRegUse(r);
+ replaceMap.insert(std::make_pair(r, phiCopy));
+
+ for (auto &dd : *d) {
+ replaceDst(const_cast<Instruction *>(dd->getInstruction()), r, phiCopy);
+ }
+ for (auto &uu : *u) {
+ replaceSrc(const_cast<Instruction *>(uu->getInstruction()), r, phiCopy);
+ }
+ }
+ }
}
}
} else {
@@ -2396,7 +2485,7 @@ namespace gbe
if (&p == phiCopyDefInsn) break;
// we only care MOV here
if (p.getSrcNum() == 1 && p.getSrc(0) == phi) {
- isOpt = false;
+ coalescePhiPhiCopy = false;
break;
}
}
@@ -2404,14 +2493,14 @@ namespace gbe
}
// coalease phi and phiCopy
- if (isOpt) {
+ if (coalescePhiPhiCopy) {
+ replaceMap.insert(std::make_pair(phi, phiCopy));
for (auto &x : *phiDef) {
replaceDst(const_cast<Instruction *>(x->getInstruction()), phi, phiCopy);
}
for (auto &x : *phiUse) {
const Instruction *phiUseInsn = x->getInstruction();
replaceSrc(const_cast<Instruction *>(phiUseInsn), phi, phiCopy);
- replaceMap.insert(std::make_pair(phi, phiCopy));
}
}
}
--
2.4.1