[Beignet] [PATCH 1/2] GBE: Further optimize phiNode elimination.
Yang, Rong R
rong.r.yang at intel.com
Fri Aug 12 06:29:00 UTC 2016
I think this optimization is too special.
Maybe we could analyze the hole in IR, optimize register allocate to approach the same optimize. It is more common.
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Ruiling Song
> Sent: Friday, July 22, 2016 15:28
> To: beignet at lists.freedesktop.org
> Cc: Song, Ruiling <ruiling.song at intel.com>
> Subject: [Beignet] [PATCH 1/2] GBE: Further optimize phiNode elimination.
>
> Take below example:
> bb2:
> mov x, x-copy
> add x2, x, 1
> mul x3, x2, 2
> mov x-copy, x3
>
> this is a self-loop block. In previous implementation, we have try our best to
> coaleasce phi/phiCopy/phiCopySrc. But in this example, we can do more, we
> can even coaleasce x2 to x, x-copy and x3. in real world, there may be many
> x2's lies in the use/def chain. So we do some check to see whether we can
> totally coaleasce the use/def chain lies between phi/phiCopySrc. We place
> various careful check in this path. Some may be too strict, so we need further
> refine to this logic.
>
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
> backend/src/ir/instruction.cpp | 18 ++++++
> backend/src/ir/instruction.hpp | 11 ++++
> backend/src/llvm/llvm_gen_backend.cpp | 101
> ++++++++++++++++++++++++++++++++--
> 3 files changed, 124 insertions(+), 6 deletions(-)
>
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index ed64580..a5bf89f 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -2292,6 +2292,24 @@ END_FUNCTION(Instruction, Register)
> if (new_ins)
> *new_ins = insn;
> }
> + bool Instruction::isNativeInstruction() const {
> + if (BinaryInstruction::isClassOf(*this)) {
> + //const BinaryInstruction * const insn = cast<BinaryInstruction *>(this);
> + const BinaryInstruction &insn = cast<BinaryInstruction>(*this);
> + if (insn.getType() == ir::TYPE_U32 ||
> + insn.getType() == ir::TYPE_S32 ||
> + insn.getType() == ir::TYPE_FLOAT) {
> + if (opcode == OP_MUL || opcode == OP_ADD || opcode == OP_MOV)
> + return true;
> + }
> + }
> +
> + if (TernaryInstruction::isClassOf(*this)) {
> + if (opcode == OP_MAD)
> + return true;
> + }
> + return false;
> + }
>
> bool Instruction::hasSideEffect(void) const {
> return opcode == OP_STORE ||
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index b2b0b49..f710b5e 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -184,10 +184,21 @@ namespace ir {
> RegisterData getDstData(uint32_t ID = 0u) const;
> /*! Get the register of the given destination */
> RegisterData getSrcData(uint32_t ID = 0u) const;
> + /*! whether the instruction directly maps to native instr.
> + * this is used to determine whether we can do aggressive
> + * register coaleascing for source and destination */
> + bool isNativeInstruction() const;
> /*! Set a register in src srcID */
> void setSrc(uint32_t srcID, Register reg);
> /*! Set a register in dst dstID */
> void setDst(uint32_t dstID, Register reg);
> + bool use(Register reg) const {
> + for (uint32_t i = 0; i < getSrcNum(); i++) {
> + if (getSrc(i) == reg)
> + return true;
> + }
> + return false;
> + }
> /*! Is there any side effect in the memory sub-system? */
> bool hasSideEffect(void) const;
> /*! Get / set the parent basic block */ diff --git
> a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index 41cb783..7d78163 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -2272,6 +2272,64 @@ namespace gbe
> });
> }
>
> + bool useRegisters(const ir::Instruction *inst, std::vector<ir::Register>
> ®s) {
> + if (regs.empty()) return false;
> + for (auto & r : regs) {
> + if (inst->use(r))
> + return true;
> + }
> + return false;
> + }
> + void findPossibleReplaceChain(ir::Liveness &liveness, ir::Function &fn,
> + std::vector<ir::Register> &replaceChain,
> + ir::Register phi, ir::Register phiCopy,
> + const ir::BasicBlock *bb, ir::BasicBlock::const_iterator
> searchEnd) {
> + ir::BasicBlock::const_iterator iter = bb->begin();
> + ir::Register targetReg = phi;
> + const ir::Liveness::LiveOut &out = liveness.getLiveOut(bb);
> +
> + for ( ; iter != searchEnd; ++iter) {
> + const ir::Instruction *inst = iter.node();
> + if (useRegisters(inst, replaceChain)) {
> + replaceChain.clear();
> + // should stop
> + break;
> + }
> + if (inst->use(targetReg)) {
> + if (!inst->isNativeInstruction()) {
> + replaceChain.clear();
> + // we need to stop further coaleascing
> + // in fact we need careful consideration here.
> + // I just break and clear the replaceChain now.
> + break;
> + }
> +
> + const ir::RegisterData oldData = fn.getRegisterData(targetReg);
> + const ir::RegisterData newData = fn.getRegisterData(inst->getDst(0));
> + if(oldData.family != newData.family) {
> + replaceChain.clear();
> + break;
> + }
> + // the value cannot be an liveout
> + if (out.contains(targetReg)) {
> + replaceChain.clear();
> + break;
> + }
> +
> + replaceChain.push_back(targetReg);
> + targetReg = inst->getDst(0);
> + }
> + }
> + // check no use of the registers in replaceChain till the bb->end()
> + if (!replaceChain.empty()) {
> + for (; iter != bb->end(); ++iter) {
> + if (useRegisters(iter.node(), replaceChain)) {
> + replaceChain.clear();
> + }
> + }
> + }
> + }
> +
> void GenWriter::optimizePhiCopy(ir::Liveness &liveness, ir::Function &fn,
> map<ir::Register, ir::Register> &replaceMap,
> map<ir::Register, ir::Register> &redundantPhiCopyMap) @@ -2292,7
> +2350,7 @@ namespace gbe
> const ir::DefSet *phiCopyDef = dag->getRegDef(phiCopy);
> const ir::UseSet *phiUse = dag->getRegUse(phi);
> const DefSet *phiDef = dag->getRegDef(phi);
> - bool isOpt = true;
> + bool coalescePhiPhiCopy = true;
>
> // FIXME, I find under some situation, the phiDef maybe null, seems a
> bug when building FunctionDAg.
> // need fix it there.
> @@ -2307,7 +2365,7 @@ namespace gbe
> // phi & phiCopy are both alive at the endpoint of bb,
> // thus can not be optimized.
> if (out.contains(phi)) {
> - isOpt = false;
> + coalescePhiPhiCopy = false;
> break;
> }
>
> @@ -2333,7 +2391,17 @@ namespace gbe
> // add x2, x, 1
> // mov x-copy, x2
> // obviously x2, x-copy and x2 can be mapped to same virtual register
> -
> + // the situation may be more complex that this example, like
> + // bb2:
> + // mov x, x-copy
> + // add x2, x, 1
> + // mul x3, x2, 2
> + // mov x-copy, x3
> + // that's why we add findPossibleReplaceChain() to solve such case.
> + // that is to replace all registers in the chain (x2, x3) by x-copy.
> + // note I did not merge the new 'ReplaceChain' with previous
> phiCopy/phiCopySrc
> + // coaleascing, the reason is 'ReplaceChain' is very hard to match. But
> + // phiCopy/phiCopySrc is eady to meet. The code here need further
> refine.
> ir::BasicBlock::const_iterator iter =
> ir::BasicBlock::const_iterator(phiCopySrcDefInsn);
> ir::BasicBlock::const_iterator iterE = bb->end();
>
> @@ -2351,6 +2419,12 @@ namespace gbe
> }
> ++iter;
> }
> +
> + // Go through the basic block, to find chain of [phi, ... phiCopy]
> + std::vector<ir::Register> replaceChain;
> + findPossibleReplaceChain(liveness, fn, replaceChain, phi, phiCopy, bb,
> +
> + ir::BasicBlock::const_iterator(phiCopyDefInsn));
> +
> if (!phiPhiCopySrcInterfere) {
> replaceSrc(const_cast<Instruction *>(phiCopyDefInsn), phiCopySrc,
> phiCopy);
>
> @@ -2364,6 +2438,21 @@ namespace gbe
> replaceSrc(const_cast<Instruction *>(phiSrcUseInsn), phiCopySrc,
> phiCopy);
> }
> replaceMap.insert(std::make_pair(phiCopySrc, phiCopy));
> +
> + if (!replaceChain.empty()) {
> + for (auto &r : replaceChain) {
> + const ir::DefSet *d = dag->getRegDef(r);
> + const ir::UseSet *u = dag->getRegUse(r);
> + replaceMap.insert(std::make_pair(r, phiCopy));
> +
> + for (auto &dd : *d) {
> + replaceDst(const_cast<Instruction *>(dd->getInstruction()), r,
> phiCopy);
> + }
> + for (auto &uu : *u) {
> + replaceSrc(const_cast<Instruction *>(uu->getInstruction()), r,
> phiCopy);
> + }
> + }
> + }
> }
> }
> } else {
> @@ -2396,7 +2485,7 @@ namespace gbe
> if (&p == phiCopyDefInsn) break;
> // we only care MOV here
> if (p.getSrcNum() == 1 && p.getSrc(0) == phi) {
> - isOpt = false;
> + coalescePhiPhiCopy = false;
> break;
> }
> }
> @@ -2404,14 +2493,14 @@ namespace gbe
> }
>
> // coalease phi and phiCopy
> - if (isOpt) {
> + if (coalescePhiPhiCopy) {
> + replaceMap.insert(std::make_pair(phi, phiCopy));
> for (auto &x : *phiDef) {
> replaceDst(const_cast<Instruction *>(x->getInstruction()), phi,
> phiCopy);
> }
> for (auto &x : *phiUse) {
> const Instruction *phiUseInsn = x->getInstruction();
> replaceSrc(const_cast<Instruction *>(phiUseInsn), phi, phiCopy);
> - replaceMap.insert(std::make_pair(phi, phiCopy));
> }
> }
> }
> --
> 2.4.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list