[Beignet] [PATCH 1/2] GBE: Further optimize phiNode elimination.

Yang, Rong R rong.r.yang at intel.com
Fri Aug 12 06:29:00 UTC 2016


I think this optimization is too special. 
Maybe we could analyze the hole in IR, optimize register allocate to approach the same optimize. It is more common.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Ruiling Song
> Sent: Friday, July 22, 2016 15:28
> To: beignet at lists.freedesktop.org
> Cc: Song, Ruiling <ruiling.song at intel.com>
> Subject: [Beignet] [PATCH 1/2] GBE: Further optimize phiNode elimination.
> 
> Take below example:
> bb2:
>     mov x, x-copy
>     add x2, x, 1
>     mul x3, x2, 2
>     mov x-copy, x3
> 
> this is a self-loop block. In previous implementation, we have try our best to
> coaleasce phi/phiCopy/phiCopySrc. But in this example, we can do more, we
> can even coaleasce x2 to x, x-copy and x3. in real world, there may be many
> x2's lies in the use/def chain. So we do some check to see whether we can
> totally coaleasce the use/def chain lies between phi/phiCopySrc. We place
> various careful check in this path. Some may be too strict, so we need further
> refine to this logic.
> 
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
>  backend/src/ir/instruction.cpp        |  18 ++++++
>  backend/src/ir/instruction.hpp        |  11 ++++
>  backend/src/llvm/llvm_gen_backend.cpp | 101
> ++++++++++++++++++++++++++++++++--
>  3 files changed, 124 insertions(+), 6 deletions(-)
> 
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index ed64580..a5bf89f 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -2292,6 +2292,24 @@ END_FUNCTION(Instruction, Register)
>      if (new_ins)
>        *new_ins = insn;
>    }
> +  bool Instruction::isNativeInstruction() const {
> +    if (BinaryInstruction::isClassOf(*this)) {
> +      //const BinaryInstruction * const insn = cast<BinaryInstruction *>(this);
> +      const BinaryInstruction &insn = cast<BinaryInstruction>(*this);
> +      if (insn.getType() == ir::TYPE_U32 ||
> +          insn.getType() == ir::TYPE_S32 ||
> +          insn.getType() == ir::TYPE_FLOAT) {
> +        if (opcode == OP_MUL || opcode == OP_ADD || opcode == OP_MOV)
> +          return true;
> +      }
> +    }
> +
> +    if (TernaryInstruction::isClassOf(*this)) {
> +      if (opcode == OP_MAD)
> +        return true;
> +    }
> +    return false;
> +  }
> 
>    bool Instruction::hasSideEffect(void) const {
>      return opcode == OP_STORE ||
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index b2b0b49..f710b5e 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -184,10 +184,21 @@ namespace ir {
>      RegisterData getDstData(uint32_t ID = 0u) const;
>      /*! Get the register of the given destination */
>      RegisterData getSrcData(uint32_t ID = 0u) const;
> +    /*! whether the instruction directly maps to native instr.
> +     *  this is used to determine whether we can do aggressive
> +     *  register coaleascing for source and destination */
> +    bool isNativeInstruction() const;
>      /*! Set a register in src srcID */
>      void setSrc(uint32_t srcID, Register reg);
>      /*! Set a register in dst dstID */
>      void setDst(uint32_t dstID, Register reg);
> +    bool use(Register reg) const {
> +      for (uint32_t i = 0; i < getSrcNum(); i++) {
> +        if (getSrc(i) == reg)
> +          return true;
> +      }
> +      return false;
> +    }
>      /*! Is there any side effect in the memory sub-system? */
>      bool hasSideEffect(void) const;
>      /*! Get / set the parent basic block */ diff --git
> a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index 41cb783..7d78163 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -2272,6 +2272,64 @@ namespace gbe
>      });
>    }
> 
> +  bool useRegisters(const ir::Instruction *inst, std::vector<ir::Register>
> &regs) {
> +    if (regs.empty()) return false;
> +    for (auto & r : regs) {
> +      if (inst->use(r))
> +        return true;
> +    }
> +    return false;
> +  }
> +  void findPossibleReplaceChain(ir::Liveness &liveness, ir::Function &fn,
> +                           std::vector<ir::Register> &replaceChain,
> +                           ir::Register phi, ir::Register phiCopy,
> +                           const ir::BasicBlock *bb, ir::BasicBlock::const_iterator
> searchEnd) {
> +    ir::BasicBlock::const_iterator iter = bb->begin();
> +    ir::Register targetReg = phi;
> +    const ir::Liveness::LiveOut &out = liveness.getLiveOut(bb);
> +
> +    for ( ; iter != searchEnd; ++iter) {
> +      const ir::Instruction *inst = iter.node();
> +      if (useRegisters(inst, replaceChain)) {
> +        replaceChain.clear();
> +        // should stop
> +        break;
> +      }
> +      if (inst->use(targetReg)) {
> +        if (!inst->isNativeInstruction()) {
> +          replaceChain.clear();
> +          // we need to stop further coaleascing
> +          // in fact we need careful consideration here.
> +          // I just break and clear the replaceChain now.
> +          break;
> +        }
> +
> +        const ir::RegisterData oldData = fn.getRegisterData(targetReg);
> +        const ir::RegisterData newData = fn.getRegisterData(inst->getDst(0));
> +        if(oldData.family != newData.family) {
> +          replaceChain.clear();
> +          break;
> +        }
> +        // the value cannot be an liveout
> +        if (out.contains(targetReg)) {
> +          replaceChain.clear();
> +          break;
> +        }
> +
> +        replaceChain.push_back(targetReg);
> +        targetReg = inst->getDst(0);
> +      }
> +    }
> +    // check no use of the registers in replaceChain till the bb->end()
> +    if (!replaceChain.empty()) {
> +      for (; iter != bb->end(); ++iter) {
> +        if (useRegisters(iter.node(), replaceChain)) {
> +          replaceChain.clear();
> +        }
> +      }
> +    }
> +  }
> +
>    void GenWriter::optimizePhiCopy(ir::Liveness &liveness, ir::Function &fn,
>            map<ir::Register, ir::Register> &replaceMap,
>            map<ir::Register, ir::Register> &redundantPhiCopyMap) @@ -2292,7
> +2350,7 @@ namespace gbe
>        const ir::DefSet *phiCopyDef = dag->getRegDef(phiCopy);
>        const ir::UseSet *phiUse = dag->getRegUse(phi);
>        const DefSet *phiDef = dag->getRegDef(phi);
> -      bool isOpt = true;
> +      bool coalescePhiPhiCopy = true;
> 
>        // FIXME, I find under some situation, the phiDef maybe null, seems a
> bug when building FunctionDAg.
>        // need fix it there.
> @@ -2307,7 +2365,7 @@ namespace gbe
>          // phi & phiCopy are both alive at the endpoint of bb,
>          // thus can not be optimized.
>          if (out.contains(phi)) {
> -          isOpt = false;
> +          coalescePhiPhiCopy = false;
>            break;
>          }
> 
> @@ -2333,7 +2391,17 @@ namespace gbe
>              //    add x2, x, 1
>              //    mov x-copy, x2
>              //  obviously x2, x-copy and x2 can be mapped to same virtual register
> -
> +            //  the situation may be more complex that this example, like
> +            // bb2:
> +            //    mov x, x-copy
> +            //    add x2, x, 1
> +            //    mul x3, x2, 2
> +            //    mov x-copy, x3
> +            // that's why we add findPossibleReplaceChain() to solve such case.
> +            // that is to replace all registers in the chain (x2, x3) by x-copy.
> +            // note I did not merge the new 'ReplaceChain' with previous
> phiCopy/phiCopySrc
> +            // coaleascing, the reason is 'ReplaceChain' is very hard to match. But
> +            // phiCopy/phiCopySrc is eady to meet. The code here need further
> refine.
>              ir::BasicBlock::const_iterator iter =
> ir::BasicBlock::const_iterator(phiCopySrcDefInsn);
>              ir::BasicBlock::const_iterator iterE = bb->end();
> 
> @@ -2351,6 +2419,12 @@ namespace gbe
>                }
>                ++iter;
>              }
> +
> +            // Go through the basic block, to find chain of [phi, ... phiCopy]
> +            std::vector<ir::Register> replaceChain;
> +            findPossibleReplaceChain(liveness, fn, replaceChain, phi, phiCopy, bb,
> +
> + ir::BasicBlock::const_iterator(phiCopyDefInsn));
> +
>              if (!phiPhiCopySrcInterfere) {
>                replaceSrc(const_cast<Instruction *>(phiCopyDefInsn), phiCopySrc,
> phiCopy);
> 
> @@ -2364,6 +2438,21 @@ namespace gbe
>                  replaceSrc(const_cast<Instruction *>(phiSrcUseInsn), phiCopySrc,
> phiCopy);
>                }
>                replaceMap.insert(std::make_pair(phiCopySrc, phiCopy));
> +
> +              if (!replaceChain.empty()) {
> +                for (auto &r : replaceChain) {
> +                  const ir::DefSet *d = dag->getRegDef(r);
> +                  const ir::UseSet *u = dag->getRegUse(r);
> +                  replaceMap.insert(std::make_pair(r, phiCopy));
> +
> +                  for (auto &dd : *d) {
> +                    replaceDst(const_cast<Instruction *>(dd->getInstruction()), r,
> phiCopy);
> +                  }
> +                  for (auto &uu : *u) {
> +                    replaceSrc(const_cast<Instruction *>(uu->getInstruction()), r,
> phiCopy);
> +                  }
> +                }
> +              }
>              }
>            }
>          } else {
> @@ -2396,7 +2485,7 @@ namespace gbe
>              if (&p == phiCopyDefInsn) break;
>              // we only care MOV here
>              if (p.getSrcNum() == 1 && p.getSrc(0) == phi) {
> -              isOpt = false;
> +              coalescePhiPhiCopy = false;
>                break;
>              }
>            }
> @@ -2404,14 +2493,14 @@ namespace gbe
>        }
> 
>        // coalease phi and phiCopy
> -      if (isOpt) {
> +      if (coalescePhiPhiCopy) {
> +        replaceMap.insert(std::make_pair(phi, phiCopy));
>          for (auto &x : *phiDef) {
>            replaceDst(const_cast<Instruction *>(x->getInstruction()), phi,
> phiCopy);
>          }
>          for (auto &x : *phiUse) {
>            const Instruction *phiUseInsn = x->getInstruction();
>            replaceSrc(const_cast<Instruction *>(phiUseInsn), phi, phiCopy);
> -          replaceMap.insert(std::make_pair(phi, phiCopy));
>          }
>        }
>      }
> --
> 2.4.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list