[Beignet] [PATCH 1/2] GBE: Further optimize phiNode elimination.

Ruiling Song ruiling.song at intel.com
Fri Jul 22 07:28:28 UTC 2016


Take below example:
bb2:
    mov x, x-copy
    add x2, x, 1
    mul x3, x2, 2
    mov x-copy, x3

this is a self-loop block. In previous implementation, we have try our
best to coaleasce phi/phiCopy/phiCopySrc. But in this example, we can
do more, we can even coaleasce x2 to x, x-copy and x3. in real world,
there may be many x2's lies in the use/def chain. So we do some check
to see whether we can totally coaleasce the use/def chain lies between
phi/phiCopySrc. We place various careful check in this path. Some may
be too strict, so we need further refine to this logic.

Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
 backend/src/ir/instruction.cpp        |  18 ++++++
 backend/src/ir/instruction.hpp        |  11 ++++
 backend/src/llvm/llvm_gen_backend.cpp | 101 ++++++++++++++++++++++++++++++++--
 3 files changed, 124 insertions(+), 6 deletions(-)

diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index ed64580..a5bf89f 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -2292,6 +2292,24 @@ END_FUNCTION(Instruction, Register)
     if (new_ins)
       *new_ins = insn;
   }
+  bool Instruction::isNativeInstruction() const {
+    if (BinaryInstruction::isClassOf(*this)) {
+      //const BinaryInstruction * const insn = cast<BinaryInstruction *>(this);
+      const BinaryInstruction &insn = cast<BinaryInstruction>(*this);
+      if (insn.getType() == ir::TYPE_U32 ||
+          insn.getType() == ir::TYPE_S32 ||
+          insn.getType() == ir::TYPE_FLOAT) {
+        if (opcode == OP_MUL || opcode == OP_ADD || opcode == OP_MOV)
+          return true;
+      }
+    }
+
+    if (TernaryInstruction::isClassOf(*this)) {
+      if (opcode == OP_MAD)
+        return true;
+    }
+    return false;
+  }
 
   bool Instruction::hasSideEffect(void) const {
     return opcode == OP_STORE ||
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index b2b0b49..f710b5e 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -184,10 +184,21 @@ namespace ir {
     RegisterData getDstData(uint32_t ID = 0u) const;
     /*! Get the register of the given destination */
     RegisterData getSrcData(uint32_t ID = 0u) const;
+    /*! whether the instruction directly maps to native instr.
+     *  this is used to determine whether we can do aggressive
+     *  register coaleascing for source and destination */
+    bool isNativeInstruction() const;
     /*! Set a register in src srcID */
     void setSrc(uint32_t srcID, Register reg);
     /*! Set a register in dst dstID */
     void setDst(uint32_t dstID, Register reg);
+    bool use(Register reg) const {
+      for (uint32_t i = 0; i < getSrcNum(); i++) {
+        if (getSrc(i) == reg)
+          return true;
+      }
+      return false;
+    }
     /*! Is there any side effect in the memory sub-system? */
     bool hasSideEffect(void) const;
     /*! Get / set the parent basic block */
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 41cb783..7d78163 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2272,6 +2272,64 @@ namespace gbe
     });
   }
 
+  bool useRegisters(const ir::Instruction *inst, std::vector<ir::Register> &regs) {
+    if (regs.empty()) return false;
+    for (auto & r : regs) {
+      if (inst->use(r))
+        return true;
+    }
+    return false;
+  }
+  void findPossibleReplaceChain(ir::Liveness &liveness, ir::Function &fn,
+                           std::vector<ir::Register> &replaceChain,
+                           ir::Register phi, ir::Register phiCopy,
+                           const ir::BasicBlock *bb, ir::BasicBlock::const_iterator searchEnd) {
+    ir::BasicBlock::const_iterator iter = bb->begin();
+    ir::Register targetReg = phi;
+    const ir::Liveness::LiveOut &out = liveness.getLiveOut(bb);
+
+    for ( ; iter != searchEnd; ++iter) {
+      const ir::Instruction *inst = iter.node();
+      if (useRegisters(inst, replaceChain)) {
+        replaceChain.clear();
+        // should stop
+        break;
+      }
+      if (inst->use(targetReg)) {
+        if (!inst->isNativeInstruction()) {
+          replaceChain.clear();
+          // we need to stop further coaleascing
+          // in fact we need careful consideration here.
+          // I just break and clear the replaceChain now.
+          break;
+        }
+
+        const ir::RegisterData oldData = fn.getRegisterData(targetReg);
+        const ir::RegisterData newData = fn.getRegisterData(inst->getDst(0));
+        if(oldData.family != newData.family) {
+          replaceChain.clear();
+          break;
+        }
+        // the value cannot be an liveout
+        if (out.contains(targetReg)) {
+          replaceChain.clear();
+          break;
+        }
+
+        replaceChain.push_back(targetReg);
+        targetReg = inst->getDst(0);
+      }
+    }
+    // check no use of the registers in replaceChain till the bb->end()
+    if (!replaceChain.empty()) {
+      for (; iter != bb->end(); ++iter) {
+        if (useRegisters(iter.node(), replaceChain)) {
+          replaceChain.clear();
+        }
+      }
+    }
+  }
+
   void GenWriter::optimizePhiCopy(ir::Liveness &liveness, ir::Function &fn,
           map<ir::Register, ir::Register> &replaceMap,
           map<ir::Register, ir::Register> &redundantPhiCopyMap)
@@ -2292,7 +2350,7 @@ namespace gbe
       const ir::DefSet *phiCopyDef = dag->getRegDef(phiCopy);
       const ir::UseSet *phiUse = dag->getRegUse(phi);
       const DefSet *phiDef = dag->getRegDef(phi);
-      bool isOpt = true;
+      bool coalescePhiPhiCopy = true;
 
       // FIXME, I find under some situation, the phiDef maybe null, seems a bug when building FunctionDAg.
       // need fix it there.
@@ -2307,7 +2365,7 @@ namespace gbe
         // phi & phiCopy are both alive at the endpoint of bb,
         // thus can not be optimized.
         if (out.contains(phi)) {
-          isOpt = false;
+          coalescePhiPhiCopy = false;
           break;
         }
 
@@ -2333,7 +2391,17 @@ namespace gbe
             //    add x2, x, 1
             //    mov x-copy, x2
             //  obviously x2, x-copy and x2 can be mapped to same virtual register
-
+            //  the situation may be more complex that this example, like
+            // bb2:
+            //    mov x, x-copy
+            //    add x2, x, 1
+            //    mul x3, x2, 2
+            //    mov x-copy, x3
+            // that's why we add findPossibleReplaceChain() to solve such case.
+            // that is to replace all registers in the chain (x2, x3) by x-copy.
+            // note I did not merge the new 'ReplaceChain' with previous phiCopy/phiCopySrc
+            // coaleascing, the reason is 'ReplaceChain' is very hard to match. But
+            // phiCopy/phiCopySrc is eady to meet. The code here need further refine.
             ir::BasicBlock::const_iterator iter = ir::BasicBlock::const_iterator(phiCopySrcDefInsn);
             ir::BasicBlock::const_iterator iterE = bb->end();
 
@@ -2351,6 +2419,12 @@ namespace gbe
               }
               ++iter;
             }
+
+            // Go through the basic block, to find chain of [phi, ... phiCopy]
+            std::vector<ir::Register> replaceChain;
+            findPossibleReplaceChain(liveness, fn, replaceChain, phi, phiCopy, bb,
+                                     ir::BasicBlock::const_iterator(phiCopyDefInsn));
+
             if (!phiPhiCopySrcInterfere) {
               replaceSrc(const_cast<Instruction *>(phiCopyDefInsn), phiCopySrc, phiCopy);
 
@@ -2364,6 +2438,21 @@ namespace gbe
                 replaceSrc(const_cast<Instruction *>(phiSrcUseInsn), phiCopySrc, phiCopy);
               }
               replaceMap.insert(std::make_pair(phiCopySrc, phiCopy));
+
+              if (!replaceChain.empty()) {
+                for (auto &r : replaceChain) {
+                  const ir::DefSet *d = dag->getRegDef(r);
+                  const ir::UseSet *u = dag->getRegUse(r);
+                  replaceMap.insert(std::make_pair(r, phiCopy));
+
+                  for (auto &dd : *d) {
+                    replaceDst(const_cast<Instruction *>(dd->getInstruction()), r, phiCopy);
+                  }
+                  for (auto &uu : *u) {
+                    replaceSrc(const_cast<Instruction *>(uu->getInstruction()), r, phiCopy);
+                  }
+                }
+              }
             }
           }
         } else {
@@ -2396,7 +2485,7 @@ namespace gbe
             if (&p == phiCopyDefInsn) break;
             // we only care MOV here
             if (p.getSrcNum() == 1 && p.getSrc(0) == phi) {
-              isOpt = false;
+              coalescePhiPhiCopy = false;
               break;
             }
           }
@@ -2404,14 +2493,14 @@ namespace gbe
       }
 
       // coalease phi and phiCopy
-      if (isOpt) {
+      if (coalescePhiPhiCopy) {
+        replaceMap.insert(std::make_pair(phi, phiCopy));
         for (auto &x : *phiDef) {
           replaceDst(const_cast<Instruction *>(x->getInstruction()), phi, phiCopy);
         }
         for (auto &x : *phiUse) {
           const Instruction *phiUseInsn = x->getInstruction();
           replaceSrc(const_cast<Instruction *>(phiUseInsn), phi, phiCopy);
-          replaceMap.insert(std::make_pair(phi, phiCopy));
         }
       }
     }
-- 
2.4.1



More information about the Beignet mailing list