[Beignet] [PATCH 3/3] add optimization for local copy propagation

Guo Yejun yejun.guo at intel.com
Sun Sep 6 14:28:09 PDT 2015


it is done at selection ir level, for instructions like:
    MOV(8)              %42<2>:UB	:	%53<32,8,4>:UB
    ADD(8)              %43<2>:B	:	%40<16,8,2>:B	-%42<16,8,2>:B
can be optimized as:
    MOV(8)              %42<2>:UB	:	%53<32,8,4>:UB
    ADD(8)              %43<2>:UB	:	%56<32,8,4>:UB	-%53<32,8,4>:UB

the optimization is done for each basic block, we here can not remove
instruction "MOV %42 ..." since it is possible that %42 could be used
at other place in the same block or even in other blocks. We need to
add another path such as dead code elimination at global level to remove it.

Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 .../src/backend/gen_insn_selection_optimize.cpp    | 145 +++++++++++++++++++++
 1 file changed, 145 insertions(+)

diff --git a/backend/src/backend/gen_insn_selection_optimize.cpp b/backend/src/backend/gen_insn_selection_optimize.cpp
index c82fbe5..b196a4d 100644
--- a/backend/src/backend/gen_insn_selection_optimize.cpp
+++ b/backend/src/backend/gen_insn_selection_optimize.cpp
@@ -20,6 +20,40 @@ namespace gbe
     virtual void run() = 0;
     virtual ~SelOptimizer() {}
   protected:
+    //we need info derived from execWdith, but it is not contained inside GenRegister
+    class ExpandedRegister
+    {
+    public:
+      ExpandedRegister(const GenRegister& reg, uint32_t execWidth) : genreg(reg)
+      {
+        elements = CalculateElements(reg, execWidth);
+      }
+      ~ExpandedRegister() {}
+      static uint32_t CalculateElements(const GenRegister& reg, uint32_t execWidth)
+      {
+        uint32_t elements = 0;
+        uint32_t elementSize = typeSize(reg.type);
+        uint32_t width = GenRegister::width_size(reg);
+        assert(execWidth >= width);
+        uint32_t height = execWidth / width;
+        uint32_t vstride = GenRegister::vstride_size(reg);
+        uint32_t hstride = GenRegister::hstride_size(reg);
+        uint32_t base = reg.subnr;
+        for (uint32_t i = 0; i < height; ++i) {
+          uint32_t offsetInByte = base;
+          for (uint32_t j = 0; j < width; ++j) {
+            uint32_t offsetInType = offsetInByte / elementSize;
+            elements |= (1 << offsetInType);            //right for dest register?
+            offsetInByte += hstride * elementSize;
+          }
+          offsetInByte += vstride * elementSize;
+        }
+        return elements;
+      }
+      const GenRegister& genreg;
+      uint32_t elements;
+    };
+
     uint32_t features;
   };
 
@@ -31,13 +65,124 @@ namespace gbe
     virtual void run();
 
   private:
+    // local copy propagation
+    typedef std::map<const ExpandedRegister*, const GenRegister*> RegisterMap;
+    static bool replaceWithCopytable(const RegisterMap& copytable, uint32_t execWidth, GenRegister& var);
+    static void addToCopytable(RegisterMap& copytable, uint32_t execWidth, const GenRegister& src, const GenRegister& dst);
+    static void removeFromCopytable(RegisterMap& copytable, const GenRegister& var);
+    static void cleanCopytable(RegisterMap& copytable);
+    static void propagateRegister(GenRegister& dst, const GenRegister& src);
+    bool doLocalCopyPropagation();
+
     SelectionBlock &bb;
     static const size_t MaxTries = 1;   //the times for optimization
   };
 
+  void SelBasicBlockOptimizer::propagateRegister(GenRegister& dst, const GenRegister& src)
+  {
+    dst.type = src.type;
+    dst.file = src.file;
+    dst.physical = src.physical;
+    dst.subphysical = src.subphysical;
+    dst.value.reg = src.value.reg;
+    dst.vstride = src.vstride;
+    dst.width = src.width;
+    dst.hstride = src.hstride;
+    dst.quarter = src.quarter;
+    dst.nr = src.nr;
+    dst.subnr = src.subnr;
+    dst.address_mode = src.address_mode;
+    dst.a0_subnr = src.a0_subnr;
+    dst.addr_imm = src.addr_imm;
+
+    dst.negation = dst.negation ^ src.negation;
+    dst.absolute = dst.absolute | src.absolute;
+  }
+
+  void SelBasicBlockOptimizer::cleanCopytable(RegisterMap& copytable)
+  {
+    for (RegisterMap::const_iterator pos = copytable.begin(); pos != copytable.end(); ++pos) {
+      const ExpandedRegister* key = pos->first;
+      delete key;
+    }
+    copytable.clear();
+  }
+
+  void SelBasicBlockOptimizer::removeFromCopytable(RegisterMap& copytable, const GenRegister& var)
+  {
+    for (RegisterMap::const_iterator pos = copytable.begin(); pos != copytable.end(); ) {
+      if (pos->first->genreg.reg() == var.reg() || pos->second->reg() == var.reg()) {
+        const ExpandedRegister* key = pos->first;
+        copytable.erase(pos++);
+	delete key;
+      }
+      else
+        ++pos;
+    }
+  }
+
+  void SelBasicBlockOptimizer::addToCopytable(RegisterMap& copytable, uint32_t execWidth, const GenRegister& src, const GenRegister& dst)
+  {
+    if (src.type != dst.type || src.file != dst.file)
+      return;
+
+    ExpandedRegister* expanded = new ExpandedRegister(dst, execWidth);
+    copytable[expanded] = &src;
+  }
+
+  bool SelBasicBlockOptimizer::replaceWithCopytable(const RegisterMap& copytable, uint32_t execWidth, GenRegister& var)
+  {
+    bool replaced = false;
+    for (RegisterMap::const_iterator pos = copytable.begin(); pos != copytable.end(); ++pos) {
+      const ExpandedRegister* key = pos->first;
+      if (key->genreg.reg() == var.reg()) {
+        if (key->genreg.type == var.type) {
+          uint32_t elements = ExpandedRegister::CalculateElements(var, execWidth);
+          if (key->elements == elements) {
+            propagateRegister(var, *(pos->second));
+            replaced = true;
+          }
+        }
+        break;
+      }
+    }
+    return replaced;
+  }
+
+  bool SelBasicBlockOptimizer::doLocalCopyPropagation()
+  {
+    bool optimized = false;
+    RegisterMap copytable;
+    for (SelectionInstruction &insn : bb.insnList) {
+      uint32_t execWidth = insn.state.execWidth;
+      if (insn.opcode != SEL_OP_BSWAP && !insn.isWrite()) {
+        for (uint8_t i = 0; i < insn.srcNum; ++i) {
+          bool replaced = replaceWithCopytable(copytable, execWidth, insn.src(i));
+          optimized = optimized || replaced;
+        }
+      }
+
+      for (uint8_t i = 0; i < insn.dstNum; ++i)
+        removeFromCopytable(copytable, insn.dst(i));
+
+      if (insn.opcode == SEL_OP_MOV)
+        addToCopytable(copytable, execWidth, insn.src(0), insn.dst(0));
+    }
+    cleanCopytable(copytable);
+    return optimized;
+  }
+
   void SelBasicBlockOptimizer::run()
   {
+    for (size_t i = 0; i < MaxTries; ++i) {
+      bool again = false;
+      again = again || doLocalCopyPropagation();
+
+      //again = again || doOtherLocalOptimization();
 
+      if (!again)
+        break;      //break since no optimization found at this round
+    }
   }
 
   class SelGlobalOptimizer : public SelOptimizer
-- 
1.9.1



More information about the Beignet mailing list