[Beignet] [PATCH V2 3/3] add local copy propagation optimization for each basic block

Guo Yejun yejun.guo at intel.com
Thu Sep 10 17:50:49 PDT 2015


It is done at selection ir level, it removes MOV instructions
and helps to reduce the register pressure.

For instructions like:
    MOV(8)              %42<2>:UB	:	%53<32,8,4>:UB
    ADD(8)              %43<2>:B	:	%40<16,8,2>:B	-%42<16,8,2>:B
can be optimized as:
    ADD(8)              %43<2>:UB	:	%56<32,8,4>:UB	-%53<32,8,4>:UB

Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp         |   4 +
 backend/src/backend/gen_insn_selection.hpp         |   2 +
 .../src/backend/gen_insn_selection_optimize.cpp    | 201 ++++++++++++++++++++-
 3 files changed, 202 insertions(+), 5 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 28c7ed2..038b839 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2062,6 +2062,10 @@ namespace gbe
   ///////////////////////////////////////////////////////////////////////////
   // Code selection public implementation
   ///////////////////////////////////////////////////////////////////////////
+  const GenContext& Selection::getCtx()
+  {
+    return this->opaque->ctx;
+  }
 
   Selection::Selection(GenContext &ctx) {
     this->blockList = NULL;
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 86542b0..275eb9c 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -271,6 +271,8 @@ namespace gbe
     void optimize(void);
     uint32_t opt_features;
 
+    const GenContext &getCtx();
+
     /*! Use custom allocators */
     GBE_CLASS(Selection);
   };
diff --git a/backend/src/backend/gen_insn_selection_optimize.cpp b/backend/src/backend/gen_insn_selection_optimize.cpp
index c82fbe5..1c2f559 100644
--- a/backend/src/backend/gen_insn_selection_optimize.cpp
+++ b/backend/src/backend/gen_insn_selection_optimize.cpp
@@ -12,38 +12,229 @@
 
 namespace gbe
 {
+  //helper functions
+  static uint32_t CalculateElements(const GenRegister& reg, uint32_t execWidth)
+  {
+    uint32_t elements = 0;
+    uint32_t elementSize = typeSize(reg.type);
+    uint32_t width = GenRegister::width_size(reg);
+    assert(execWidth >= width);
+    uint32_t height = execWidth / width;
+    uint32_t vstride = GenRegister::vstride_size(reg);
+    uint32_t hstride = GenRegister::hstride_size(reg);
+    uint32_t base = reg.subnr;
+    for (uint32_t i = 0; i < height; ++i) {
+      uint32_t offsetInByte = base;
+      for (uint32_t j = 0; j < width; ++j) {
+        uint32_t offsetInType = offsetInByte / elementSize;
+        elements |= (1 << offsetInType);
+        offsetInByte += hstride * elementSize;
+      }
+      offsetInByte += vstride * elementSize;
+    }
+    return elements;
+  }
 
   class SelOptimizer
   {
   public:
-    SelOptimizer(uint32_t features) : features(features) {}
+    SelOptimizer(const GenContext& ctx, uint32_t features) : ctx(ctx), features(features) {}
     virtual void run() = 0;
     virtual ~SelOptimizer() {}
   protected:
+    const GenContext &ctx;      //in case that we need it
     uint32_t features;
   };
 
   class SelBasicBlockOptimizer : public SelOptimizer
   {
   public:
-    SelBasicBlockOptimizer(uint32_t features, SelectionBlock &bb) : SelOptimizer(features), bb(bb) {}
+    SelBasicBlockOptimizer(const GenContext& ctx,
+                           const ir::Liveness::LiveOut& liveout,
+                           uint32_t features,
+                           SelectionBlock &bb) :
+        SelOptimizer(ctx, features), bb(bb), liveout(liveout), optimized(false)
+    {
+    }
     ~SelBasicBlockOptimizer() {}
     virtual void run();
 
   private:
+    // local copy propagation
+    class ReplaceInfo
+    {
+    public:
+      ReplaceInfo(SelectionInstruction& insn,
+                  const GenRegister& intermedia,
+                  const GenRegister& replacement,
+                  uint32_t execWidth) :
+                  insn(insn), intermedia(intermedia), replacement(replacement)
+      {
+        assert(insn.opcode == SEL_OP_MOV);
+        assert(&(insn.src(0)) == &replacement);
+        assert(&(insn.dst(0)) == &intermedia);
+        this->elements = CalculateElements(intermedia, execWidth);
+        replacementChanged = false;
+      }
+      ~ReplaceInfo()
+      {
+        this->toBeReplacements.clear();
+      }
+
+      SelectionInstruction& insn;
+      const GenRegister& intermedia;
+      uint32_t elements;
+      const GenRegister& replacement;
+      set<GenRegister*> toBeReplacements;
+      bool replacementChanged;
+      GBE_CLASS(ReplaceInfo);
+    };
+    set<ReplaceInfo*> replaceInfos;
+    void doLocalCopyPropagation();
+    void addToReplaceInfos(SelectionInstruction& insn);
+    void changeInsideReplaceInfos(const SelectionInstruction& insn, GenRegister& var);
+    void removeFromReplaceInfos(const GenRegister& var);
+    void doReplacement(ReplaceInfo* info);
+    void cleanReplaceInfos();
+    static void propagateRegister(GenRegister& dst, const GenRegister& src);
+
     SelectionBlock &bb;
-    static const size_t MaxTries = 1;   //the times for optimization
+    const ir::Liveness::LiveOut& liveout;
+    bool optimized;
+    static const size_t MaxTries = 1;   //the max times of optimization try
   };
 
+  void SelBasicBlockOptimizer::propagateRegister(GenRegister& dst, const GenRegister& src)
+  {
+    dst.type = src.type;
+    dst.file = src.file;
+    dst.physical = src.physical;
+    dst.subphysical = src.subphysical;
+    dst.value.reg = src.value.reg;
+    dst.vstride = src.vstride;
+    dst.width = src.width;
+    dst.hstride = src.hstride;
+    dst.quarter = src.quarter;
+    dst.nr = src.nr;
+    dst.subnr = src.subnr;
+    dst.address_mode = src.address_mode;
+    dst.a0_subnr = src.a0_subnr;
+    dst.addr_imm = src.addr_imm;
+
+    dst.negation = dst.negation ^ src.negation;
+    dst.absolute = dst.absolute | src.absolute;
+  }
+
+  void SelBasicBlockOptimizer::doReplacement(ReplaceInfo* info)
+  {
+    for (GenRegister* reg : info->toBeReplacements) {
+      propagateRegister(*reg, info->replacement);
+    }
+    bb.insnList.erase(&(info->insn));
+  }
+
+  void SelBasicBlockOptimizer::cleanReplaceInfos()
+  {
+    for (ReplaceInfo* info : replaceInfos) {
+      doReplacement(info);
+      delete info;
+    }
+    replaceInfos.clear();
+  }
+
+  void SelBasicBlockOptimizer::removeFromReplaceInfos(const GenRegister& var)
+  {
+    for (set<ReplaceInfo*>::iterator pos = replaceInfos.begin(); pos != replaceInfos.end(); ) {
+      ReplaceInfo* info = *pos;
+      if (info->intermedia.reg() == var.reg()) {
+        if (info->intermedia.quarter == var.quarter && info->intermedia.subnr == var.subnr)
+          doReplacement(info);
+        replaceInfos.erase(pos++);
+        delete info;
+      }
+      else if (info->replacement.reg() == var.reg()) {
+        info->replacementChanged = true;
+        ++pos;
+      }
+      else
+        ++pos;
+    }
+  }
+
+  void SelBasicBlockOptimizer::addToReplaceInfos(SelectionInstruction& insn)
+  {
+    assert(insn.opcode == SEL_OP_MOV);
+    const GenRegister& src = insn.src(0);
+    const GenRegister& dst = insn.dst(0);
+    if (src.type != dst.type || src.file != dst.file)
+      return;
+
+    if (liveout.find(dst.reg()) != liveout.end())
+      return;
+
+    ReplaceInfo* info = new ReplaceInfo(insn, dst, src, insn.state.execWidth);
+    replaceInfos.insert(info);
+  }
+
+  void SelBasicBlockOptimizer::changeInsideReplaceInfos(const SelectionInstruction& insn, GenRegister& var)
+  {
+    for (ReplaceInfo* info : replaceInfos) {
+      if (info->intermedia.reg() == var.reg()) {
+        bool replaceable = false;
+        if (insn.opcode != SEL_OP_BSWAP && !insn.isWrite()) {
+          if (!info->replacementChanged && info->intermedia.type == var.type && info->intermedia.quarter == var.quarter && info->intermedia.subnr == var.subnr) {
+            uint32_t elements = CalculateElements(var, insn.state.execWidth);
+            if (info->elements == elements) {
+              info->toBeReplacements.insert(&var);
+              replaceable = true;
+            }
+          }
+        }
+
+        //if we found the same register, but could not be replaced for some reason,
+        //that means we could not remove MOV instruction, and so no replacement,
+        //so we'll remove the info for this case.
+        if (!replaceable) {
+          replaceInfos.erase(info);
+          delete info;
+        }
+        break;
+      }
+    }
+  }
+
+  void SelBasicBlockOptimizer::doLocalCopyPropagation()
+  {
+    for (SelectionInstruction &insn : bb.insnList) {
+      for (uint8_t i = 0; i < insn.srcNum; ++i)
+        changeInsideReplaceInfos(insn, insn.src(i));
+
+      for (uint8_t i = 0; i < insn.dstNum; ++i)
+        removeFromReplaceInfos(insn.dst(i));
+
+      if (insn.opcode == SEL_OP_MOV)
+        addToReplaceInfos(insn);
+    }
+    cleanReplaceInfos();
+  }
+
   void SelBasicBlockOptimizer::run()
   {
+    for (size_t i = 0; i < MaxTries; ++i) {
+      optimized = false;
 
+      doLocalCopyPropagation();
+      //doOtherLocalOptimization();
+
+      if (!optimized)
+        break;      //break since no optimization found at this round
+    }
   }
 
   class SelGlobalOptimizer : public SelOptimizer
   {
   public:
-    SelGlobalOptimizer(uint32_t features) : SelOptimizer(features) {}
+    SelGlobalOptimizer(const GenContext& ctx, uint32_t features) : SelOptimizer(ctx, features) {}
     ~SelGlobalOptimizer() {}
     virtual void run();
   };
@@ -57,7 +248,7 @@ namespace gbe
   {
     //do basic block level optimization
     for (SelectionBlock &block : *blockList) {
-      SelBasicBlockOptimizer bbopt(opt_features, block);
+      SelBasicBlockOptimizer bbopt(getCtx(), getCtx().getLiveOut(block.bb), opt_features, block);
       bbopt.run();
     }
 
-- 
1.9.1



More information about the Beignet mailing list