[Beignet] [PATCH V3] backend: add global immediate optimization

rander.wang rander.wang at intel.com
Mon Jun 12 07:10:02 UTC 2017


        there are some global immediates in global var list of LLVM.
        these imm can be integrated in instructions. for compiler_global_immediate_optimized test
        in utest, there are two global immediates:
        L0:
            MOV(1)              %42<0>:UD       :       0x0:UD
            MOV(1)              %43<0>:UD       :       0x30:UD

            used by:
            ADD(16)             %49<1>:D        :       %42<0,1,0>:D    %48<8,8,1>:D
            ADD(16)             %54<1>:D        :       %43<0,1,0>:D    %53<8,8,1>:D

        it can be
            ADD(16)             %49<1>:D    :       %48<8,8,1>:D   0x0:UD
            ADD(16)             %54<1>:D    :       %53<8,8,1>:D   0x30:UD

	Then the MOV can be removed. And after this optimization, ADD 0 can be change
	to MOV, then local copy propagation can be done.

	V2: (1) add environment variable to enable/disable the optimization
	    (2) refine the architecture of imm optimization, inherit from global
                optimizer not local block optimizer

	V3: merge with latest master driver

Signed-off-by: rander.wang <rander.wang at intel.com>
---
 .../src/backend/gen_insn_selection_optimize.cpp    | 356 +++++++++++++++++++--
 1 file changed, 331 insertions(+), 25 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection_optimize.cpp b/backend/src/backend/gen_insn_selection_optimize.cpp
index 07547ec..4d4232f 100644
--- a/backend/src/backend/gen_insn_selection_optimize.cpp
+++ b/backend/src/backend/gen_insn_selection_optimize.cpp
@@ -40,6 +40,33 @@ namespace gbe
     return elements;
   }
 
+  class ReplaceInfo
+  {
+  public:
+    ReplaceInfo(SelectionInstruction &insn,
+                const GenRegister &intermedia,
+                const GenRegister &replacement) : insn(insn), intermedia(intermedia), replacement(replacement)
+    {
+      assert(insn.opcode == SEL_OP_MOV || insn.opcode == SEL_OP_ADD);
+      assert(&(insn.dst(0)) == &intermedia);
+      this->elements = CalculateElements(intermedia, insn.state.execWidth);
+      replacementOverwritten = false;
+    }
+    ~ReplaceInfo()
+    {
+      this->toBeReplaceds.clear();
+    }
+
+    SelectionInstruction &insn;
+    const GenRegister &intermedia;
+    uint32_t elements;
+    const GenRegister &replacement;
+    set<GenRegister *> toBeReplaceds;
+    set<SelectionInstruction*> toBeReplacedInsns;
+    bool replacementOverwritten;
+    GBE_CLASS(ReplaceInfo);
+  };
+
   class SelOptimizer
   {
   public:
@@ -66,32 +93,7 @@ namespace gbe
 
   private:
     // local copy propagation
-    class ReplaceInfo
-    {
-    public:
-      ReplaceInfo(SelectionInstruction& insn,
-                  const GenRegister& intermedia,
-                  const GenRegister& replacement) :
-                  insn(insn), intermedia(intermedia), replacement(replacement)
-      {
-        assert(insn.opcode == SEL_OP_MOV || insn.opcode == SEL_OP_ADD);
-        assert(&(insn.dst(0)) == &intermedia);
-        this->elements = CalculateElements(intermedia, insn.state.execWidth);
-        replacementOverwritten = false;
-      }
-      ~ReplaceInfo()
-      {
-        this->toBeReplaceds.clear();
-      }
 
-      SelectionInstruction& insn;
-      const GenRegister& intermedia;
-      uint32_t elements;
-      const GenRegister& replacement;
-      set<GenRegister*> toBeReplaceds;
-      bool replacementOverwritten;
-      GBE_CLASS(ReplaceInfo);
-    };
     typedef map<ir::Register, ReplaceInfo*> ReplaceInfoMap;
     ReplaceInfoMap replaceInfoMap;
     void doLocalCopyPropagation();
@@ -298,13 +300,317 @@ namespace gbe
     virtual void run();
   };
 
+  class SelGlobalImmMovOpt : public SelGlobalOptimizer
+  {
+  public:
+    SelGlobalImmMovOpt(const GenContext& ctx, uint32_t features, intrusive_list<SelectionBlock> *blockList) :
+      SelGlobalOptimizer(ctx, features)
+      {
+        mblockList = blockList;
+      }
+
+    virtual void run();
+
+    void addToReplaceInfoMap(SelectionInstruction& insn);
+    void doGlobalCopyPropagation();
+    bool CanBeReplaced(const ReplaceInfo* info, SelectionInstruction& insn, const GenRegister& var);
+    void cleanReplaceInfoMap();
+    void doReplacement(ReplaceInfo* info);
+
+  private:
+    intrusive_list<SelectionBlock> *mblockList;
+
+    typedef map<ir::Register, ReplaceInfo*> ReplaceInfoMap;
+    ReplaceInfoMap replaceInfoMap;
+
+  };
+
+  extern void outputSelectionInst(SelectionInstruction &insn);
+
+  void SelGlobalImmMovOpt::cleanReplaceInfoMap()
+  {
+    for (auto& pair : replaceInfoMap) {
+      ReplaceInfo* info = pair.second;
+      doReplacement(info);
+      delete info;
+    }
+    replaceInfoMap.clear();
+  }
+
+  void SelGlobalImmMovOpt::doReplacement(ReplaceInfo* info)
+  {
+    for (GenRegister* reg : info->toBeReplaceds) {
+      GenRegister::propagateRegister(*reg, info->replacement);
+    }
+
+    //after imm opt, maybe both src are imm, convert it to mov
+    for (SelectionInstruction* insn : info->toBeReplacedInsns) {
+      GenRegister& src0 = insn->src(0);
+      GenRegister& src1 = insn->src(1);
+      if (src0.file == GEN_IMMEDIATE_VALUE)
+      {
+        if (src1.file == GEN_IMMEDIATE_VALUE)
+        {
+          if (src0.type == GEN_TYPE_F)
+          {
+            float s0 = src0.value.f;
+            if (src0.absolute)
+              s0 = fabs(s0);
+            if (src0.negation)
+              s0 = -s0;
+
+            float s1 = src1.value.f;
+            if (src1.absolute)
+              s1 = fabs(s1);
+            if (src1.negation)
+              s1 = -s1;
+
+            float result;
+            switch(insn->opcode)
+            {
+              case SEL_OP_ADD:
+                result = s0 + s1;
+                break;
+              case SEL_OP_MUL:
+                result = s0 * s1;
+                break;
+              default:
+                assert(0);
+                break;
+            }
+
+            insn->src(0) = GenRegister::immf(result);
+          }
+          else if (src0.type == GEN_TYPE_D || src0.type == GEN_TYPE_UD)
+          {
+            int s0 = src0.value.d;
+            if (src0.absolute)
+              s0 = fabs(s0);
+            if (src0.negation)
+              s0 = -s0;
+
+            int s1 = src1.value.d;
+            if (src1.absolute)
+              s1 = fabs(s1);
+            if (src1.negation)
+              s1 = -s1;
+
+            int result;
+            switch(insn->opcode)
+            {
+              case SEL_OP_ADD:
+                result = s0 + s1;
+                break;
+              case SEL_OP_MUL:
+                result = s0 * s1;
+                break;
+              case SEL_OP_AND:
+                result = s0 & s1;
+                break;
+              case SEL_OP_OR:
+                result = s0 | s1;
+                break;
+              case SEL_OP_XOR:
+                result = s0 ^ s1;
+                break;
+              default:
+                assert(0);
+                break;
+            }
+
+            insn->src(0) = GenRegister::immd(result);
+          }
+
+          insn->opcode = SEL_OP_MOV;
+          insn->srcNum = 1;
+        }
+        else
+        {
+          //src0 cant be immediate, so exchange with src1
+          GenRegister tmp;
+          tmp = src0;
+          src0 = src1;
+          src1 = tmp;
+        }
+      }
+    }
+
+    info->insn.parent->insnList.erase(&(info->insn));
+  }
+
+  void SelGlobalImmMovOpt::addToReplaceInfoMap(SelectionInstruction& insn)
+  {
+    assert(insn.opcode == SEL_OP_MOV);
+    const GenRegister& src = insn.src(0);
+    const GenRegister& dst = insn.dst(0);
+    if (src.type != dst.type)
+      return;
+
+    ReplaceInfo* info = new ReplaceInfo(insn, dst, src);
+    replaceInfoMap[dst.reg()] = info;
+  }
+
+  bool SelGlobalImmMovOpt::CanBeReplaced(const ReplaceInfo* info, SelectionInstruction& insn, const GenRegister& var)
+  {
+    switch(insn.opcode)
+    {
+      // imm source is supported by these instructions. And
+      // the src operators can be exchange in these instructions
+      case SEL_OP_ADD:
+      case SEL_OP_MUL:
+      case SEL_OP_AND:
+      case SEL_OP_OR:
+      case SEL_OP_XOR:
+        break;
+      default:
+        return false;
+    }
+
+    if(info->intermedia.type != var.type)
+    {
+        bool isOk = false;
+        assert(info->replacement.file == GEN_IMMEDIATE_VALUE);
+        if(var.type == GEN_TYPE_D && info->intermedia.type == GEN_TYPE_UD)
+        {
+          if(info->replacement.value.ud >= 0x80000000)
+            return false;
+          isOk = true;
+        }
+
+        if(var.type == GEN_TYPE_UD && info->intermedia.type == GEN_TYPE_D)
+        {
+          if(info->replacement.value.d < 0)
+            return false;
+          isOk = true;
+        }
+
+        if(var.file == GEN_IMMEDIATE_VALUE)
+          isOk = false;
+
+        if(!isOk) return false;
+    }
+
+    if (info->intermedia.quarter == var.quarter &&
+          info->intermedia.subnr == var.subnr &&
+            info->intermedia.nr == var.nr)
+    {
+      uint32_t elements = CalculateElements(var, insn.state.execWidth);  //considering width, hstrid, vstrid and execWidth
+      if (info->elements != elements)
+        return false;
+    }
+
+#ifdef DEBUG_GLOBAL_IMM_OPT
+    outputSelectionInst(insn);
+#endif
+
+    return true;
+  }
+
+  void SelGlobalImmMovOpt::doGlobalCopyPropagation()
+  {
+    for(SelectionBlock &block : *mblockList)
+    {
+      for (SelectionInstruction &insn :block.insnList)
+      {
+        for (uint8_t i = 0; i < insn.srcNum; ++i)
+        {
+            ReplaceInfoMap::iterator it = replaceInfoMap.find(insn.src(i).reg());
+            if (it != replaceInfoMap.end())
+            {
+              ReplaceInfo *info = it->second;
+              if (CanBeReplaced(info, insn, insn.src(i)))
+              {
+                info->toBeReplaceds.insert(&insn.src(i));
+                info->toBeReplacedInsns.insert(&insn);
+              }
+              else
+              {
+                replaceInfoMap.erase(it);
+                delete info;
+              }
+            }
+        }
+
+        for (uint8_t i = 0; i < insn.dstNum; ++i)
+        {
+          ReplaceInfoMap::iterator it = replaceInfoMap.find(insn.dst(i).reg());
+          if (it != replaceInfoMap.end())
+          {
+            ReplaceInfo *info = it->second;
+            if(&(info->insn) != &insn)
+            {
+              replaceInfoMap.erase(it);
+              delete info;
+            }
+          }
+        }
+      }
+
+      if(replaceInfoMap.empty())
+        break;
+    }
+
+    cleanReplaceInfoMap();
+  }
+
+  void SelGlobalImmMovOpt::run()
+  {
+    bool canbeOpt = false;
+
+    /*global immediates are set in entry block, the following is example of GEN IR
+
+          decl_input.global %41 dst
+            ## 0 output register ##
+            ## 0 pushed register
+            ## 3 blocks ##
+          LABEL $0
+            LOADI.uint32 %42 0
+            LOADI.uint32 %43 48
+
+          LABEL $1
+            MUL.int32 %44 %3   %12
+            ADD.int32 %49 %42 %48
+            ...
+    */
+    SelectionBlock &block = *mblockList->begin();
+    for(SelectionInstruction &insn : block.insnList)
+    {
+        GenRegister src0 = insn.src(0);
+        if(insn.opcode == SEL_OP_MOV &&
+            src0.file == GEN_IMMEDIATE_VALUE &&
+            (src0.type == GEN_TYPE_UD || src0.type == GEN_TYPE_UD || src0.type == GEN_TYPE_F) &&
+              insn.state.predicate == GEN_PREDICATE_NONE)
+        {
+          addToReplaceInfoMap(insn);
+          canbeOpt = true;
+
+#ifdef DEBUG_GLOBAL_IMM_OPT
+          outputSelectionInst(insn);
+#endif
+        }
+    }
+
+    if(!canbeOpt) return;
+
+    doGlobalCopyPropagation();
+  }
+
   void SelGlobalOptimizer::run()
   {
 
   }
 
+  BVAR(OCL_GLOBAL_IMM_OPTIMIZATION, true);
+
   void Selection::optimize()
   {
+    //do global imm opt first to make more local opt
+    if(OCL_GLOBAL_IMM_OPTIMIZATION)
+    {
+      SelGlobalImmMovOpt gopt(getCtx(), opt_features, blockList);
+      gopt.run();
+    }
+
     //do basic block level optimization
     for (SelectionBlock &block : *blockList) {
       SelBasicBlockOptimizer bbopt(getCtx(), getCtx().getLiveOut(block.bb), opt_features, block);
-- 
2.7.4



More information about the Beignet mailing list