[Beignet] [PATCH] do constant folding for kernel struct args

Pan, Xiuli xiuli.pan at intel.com
Thu Jun 8 05:09:19 UTC 2017


Rander seems to have a similar optimization about imm value at sel ir.
If your case here need the optimization done in GEN IR level then  rander's patch may no longer be needed.

-----Original Message-----
From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Guo, Yejun
Sent: Thursday, June 8, 2017 12:41
To: beignet at lists.freedesktop.org
Cc: Guo, Yejun <yejun.guo at intel.com>
Subject: [Beignet] [PATCH] do constant folding for kernel struct args

for the following GEN IR, %41 is kernel argument (struct) the first LOAD will be mov, and the second LOAD will be indirect move (see lowerFunctionArguments). It hurts performance, and even impacts the correctness of reg liveness of indriect mov

LOADI.uint64 %1114 72
ADD.int64 %78 %41 %1114
LOAD.int64.private.aligned {%79} %78 bti:255
LOADI.int64 %1115 8
ADD.int64 %1116 %78 %1115
LOAD.int64.private.aligned {%80} %1116 bti:255

this function folds the constants of 72 and 8 together, and so it will be direct mov.
the GEN IR looks like:
LOADI.int64 %1115 80
ADD.int64 %1116 %41 %1115
---
 backend/src/CMakeLists.txt     |   2 +
 backend/src/ir/constopt.cpp    | 144 +++++++++++++++++++++++++++++++++++++++++
 backend/src/ir/constopt.hpp    |  54 ++++++++++++++++
 backend/src/ir/context.cpp     |   5 ++
 backend/src/ir/instruction.cpp |   7 ++
 backend/src/ir/instruction.hpp |   1 +
 6 files changed, 213 insertions(+)
 create mode 100644 backend/src/ir/constopt.cpp  create mode 100644 backend/src/ir/constopt.hpp

diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index c9ff833..74d7bab 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -73,6 +73,8 @@ set (GBE_SRC
     ir/value.hpp
     ir/lowering.cpp
     ir/lowering.hpp
+    ir/constopt.cpp
+    ir/constopt.hpp
     ir/profiling.cpp
     ir/profiling.hpp
     ir/printf.cpp
diff --git a/backend/src/ir/constopt.cpp b/backend/src/ir/constopt.cpp new file mode 100644 index 0000000..24878b8
--- /dev/null
+++ b/backend/src/ir/constopt.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Guo Yejun <yejun.guo at intel.com>  */
+
+#include <assert.h>
+#include "ir/context.hpp"
+#include "ir/value.hpp"
+#include "ir/constopt.hpp"
+#include "sys/set.hpp"
+
+namespace gbe {
+namespace ir {
+
+  class FunctionStructArgConstOffsetFolder : public Context  {
+  public:
+    /*! Build the helper structure */
+    FunctionStructArgConstOffsetFolder(Unit &unit) : Context(unit) {
+      records.clear();
+      loadImms.clear();
+    }
+    /*! Free everything we needed */
+    virtual ~FunctionStructArgConstOffsetFolder() {
+      for (size_t i = 0; i < records.size(); ++i) {
+        delete records[i];
+      }
+      records.clear();
+      loadImms.clear();
+    }
+    /*! Perform all function arguments substitution if needed */
+    void folding(const std::string &name);
+
+  private:
+    class Record {  //add dst, arg (kernel struct arg base reg), imm_value
+    public:
+      Record(Register dst, Register arg, int64_t immv) :
+                                        _dst(dst), _arg(arg), _immv(immv) { }
+      Register _dst;
+      Register _arg;
+      int64_t _immv;
+    };
+    std::vector<Record*> records;
+    std::map<Register, LoadImmInstruction*> loadImms; //<ir reg, load 
+ reg imm>
+
+    void AddRecord(Register dst, Register arg, int64_t immv) {
+      Record* rec = new Record(dst, arg, immv);
+      records.push_back(rec);
+    }
+  };
+
+  void FunctionStructArgConstOffsetFolder::folding(const std::string &name) {
+    Function *fn = unit.getFunction(name);
+    if (fn == NULL)
+      return;
+
+    const uint32_t argNum = fn->argNum();
+    for (uint32_t argID = 0; argID < argNum; ++argID) {
+      FunctionArgument &arg = fn->getArg(argID);
+      if (arg.type != FunctionArgument::STRUCTURE)
+        continue;
+      AddRecord(arg.reg, arg.reg, 0);
+    }
+
+    fn->foreachInstruction([&](Instruction &insn) {
+      if (insn.getOpcode() == OP_LOADI) {
+        LoadImmInstruction *loadImm = cast<LoadImmInstruction>(&insn);
+        if(!loadImm)
+          return;
+
+        //to avoid regression, limit for the case: LOADI.int64 %1164 32
+        //we can loose the limit if necessary
+        if (loadImm->getImmediate().getType() != TYPE_S64 &&
+            loadImm->getImmediate().getType() != TYPE_U64)
+          return;
+
+        Register dst = insn.getDst();
+        loadImms[dst] = loadImm;
+        return;
+      }
+
+      //we will change imm of loadi directly, so it should not be dst
+      for (size_t i = 0; i < insn.getDstNum(); ++i) {
+        Register dst = insn.getDst(i);
+        assert(loadImms.find(dst) == loadImms.end());
+      }
+
+      if (insn.getOpcode() != OP_ADD)
+        return;
+
+      Register src0 = insn.getSrc(0);
+      Register src1 = insn.getSrc(1);
+      Register dst = insn.getDst();
+
+      //check if src0 is derived from kernel struct arg
+      std::vector<Record*>::iterator it =
+            std::find_if(records.begin(), records.end(), [=](Record* rec){
+                                                            return rec->_dst == src0;
+                                                            } );
+      if (it == records.end())
+        return;
+
+      //check if src1 is imm value
+      if (loadImms.find(src1) == loadImms.end())
+        return;
+
+      Record* rec = *it;
+      LoadImmInstruction *loadImm = loadImms[src1];
+      Immediate imm = loadImm->getImmediate();
+      int64_t newvalue = imm.getIntegerValue() + rec->_immv;
+
+      if (rec->_dst != rec->_arg) {  //directly dervied from arg if they are equal
+        //change src0 to be the kernel struct arg
+        insn.setSrc(0, rec->_arg);
+
+        //change the value of src1
+        ImmediateIndex immIndex = fn->newImmediate(newvalue);
+        loadImm->setImmediateIndex(immIndex);
+      }
+      AddRecord(dst, rec->_arg, newvalue);
+    });
+  }
+
+  void foldFunctionStructArgConstOffset(Unit &unit, const std::string &functionName) {
+    FunctionStructArgConstOffsetFolder folder(unit);
+    folder.folding(functionName);
+  }
+
+} /* namespace ir */
+}
diff --git a/backend/src/ir/constopt.hpp b/backend/src/ir/constopt.hpp new file mode 100644 index 0000000..f272637
--- /dev/null
+++ b/backend/src/ir/constopt.hpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Guo Yejun <yejun.guo at intel.com>  */
+
+#ifndef __GBE_IR_CONSTOPT_HPP__
+#define __GBE_IR_CONSTOPT_HPP__
+
+namespace gbe {
+namespace ir {
+
+  // Structure to update
+  class Unit;
+
+  // TODO
+  void foldConstant(Unit &unit, const std::string &functionName);  void 
+ propagateConstant(Unit &unit, const std::string &functionName);
+
+  // for the following GEN IR, %41 is kernel argument (struct)
+  // the first LOAD will be mov, and the second LOAD will be indirect 
+move
+  // (see lowerFunctionArguments). It hurts performance,
+  // and even impacts the correctness of reg liveness of indriect mov
+  //
+  // LOADI.uint64 %1114 72
+  // ADD.int64 %78 %41 %1114
+  // LOAD.int64.private.aligned {%79} %78 bti:255
+  // LOADI.int64 %1115 8
+  // ADD.int64 %1116 %78 %1115
+  // LOAD.int64.private.aligned {%80} %1116 bti:255
+  //
+  // this function folds the constants of 72 and 8 together,
+  // and so it will be direct mov.
+  // the GEN IR looks like:
+  // LOADI.int64 %1115 80
+  // ADD.int64 %1116 %41 %1115
+  void foldFunctionStructArgConstOffset(Unit &unit, const std::string 
+&functionName); } /* namespace ir */ } /* namespace gbe */
+
+#endif /* __GBE_IR_LOWERING_HPP__ */
diff --git a/backend/src/ir/context.cpp b/backend/src/ir/context.cpp index e4aac08..f60d33f 100644
--- a/backend/src/ir/context.cpp
+++ b/backend/src/ir/context.cpp
@@ -24,6 +24,7 @@
 #include "ir/context.hpp"
 #include "ir/unit.hpp"
 #include "ir/lowering.hpp"
+#include "ir/constopt.hpp"
 
 namespace gbe {
 namespace ir {
@@ -82,6 +83,10 @@ namespace ir {
     fn->sortLabels();
     fn->computeCFG();
 
+    //TODO: do constant folding and propagation for GEN IR
+    //here as the first step, we just do constant folding for kernel struct args
+    foldFunctionStructArgConstOffset(unit, fn->getName());
+
     // Spill function argument to the stack if required and identify which
     // function arguments can use constant push
     lowerFunctionArguments(unit, fn->getName()); diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index a9156ff..48590fd 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -833,6 +833,9 @@ namespace ir {
       INLINE Immediate getImmediate(const Function &fn) const {
         return fn.getImmediate(immediateIndex);
       }
+      INLINE void setImmediateIndex(ImmediateIndex immIndex) {
+        immediateIndex = immIndex;
+      }
       INLINE Type getType(void) const { return this->type; }
       bool wellFormed(const Function &fn, std::string &why) const;
       INLINE void out(std::ostream &out, const Function &fn) const;
@@ -2445,6 +2448,10 @@ DECL_MEM_FN(MemInstruction, void,     setBtiReg(Register reg), setBtiReg(reg))
     return reinterpret_cast<const internal::LoadImmInstruction*>(this)->getImmediate(fn);
   }
 
+  void LoadImmInstruction::setImmediateIndex(ImmediateIndex immIndex) {
+    
+ reinterpret_cast<internal::LoadImmInstruction*>(this)->setImmediateInd
+ ex(immIndex);
+  }
+
   ///////////////////////////////////////////////////////////////////////////
   // Implements the emission functions
   ///////////////////////////////////////////////////////////////////////////
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 8685dd4..05c3e64 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -389,6 +389,7 @@ namespace ir {
   public:
     /*! Return the value stored in the instruction */
     Immediate getImmediate(void) const;
+    void setImmediateIndex(ImmediateIndex immIndex);
     /*! Return the type of the stored value */
     Type getType(void) const;
     /*! Return true if the given instruction is an instance of this class */
--
2.7.4

_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list