[Beignet] [PATCH] do constant folding for kernel struct args

Guo, Yejun yejun.guo at intel.com
Thu Jun 8 04:40:58 UTC 2017


For the following GEN IR, %41 is a kernel argument (struct).
The first LOAD will become a mov, and the second LOAD will become an indirect
mov (see lowerFunctionArguments). This hurts performance, and can even affect
the correctness of the register liveness computed for the indirect mov.

LOADI.uint64 %1114 72
ADD.int64 %78 %41 %1114
LOAD.int64.private.aligned {%79} %78 bti:255
LOADI.int64 %1115 8
ADD.int64 %1116 %78 %1115
LOAD.int64.private.aligned {%80} %1116 bti:255

This patch folds the constants 72 and 8 together,
so that the second LOAD also becomes a direct mov.
After folding, the GEN IR for the second address looks like:
LOADI.int64 %1115 80
ADD.int64 %1116 %41 %1115
---
 backend/src/CMakeLists.txt     |   2 +
 backend/src/ir/constopt.cpp    | 144 +++++++++++++++++++++++++++++++++++++++++
 backend/src/ir/constopt.hpp    |  54 ++++++++++++++++
 backend/src/ir/context.cpp     |   5 ++
 backend/src/ir/instruction.cpp |   7 ++
 backend/src/ir/instruction.hpp |   1 +
 6 files changed, 213 insertions(+)
 create mode 100644 backend/src/ir/constopt.cpp
 create mode 100644 backend/src/ir/constopt.hpp

diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index c9ff833..74d7bab 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -73,6 +73,8 @@ set (GBE_SRC
     ir/value.hpp
     ir/lowering.cpp
     ir/lowering.hpp
+    ir/constopt.cpp
+    ir/constopt.hpp
     ir/profiling.cpp
     ir/profiling.hpp
     ir/printf.cpp
diff --git a/backend/src/ir/constopt.cpp b/backend/src/ir/constopt.cpp
new file mode 100644
index 0000000..24878b8
--- /dev/null
+++ b/backend/src/ir/constopt.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Guo Yejun <yejun.guo at intel.com>
+ */
+
+#include <assert.h>
+#include "ir/context.hpp"
+#include "ir/value.hpp"
+#include "ir/constopt.hpp"
+#include "sys/set.hpp"
+
+namespace gbe {
+namespace ir {
+
+  class FunctionStructArgConstOffsetFolder : public Context  // folds chained constant offsets added to kernel struct arguments
+  {
+  public:
+    /*! Start with no records and no tracked LOADI instructions */
+    FunctionStructArgConstOffsetFolder(Unit &unit) : Context(unit) {
+      records.clear();
+      loadImms.clear();
+    }
+    /*! Delete the heap-allocated records owned by this object */
+    virtual ~FunctionStructArgConstOffsetFolder() {
+      for (size_t i = 0; i < records.size(); ++i) {
+        delete records[i];
+      }
+      records.clear();
+      loadImms.clear();
+    }
+    /*! Fold constant offsets on the STRUCTURE arguments of function `name`; does nothing if the function is not found */
+    void folding(const std::string &name);
+
+  private:
+    class Record {  //one known fact: _dst == _arg + _immv (_arg is a kernel struct arg base reg)
+    public:
+      Record(Register dst, Register arg, int64_t immv) :
+                                        _dst(dst), _arg(arg), _immv(immv) { }
+      Register _dst;  // register holding the derived address (equals _arg for the seed record)
+      Register _arg;  // kernel struct argument register the address is based on
+      int64_t _immv;  // accumulated constant offset from _arg
+    };
+    std::vector<Record*> records;  // owning pointers, created by AddRecord and freed in the destructor
+    std::map<Register, LoadImmInstruction*> loadImms; //<dst reg of a 64-bit integer LOADI, that LOADI instruction>
+
+    void AddRecord(Register dst, Register arg, int64_t immv) {  // appends a new heap-allocated Record
+      Record* rec = new Record(dst, arg, immv);
+      records.push_back(rec);
+    }
+  };
+
+  void FunctionStructArgConstOffsetFolder::folding(const std::string &name) {  // one foreachInstruction pass over function `name`
+    Function *fn = unit.getFunction(name);
+    if (fn == NULL)
+      return;  // unknown function: nothing to fold
+
+    const uint32_t argNum = fn->argNum();
+    for (uint32_t argID = 0; argID < argNum; ++argID) {
+      FunctionArgument &arg = fn->getArg(argID);
+      if (arg.type != FunctionArgument::STRUCTURE)
+        continue;
+      AddRecord(arg.reg, arg.reg, 0);  // seed: every struct argument is itself at offset 0
+    }
+
+    fn->foreachInstruction([&](Instruction &insn) {
+      if (insn.getOpcode() == OP_LOADI) {
+        LoadImmInstruction *loadImm = cast<LoadImmInstruction>(&insn);
+        if(!loadImm)
+          return;
+
+        //to avoid regressions, only 64-bit integer immediates are tracked, e.g.: LOADI.int64 %1164 32
+        //this restriction can be loosened if necessary
+        if (loadImm->getImmediate().getType() != TYPE_S64 &&
+            loadImm->getImmediate().getType() != TYPE_U64)
+          return;
+
+        Register dst = insn.getDst();
+        loadImms[dst] = loadImm;  // remember which LOADI defines this register
+        return;
+      }
+
+      //a tracked LOADI may be rewritten in place below, so no other instruction may redefine its register
+      for (size_t i = 0; i < insn.getDstNum(); ++i) {
+        Register dst = insn.getDst(i);
+        assert(loadImms.find(dst) == loadImms.end());  // NOTE(review): only checked when assert() is enabled
+      }
+
+      if (insn.getOpcode() != OP_ADD)
+        return;
+
+      Register src0 = insn.getSrc(0);
+      Register src1 = insn.getSrc(1);
+      Register dst = insn.getDst();
+
+      //check if src0 is a kernel struct arg or an address already derived from one
+      std::vector<Record*>::iterator it =
+            std::find_if(records.begin(), records.end(), [=](Record* rec){
+                                                            return rec->_dst == src0;
+                                                            } );
+      if (it == records.end())
+        return;
+
+      //check if src1 comes from a tracked LOADI; NOTE(review): only the (base, imm) operand order is matched
+      if (loadImms.find(src1) == loadImms.end())
+        return;
+
+      Record* rec = *it;
+      LoadImmInstruction *loadImm = loadImms[src1];
+      Immediate imm = loadImm->getImmediate();
+      int64_t newvalue = imm.getIntegerValue() + rec->_immv;  // offset of dst relative to the struct arg
+
+      if (rec->_dst != rec->_arg) {  //equal means src0 is the struct arg itself, so the ADD needs no rewrite
+        //rebase the ADD so that src0 is the kernel struct arg
+        insn.setSrc(0, rec->_arg);
+
+        //point the LOADI behind src1 at the accumulated offset; NOTE(review): the LOADI is edited in place, so any other user of that register also sees the new value, and a second fold through the same LOADI starts from the already-folded immediate -- confirm each such LOADI has a single use
+        ImmediateIndex immIndex = fn->newImmediate(newvalue);
+        loadImm->setImmediateIndex(immIndex);
+      }
+      AddRecord(dst, rec->_arg, newvalue);  // dst can now act as src0 of a later foldable ADD
+    });
+  }
+
+  void foldFunctionStructArgConstOffset(Unit &unit, const std::string &functionName) {
+    FunctionStructArgConstOffsetFolder folder(unit);  // temporary pass object; its destructor frees the records
+    folder.folding(functionName);  // does nothing if `functionName` is not a function of `unit`
+  }
+
+} /* namespace ir */
+}
diff --git a/backend/src/ir/constopt.hpp b/backend/src/ir/constopt.hpp
new file mode 100644
index 0000000..f272637
--- /dev/null
+++ b/backend/src/ir/constopt.hpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Guo Yejun <yejun.guo at intel.com>
+ */
+
+#ifndef __GBE_IR_CONSTOPT_HPP__
+#define __GBE_IR_CONSTOPT_HPP__
+
+namespace gbe {
+namespace ir {
+
+  // Forward declaration: the unit that owns the functions to optimize
+  class Unit;
+
+  // TODO: generic constant folding / propagation; declared only, no definition in this patch
+  void foldConstant(Unit &unit, const std::string &functionName);
+  void propagateConstant(Unit &unit, const std::string &functionName);
+
+  // In the following GEN IR, %41 is a kernel argument (struct).
+  // The first LOAD will become a mov, and the second LOAD will become an indirect
+  // mov (see lowerFunctionArguments). This hurts performance, and can even affect
+  // the correctness of the register liveness computed for the indirect mov.
+  //
+  // LOADI.uint64 %1114 72
+  // ADD.int64 %78 %41 %1114
+  // LOAD.int64.private.aligned {%79} %78 bti:255
+  // LOADI.int64 %1115 8
+  // ADD.int64 %1116 %78 %1115
+  // LOAD.int64.private.aligned {%80} %1116 bti:255
+  //
+  // This function folds the constants 72 and 8 together,
+  // so that the second LOAD also becomes a direct mov.
+  // After folding, the GEN IR for the second address looks like:
+  // LOADI.int64 %1115 80
+  // ADD.int64 %1116 %41 %1115
+  void foldFunctionStructArgConstOffset(Unit &unit, const std::string &functionName);
+} /* namespace ir */
+} /* namespace gbe */
+
+#endif /* __GBE_IR_CONSTOPT_HPP__ */
diff --git a/backend/src/ir/context.cpp b/backend/src/ir/context.cpp
index e4aac08..f60d33f 100644
--- a/backend/src/ir/context.cpp
+++ b/backend/src/ir/context.cpp
@@ -24,6 +24,7 @@
 #include "ir/context.hpp"
 #include "ir/unit.hpp"
 #include "ir/lowering.hpp"
+#include "ir/constopt.hpp"
 
 namespace gbe {
 namespace ir {
@@ -82,6 +83,10 @@ namespace ir {
     fn->sortLabels();
     fn->computeCFG();
 
+    //TODO: do constant folding and propagation for GEN IR
+    //here as the first step, we just do constant folding for kernel struct args
+    foldFunctionStructArgConstOffset(unit, fn->getName());
+
     // Spill function argument to the stack if required and identify which
     // function arguments can use constant push
     lowerFunctionArguments(unit, fn->getName());
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index a9156ff..48590fd 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -833,6 +833,9 @@ namespace ir {
       INLINE Immediate getImmediate(const Function &fn) const {
         return fn.getImmediate(immediateIndex);
       }
+      INLINE void setImmediateIndex(ImmediateIndex immIndex) {
+        immediateIndex = immIndex;  // getImmediate() looks this index up in the Function on each call
+      }
       INLINE Type getType(void) const { return this->type; }
       bool wellFormed(const Function &fn, std::string &why) const;
       INLINE void out(std::ostream &out, const Function &fn) const;
@@ -2445,6 +2448,10 @@ DECL_MEM_FN(MemInstruction, void,     setBtiReg(Register reg), setBtiReg(reg))
     return reinterpret_cast<const internal::LoadImmInstruction*>(this)->getImmediate(fn);
   }
 
+  void LoadImmInstruction::setImmediateIndex(ImmediateIndex immIndex) {  // forwards to internal::LoadImmInstruction
+    reinterpret_cast<internal::LoadImmInstruction*>(this)->setImmediateIndex(immIndex);
+  }
+
   ///////////////////////////////////////////////////////////////////////////
   // Implements the emission functions
   ///////////////////////////////////////////////////////////////////////////
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 8685dd4..05c3e64 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -389,6 +389,7 @@ namespace ir {
   public:
     /*! Return the value stored in the instruction */
     Immediate getImmediate(void) const;
+    void setImmediateIndex(ImmediateIndex immIndex); /*!< Make the instruction refer to another immediate, by index */
     /*! Return the type of the stored value */
     Type getType(void) const;
     /*! Return true if the given instruction is an instance of this class */
-- 
2.7.4



More information about the Beignet mailing list