[Beignet] [PATCH] do constant folding for kernel struct args
Yang, Rong R
rong.r.yang at intel.com
Tue Jun 13 07:53:35 UTC 2017
Have you considered the case where the value may come from two different arguments? For example:

struct s1 {
    int i;
    float4 f4;
};

struct s2 {
    int i;
    short s;
    float4 f4;
};

__kernel void k(struct s1 s1, struct s2 s2, __global float *dst)
{
    int gid = get_global_id(0);
    float4 *p;
    if (gid % 2) {
        p = &s1.f4;
    } else {
        p = &s2.f4;
    }
    dst[gid] = (*p).s1;
}
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Guo, Yejun
> Sent: Thursday, June 8, 2017 21:08
> To: Wang, Rander <rander.wang at intel.com>; Pan, Xiuli
> <xiuli.pan at intel.com>; beignet at lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH] do constant folding for kernel struct args
>
> Yes, the constant folding for kernel struct arg is a must here.
>
> As for the general constant folding and propagation optimization, I do not
> have a position on whether sel IR or GEN IR is better.
>
> -----Original Message-----
> From: Wang, Rander
> Sent: Thursday, June 08, 2017 1:14 PM
> To: Pan, Xiuli; Guo, Yejun; beignet at lists.freedesktop.org
> Cc: Guo, Yejun
> Subject: RE: [Beignet] [PATCH] do constant folding for kernel struct args
>
> Yes, so I may be able to give some advice
>
> -----Original Message-----
> From: Pan, Xiuli
> Sent: Thursday, June 8, 2017 1:09 PM
> To: Guo, Yejun <yejun.guo at intel.com>; beignet at lists.freedesktop.org
> Cc: Guo, Yejun <yejun.guo at intel.com>; Wang, Rander
> <rander.wang at intel.com>
> Subject: RE: [Beignet] [PATCH] do constant folding for kernel struct args
>
> Rander seems to have a similar optimization for imm values at the sel IR level.
> If your case here needs the optimization done at the GEN IR level, then Rander's
> patch may no longer be needed.
>
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Guo, Yejun
> Sent: Thursday, June 8, 2017 12:41
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun <yejun.guo at intel.com>
> Subject: [Beignet] [PATCH] do constant folding for kernel struct args
>
> For the following GEN IR, %41 is a kernel argument (struct). The first LOAD
> will be a mov, and the second LOAD will be an indirect move (see
> lowerFunctionArguments). It hurts performance, and even impacts the
> correctness of reg liveness of the indirect mov.
>
> LOADI.uint64 %1114 72
> ADD.int64 %78 %41 %1114
> LOAD.int64.private.aligned {%79} %78 bti:255
> LOADI.int64 %1115 8
> ADD.int64 %1116 %78 %1115
> LOAD.int64.private.aligned {%80} %1116 bti:255
>
> This function folds the constants 72 and 8 together, so the second LOAD
> becomes a direct mov. The resulting GEN IR looks like:
> LOADI.int64 %1115 80
> ADD.int64 %1116 %41 %1115
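>
> As an illustration only (not part of this patch), a kernel roughly like the
> one below could produce the pattern above. The struct layout and names are
> made up so that the two loads land at byte offsets 72 and 80 from the
> argument base; treat it as a sketch, not a reduced test case from the bug.
>
> struct big {
>     long pad[9];   /* 72 bytes before the array */
>     long arr[2];   /* arr[0] at offset 72, arr[1] at offset 80 */
> };
>
> __kernel void k(struct big s, __global long *dst)
> {
>     int gid = get_global_id(0);
>     dst[2 * gid]     = s.arr[0];  /* address: arg base + 72 */
>     dst[2 * gid + 1] = s.arr[1];  /* address: (arg base + 72) + 8 before folding */
> }
>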
> ---
> backend/src/CMakeLists.txt | 2 +
> backend/src/ir/constopt.cpp | 144 +++++++++++++++++++++++++++++++++++++++++
> backend/src/ir/constopt.hpp | 54 ++++++++++++++++
> backend/src/ir/context.cpp | 5 ++
> backend/src/ir/instruction.cpp | 7 ++
> backend/src/ir/instruction.hpp | 1 +
> 6 files changed, 213 insertions(+)
> create mode 100644 backend/src/ir/constopt.cpp
> create mode 100644 backend/src/ir/constopt.hpp
>
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
> index c9ff833..74d7bab 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -73,6 +73,8 @@ set (GBE_SRC
> ir/value.hpp
> ir/lowering.cpp
> ir/lowering.hpp
> + ir/constopt.cpp
> + ir/constopt.hpp
> ir/profiling.cpp
> ir/profiling.hpp
> ir/printf.cpp
> diff --git a/backend/src/ir/constopt.cpp b/backend/src/ir/constopt.cpp
> new file mode 100644
> index 0000000..24878b8
> --- /dev/null
> +++ b/backend/src/ir/constopt.cpp
> @@ -0,0 +1,144 @@
> +/*
> + * Copyright © 2017 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see <http://www.gnu.org/licenses/>.
> + *
> + * Author: Guo Yejun <yejun.guo at intel.com> */
> +
> +#include <assert.h>
> +#include "ir/context.hpp"
> +#include "ir/value.hpp"
> +#include "ir/constopt.hpp"
> +#include "sys/set.hpp"
> +
> +namespace gbe {
> +namespace ir {
> +
> + class FunctionStructArgConstOffsetFolder : public Context {
> + public:
> + /*! Build the helper structure */
> + FunctionStructArgConstOffsetFolder(Unit &unit) : Context(unit) {
> + records.clear();
> + loadImms.clear();
> + }
> + /*! Free everything we needed */
> + virtual ~FunctionStructArgConstOffsetFolder() {
> + for (size_t i = 0; i < records.size(); ++i) {
> + delete records[i];
> + }
> + records.clear();
> + loadImms.clear();
> + }
> + /*! Perform all function arguments substitution if needed */
> + void folding(const std::string &name);
> +
> + private:
> + class Record { //add dst, arg (kernel struct arg base reg), imm_value
> + public:
> + Record(Register dst, Register arg, int64_t immv) :
> + _dst(dst), _arg(arg), _immv(immv) { }
> + Register _dst;
> + Register _arg;
> + int64_t _immv;
> + };
> + std::vector<Record*> records;
> + std::map<Register, LoadImmInstruction*> loadImms; //<ir reg, load reg imm>
> +
> + void AddRecord(Register dst, Register arg, int64_t immv) {
> + Record* rec = new Record(dst, arg, immv);
> + records.push_back(rec);
> + }
> + };
> +
> + void FunctionStructArgConstOffsetFolder::folding(const std::string &name) {
> + Function *fn = unit.getFunction(name);
> + if (fn == NULL)
> + return;
> +
> + const uint32_t argNum = fn->argNum();
> + for (uint32_t argID = 0; argID < argNum; ++argID) {
> + FunctionArgument &arg = fn->getArg(argID);
> + if (arg.type != FunctionArgument::STRUCTURE)
> + continue;
> + AddRecord(arg.reg, arg.reg, 0);
> + }
> +
> + fn->foreachInstruction([&](Instruction &insn) {
> + if (insn.getOpcode() == OP_LOADI) {
> + LoadImmInstruction *loadImm = cast<LoadImmInstruction>(&insn);
> + if(!loadImm)
> + return;
> +
> + //to avoid regression, limit for the case: LOADI.int64 %1164 32
> + //we can loosen the limit if necessary
> + if (loadImm->getImmediate().getType() != TYPE_S64 &&
> + loadImm->getImmediate().getType() != TYPE_U64)
> + return;
> +
> + Register dst = insn.getDst();
> + loadImms[dst] = loadImm;
> + return;
> + }
> +
> + //we will change imm of loadi directly, so it should not be dst
> + for (size_t i = 0; i < insn.getDstNum(); ++i) {
> + Register dst = insn.getDst(i);
> + assert(loadImms.find(dst) == loadImms.end());
> + }
> +
> + if (insn.getOpcode() != OP_ADD)
> + return;
> +
> + Register src0 = insn.getSrc(0);
> + Register src1 = insn.getSrc(1);
> + Register dst = insn.getDst();
> +
> + //check if src0 is derived from kernel struct arg
> + std::vector<Record*>::iterator it =
> + std::find_if(records.begin(), records.end(), [=](Record* rec){
> + return rec->_dst == src0;
> + } );
> + if (it == records.end())
> + return;
> +
> + //check if src1 is imm value
> + if (loadImms.find(src1) == loadImms.end())
> + return;
> +
> + Record* rec = *it;
> + LoadImmInstruction *loadImm = loadImms[src1];
> + Immediate imm = loadImm->getImmediate();
> + int64_t newvalue = imm.getIntegerValue() + rec->_immv;
> +
> + if (rec->_dst != rec->_arg) { //directly derived from arg if they are equal
> + //change src0 to be the kernel struct arg
> + insn.setSrc(0, rec->_arg);
> +
> + //change the value of src1
> + ImmediateIndex immIndex = fn->newImmediate(newvalue);
> + loadImm->setImmediateIndex(immIndex);
> + }
> + AddRecord(dst, rec->_arg, newvalue);
> + });
> + }
> +
> + void foldFunctionStructArgConstOffset(Unit &unit, const std::string &functionName) {
> + FunctionStructArgConstOffsetFolder folder(unit);
> + folder.folding(functionName);
> + }
> +
> +} /* namespace ir */
> +}
> diff --git a/backend/src/ir/constopt.hpp b/backend/src/ir/constopt.hpp
> new file mode 100644
> index 0000000..f272637
> --- /dev/null
> +++ b/backend/src/ir/constopt.hpp
> @@ -0,0 +1,54 @@
> +/*
> + * Copyright © 2017 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see <http://www.gnu.org/licenses/>.
> + *
> + * Author: Guo Yejun <yejun.guo at intel.com> */
> +
> +#ifndef __GBE_IR_CONSTOPT_HPP__
> +#define __GBE_IR_CONSTOPT_HPP__
> +
> +namespace gbe {
> +namespace ir {
> +
> + // Structure to update
> + class Unit;
> +
> + // TODO
> + void foldConstant(Unit &unit, const std::string &functionName);
> + void propagateConstant(Unit &unit, const std::string &functionName);
> +
> + // for the following GEN IR, %41 is kernel argument (struct)
> + // the first LOAD will be mov, and the second LOAD will be indirect move
> + // (see lowerFunctionArguments). It hurts performance,
> + // and even impacts the correctness of reg liveness of indirect mov
> + //
> + // LOADI.uint64 %1114 72
> + // ADD.int64 %78 %41 %1114
> + // LOAD.int64.private.aligned {%79} %78 bti:255
> + // LOADI.int64 %1115 8
> + // ADD.int64 %1116 %78 %1115
> + // LOAD.int64.private.aligned {%80} %1116 bti:255
> + //
> + // this function folds the constants of 72 and 8 together,
> + // and so it will be direct mov.
> + // the GEN IR looks like:
> + // LOADI.int64 %1115 80
> + // ADD.int64 %1116 %41 %1115
> + void foldFunctionStructArgConstOffset(Unit &unit, const std::string &functionName);
> +} /* namespace ir */
> +} /* namespace gbe */
> +
> +#endif /* __GBE_IR_CONSTOPT_HPP__ */
> diff --git a/backend/src/ir/context.cpp b/backend/src/ir/context.cpp
> index e4aac08..f60d33f 100644
> --- a/backend/src/ir/context.cpp
> +++ b/backend/src/ir/context.cpp
> @@ -24,6 +24,7 @@
> #include "ir/context.hpp"
> #include "ir/unit.hpp"
> #include "ir/lowering.hpp"
> +#include "ir/constopt.hpp"
>
> namespace gbe {
> namespace ir {
> @@ -82,6 +83,10 @@ namespace ir {
> fn->sortLabels();
> fn->computeCFG();
>
> + //TODO: do constant folding and propagation for GEN IR
> + //here as the first step, we just do constant folding for kernel struct args
> + foldFunctionStructArgConstOffset(unit, fn->getName());
> +
> // Spill function argument to the stack if required and identify which
> // function arguments can use constant push
> lowerFunctionArguments(unit, fn->getName());
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index a9156ff..48590fd 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -833,6 +833,9 @@ namespace ir {
> INLINE Immediate getImmediate(const Function &fn) const {
> return fn.getImmediate(immediateIndex);
> }
> + INLINE void setImmediateIndex(ImmediateIndex immIndex) {
> + immediateIndex = immIndex;
> + }
> INLINE Type getType(void) const { return this->type; }
> bool wellFormed(const Function &fn, std::string &why) const;
> INLINE void out(std::ostream &out, const Function &fn) const;
> @@ -2445,6 +2448,10 @@ DECL_MEM_FN(MemInstruction, void, setBtiReg(Register reg), setBtiReg(reg))
>     return reinterpret_cast<const internal::LoadImmInstruction*>(this)->getImmediate(fn);
> }
>
> +  void LoadImmInstruction::setImmediateIndex(ImmediateIndex immIndex) {
> +    reinterpret_cast<internal::LoadImmInstruction*>(this)->setImmediateIndex(immIndex);
> +  }
> +
> ///////////////////////////////////////////////////////////////////////////
> // Implements the emission functions
> ///////////////////////////////////////////////////////////////////////////
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index 8685dd4..05c3e64 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -389,6 +389,7 @@ namespace ir {
> public:
> /*! Return the value stored in the instruction */
> Immediate getImmediate(void) const;
> + void setImmediateIndex(ImmediateIndex immIndex);
> /*! Return the type of the stored value */
> Type getType(void) const;
> /*! Return true if the given instruction is an instance of this class */
> --
> 2.7.4
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet