[Beignet] [PATCH] GBE: optimize a special case of convert INT64 to float.

Zhigang Gong zhigang.gong at linux.intel.com
Sun Jun 8 19:59:16 PDT 2014


Right, the common analysis will cover more cases. Considering the most
important cases have been covered by this special patch, we can do that
common analysis in next development cycle. One major missed case maybe
two Int64's comparision. Let's do it in the future. I will push this
patch latter. Thanks for the review comments.

On Mon, Jun 09, 2014 at 03:10:14AM +0000, Yang, Rong R wrote:
> This patch LGTM.
> 
> But I think it is too special, only for convert INT64 to float case. Is it worth to add common case to optimize I64 ops? The essence of this optimization is, the I64 ops, whose srcs and dsts all need care low 32 bits, than can convert to int ops. So in this patch, OP_AND only need one src is lowerI64Reg.
> 
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Zhigang Gong
> Sent: Thursday, June 05, 2014 4:31 PM
> To: beignet at lists.freedesktop.org
> Cc: Gong, Zhigang
> Subject: [Beignet] [PATCH] GBE: optimize a special case of convert INT64 to float.
> 
> We found the following instruction sequence is common in luxmark:
> CVT.int64.uin32 %75 %74
> LOADI.int64 %537 16777215
> AND.int64 %76 %75 %537
> CVT.float.uin64 %77 %76
> 
> Actually, the immediate value is a pure 32 bit value, and the %74 is also a uint32 bit value. The AND instruction will not touch the high 32 bit as well. So we can simply optimize the above instruction series to the follow:
> AND.uint32 %tmp %74 16777215
> MOV.float  %77 %tmp
> 
> This way, it will finally save about 55 instructions for each of the above case. This patch could bring about 8% performance gain with sala scene in luxmark.
> 
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
>  backend/src/backend/gen_insn_selection.cpp | 105 ++++++++++++++++++++++++-----
>  1 file changed, 89 insertions(+), 16 deletions(-)
> 
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 3530d2c..2d380e4 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -103,6 +103,7 @@
>  #include "sys/cvar.hpp"
>  #include "sys/vector.hpp"
>  #include <algorithm>
> +#include <climits>
>  
>  namespace gbe
>  {
> @@ -1809,8 +1810,10 @@ namespace gbe
>      }
>      /*! Call the child method with the proper prototype */
>      virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
> -      if (static_cast<const T*>(this)->emitOne(sel, ir::cast<U>(dag.insn))) {
> -        markAllChildren(dag);
> +      bool markChildren = true;
> +      if (static_cast<const T*>(this)->emitOne(sel, ir::cast<U>(dag.insn), markChildren)) {
> +        if (markChildren)
> +          markAllChildren(dag);
>          return true;
>        }
>        return false;
> @@ -1839,7 +1842,7 @@ namespace gbe
>        return ir::TYPE_FLOAT;
>      }
>  
> -    INLINE bool emitOne(Selection::Opaque &sel, const ir::UnaryInstruction &insn) const {
> +    INLINE bool emitOne(Selection::Opaque &sel, const 
> + ir::UnaryInstruction &insn, bool &markChildren) const {
>        const ir::Opcode opcode = insn.getOpcode();
>        const ir::Type insnType = insn.getType();
>        const GenRegister dst = sel.selReg(insn.getDst(0), getType(opcode, insnType)); @@ -2590,7 +2593,7 @@ namespace gbe  #define DECL_NOT_IMPLEMENTED_ONE_TO_MANY(FAMILY) \
>    struct FAMILY##Pattern : public OneToManyPattern<FAMILY##Pattern, ir::FAMILY>\
>    {\
> -    INLINE bool emitOne(Selection::Opaque &sel, const ir::FAMILY &insn) const {\
> +    INLINE bool emitOne(Selection::Opaque &sel, const ir::FAMILY &insn, 
> + bool &markChildren) const {\
>        NOT_IMPLEMENTED;\
>        return false;\
>      }\
> @@ -2601,7 +2604,7 @@ namespace gbe
>    /*! Load immediate pattern */
>    DECL_PATTERN(LoadImmInstruction)
>    {
> -    INLINE bool emitOne(Selection::Opaque &sel, const ir::LoadImmInstruction &insn) const
> +    INLINE bool emitOne(Selection::Opaque &sel, const 
> + ir::LoadImmInstruction &insn, bool &markChildren) const
>      {
>        using namespace ir;
>        const Type type = insn.getType(); @@ -2649,7 +2652,7 @@ namespace gbe
>    /*! Sync instruction */
>    DECL_PATTERN(SyncInstruction)
>    {
> -    INLINE bool emitOne(Selection::Opaque &sel, const ir::SyncInstruction &insn) const
> +    INLINE bool emitOne(Selection::Opaque &sel, const 
> + ir::SyncInstruction &insn, bool &markChildren) const
>      {
>        using namespace ir;
>        const ir::Register reg = sel.reg(FAMILY_DWORD); @@ -2821,7 +2824,7 @@ namespace gbe
>        sel.INDIRECT_MOVE(dst, src);
>      }
>  
> -    INLINE bool emitOne(Selection::Opaque &sel, const ir::LoadInstruction &insn) const {
> +    INLINE bool emitOne(Selection::Opaque &sel, const 
> + ir::LoadInstruction &insn, bool &markChildren) const {
>        using namespace ir;
>        GenRegister address = sel.selReg(insn.getAddress(), ir::TYPE_U32);
>        const AddressSpace space = insn.getAddressSpace(); @@ -2939,7 +2942,7 @@ namespace gbe
>        }
>      }
>  
> -    INLINE bool emitOne(Selection::Opaque &sel, const ir::StoreInstruction &insn) const
> +    INLINE bool emitOne(Selection::Opaque &sel, const 
> + ir::StoreInstruction &insn, bool &markChildren) const
>      {
>        using namespace ir;
>        const AddressSpace space = insn.getAddressSpace(); @@ -3045,7 +3048,7 @@ namespace gbe
>    /*! Bit cast instruction pattern */
>    DECL_PATTERN(BitCastInstruction)
>    {
> -    INLINE bool emitOne(Selection::Opaque &sel, const ir::BitCastInstruction &insn) const
> +    INLINE bool emitOne(Selection::Opaque &sel, const 
> + ir::BitCastInstruction &insn, bool &markChildren) const
>      {
>        using namespace ir;
>        const Type dstType = insn.getDstType(); @@ -3154,7 +3157,48 @@ namespace gbe
>    /*! Convert instruction pattern */
>    DECL_PATTERN(ConvertInstruction)
>    {
> -    INLINE bool emitOne(Selection::Opaque &sel, const ir::ConvertInstruction &insn) const
> +
> +    INLINE bool lowerI64Reg(Selection::Opaque &sel, SelectionDAG *dag, GenRegister &src, uint32_t type) const {
> +      using namespace ir;
> +      GBE_ASSERT(type == GEN_TYPE_UD || type == GEN_TYPE_F);
> +      if (dag->insn.getOpcode() == OP_LOADI) {
> +        const auto &immInsn = cast<LoadImmInstruction>(dag->insn);
> +        const auto imm = immInsn.getImmediate();
> +        const Type immType = immInsn.getType();
> +        if (immType == TYPE_S64 &&
> +          imm.data.s64 <= INT_MAX &&
> +          imm.data.s64 >= INT_MIN) {
> +          src = GenRegister::immd((int32_t)imm.data.s64);
> +          return true;
> +        } else if (immType == TYPE_U64 &&
> +                   imm.data.u64 <= UINT_MAX) {
> +          src = GenRegister::immud((uint32_t)imm.data.s64);
> +          return true;
> +        }
> +      } else if (dag->insn.getOpcode() == OP_CVT) {
> +        const auto cvtInsn = cast<ConvertInstruction>(dag->insn);
> +        auto srcType = cvtInsn.getSrcType();
> +        if (((srcType == TYPE_U32 || srcType == TYPE_S32) &&
> +            (type == GEN_TYPE_UD || type == GEN_TYPE_D)) ||
> +             ((srcType == TYPE_FLOAT) && type == GEN_TYPE_F)) {
> +          src = GenRegister::retype(sel.selReg(cvtInsn.getSrc(0), srcType), type);
> +          dag->isRoot = 1;
> +          return true;
> +        } else if (srcType == TYPE_FLOAT ||
> +                   srcType == TYPE_U16 ||
> +                   srcType == TYPE_S16 ||
> +                   srcType == TYPE_U32 ||
> +                   srcType == TYPE_S32) {
> +          src = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32), type);
> +          dag->isRoot = 1;
> +          sel.MOV(src, sel.selReg(cvtInsn.getSrc(0), srcType));
> +          return true;
> +        }
> +      }
> +      return false;
> +    }
> +
> +    INLINE bool emitOne(Selection::Opaque &sel, const 
> + ir::ConvertInstruction &insn, bool &markChildren) const
>      {
>        using namespace ir;
>        const Type dstType = insn.getDstType(); @@ -3225,6 +3269,35 @@ namespace gbe
>        } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && srcFamily == FAMILY_QWORD) {
>          sel.CONVI64_TO_I(dst, src);
>        } else if (dstType == ir::TYPE_FLOAT && srcFamily == FAMILY_QWORD) {
> +        auto dag = sel.regDAG[src.reg()];
> +        SelectionDAG *dag0, *dag1;
> +        if (dag->child[0]->insn.getOpcode() == OP_LOADI) {
> +          dag0 = dag->child[1];
> +          dag1 = dag->child[0];
> +        } else {
> +          dag0 = dag->child[0];
> +          dag1 = dag->child[1];
> +        }
> +        GBE_ASSERT(!(dag->child[0]->insn.getOpcode() == OP_LOADI &&
> +                     dag->child[1]->insn.getOpcode() == OP_LOADI));
> +        if (dag->insn.getOpcode() == OP_AND ||
> +            dag->insn.getOpcode() == OP_OR  ||
> +            dag->insn.getOpcode() == OP_XOR) {
> +          GenRegister src0;
> +          GenRegister src1;
> +          if (lowerI64Reg(sel, dag0, src0, GEN_TYPE_UD) &&
> +              lowerI64Reg(sel, dag1, src1, GEN_TYPE_UD)) {
> +            switch (dag->insn.getOpcode()) {
> +              default:
> +              case OP_AND: sel.AND(GenRegister::retype(dst, GEN_TYPE_UD), src0, src1); break;
> +              case OP_OR:  sel.OR(GenRegister::retype(dst, GEN_TYPE_UD), src0, src1); break;
> +              case OP_XOR: sel.XOR(GenRegister::retype(dst, GEN_TYPE_UD), src0, src1); break;
> +            }
> +            sel.MOV(dst, GenRegister::retype(dst, GEN_TYPE_UD));
> +            markChildren = false;
> +            return true;
> +          }
> +        }
>          GenRegister tmp[6];
>          for(int i=0; i<6; i++) {
>            tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); @@ -3269,7 +3342,7 @@ namespace gbe
>    /*! Convert instruction pattern */
>    DECL_PATTERN(AtomicInstruction)
>    {
> -    INLINE bool emitOne(Selection::Opaque &sel, const ir::AtomicInstruction &insn) const
> +    INLINE bool emitOne(Selection::Opaque &sel, const 
> + ir::AtomicInstruction &insn, bool &markChildren) const
>      {
>        using namespace ir;
>        const AtomicOps atomicOp = insn.getAtomicOpcode(); @@ -3346,7 +3419,7 @@ namespace gbe
>  
>    DECL_PATTERN(TernaryInstruction)
>     {
> -    INLINE bool emitOne(Selection::Opaque &sel, const ir::TernaryInstruction &insn) const {
> +    INLINE bool emitOne(Selection::Opaque &sel, const 
> + ir::TernaryInstruction &insn, bool &markChildren) const {
>        using namespace ir;
>        const Type type = insn.getType();
>        const GenRegister dst = sel.selReg(insn.getDst(0), type), @@ -3386,7 +3459,7 @@ namespace gbe
>    /*! Label instruction pattern */
>    DECL_PATTERN(LabelInstruction)
>    {
> -    INLINE bool emitOne(Selection::Opaque &sel, const ir::LabelInstruction &insn) const
> +    INLINE bool emitOne(Selection::Opaque &sel, const 
> + ir::LabelInstruction &insn, bool &markChildren) const
>      {
>        using namespace ir;
>        const LabelIndex label = insn.getLabelIndex(); @@ -3477,7 +3550,7 @@ namespace gbe
>  
>    DECL_PATTERN(SampleInstruction)
>    {
> -    INLINE bool emitOne(Selection::Opaque &sel, const ir::SampleInstruction &insn) const
> +    INLINE bool emitOne(Selection::Opaque &sel, const 
> + ir::SampleInstruction &insn, bool &markChildren) const
>      {
>        using namespace ir;
>        GenRegister msgPayloads[4];
> @@ -3522,7 +3595,7 @@ namespace gbe
>    /*! Typed write instruction pattern. */
>    DECL_PATTERN(TypedWriteInstruction)
>    {
> -    INLINE bool emitOne(Selection::Opaque &sel, const ir::TypedWriteInstruction &insn) const
> +    INLINE bool emitOne(Selection::Opaque &sel, const 
> + ir::TypedWriteInstruction &insn, bool &markChildren) const
>      {
>        using namespace ir;
>        const uint32_t simdWidth = sel.ctx.getSimdWidth(); @@ -3605,7 +3678,7 @@ namespace gbe
>    /*! get image info instruction pattern. */
>    DECL_PATTERN(GetImageInfoInstruction)
>    {
> -    INLINE bool emitOne(Selection::Opaque &sel, const ir::GetImageInfoInstruction &insn) const
> +    INLINE bool emitOne(Selection::Opaque &sel, const 
> + ir::GetImageInfoInstruction &insn, bool &markChildren) const
>      {
>        using namespace ir;
>        GenRegister dst;
> --
> 1.8.3.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list