[Beignet] [PATCH] GBE: optimize a special case of convert INT64 to float.

Yang, Rong R rong.r.yang at intel.com
Sun Jun 8 20:10:14 PDT 2014


This patch LGTM.

But I think it is too special, only for convert INT64 to float case. Is it worth to add common case to optimize I64 ops? The essence of this optimization is, the I64 ops, whose srcs and dsts all need care low 32 bits, than can convert to int ops. So in this patch, OP_AND only need one src is lowerI64Reg.

-----Original Message-----
From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Zhigang Gong
Sent: Thursday, June 05, 2014 4:31 PM
To: beignet at lists.freedesktop.org
Cc: Gong, Zhigang
Subject: [Beignet] [PATCH] GBE: optimize a special case of convert INT64 to float.

We found the following instruction sequence is common in luxmark:
CVT.int64.uin32 %75 %74
LOADI.int64 %537 16777215
AND.int64 %76 %75 %537
CVT.float.uin64 %77 %76

Actually, the immediate value is a pure 32 bit value, and the %74 is also a uint32 bit value. The AND instruction will not touch the high 32 bit as well. So we can simply optimize the above instruction series to the follow:
AND.uint32 %tmp %74 16777215
MOV.float  %77 %tmp

This way, it will finally save about 55 instructions for each of the above case. This patch could bring about 8% performance gain with sala scene in luxmark.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp | 105 ++++++++++++++++++++++++-----
 1 file changed, 89 insertions(+), 16 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 3530d2c..2d380e4 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -103,6 +103,7 @@
 #include "sys/cvar.hpp"
 #include "sys/vector.hpp"
 #include <algorithm>
+#include <climits>
 
 namespace gbe
 {
@@ -1809,8 +1810,10 @@ namespace gbe
     }
     /*! Call the child method with the proper prototype */
     virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
-      if (static_cast<const T*>(this)->emitOne(sel, ir::cast<U>(dag.insn))) {
-        markAllChildren(dag);
+      bool markChildren = true;
+      if (static_cast<const T*>(this)->emitOne(sel, ir::cast<U>(dag.insn), markChildren)) {
+        if (markChildren)
+          markAllChildren(dag);
         return true;
       }
       return false;
@@ -1839,7 +1842,7 @@ namespace gbe
       return ir::TYPE_FLOAT;
     }
 
-    INLINE bool emitOne(Selection::Opaque &sel, const ir::UnaryInstruction &insn) const {
+    INLINE bool emitOne(Selection::Opaque &sel, const 
+ ir::UnaryInstruction &insn, bool &markChildren) const {
       const ir::Opcode opcode = insn.getOpcode();
       const ir::Type insnType = insn.getType();
       const GenRegister dst = sel.selReg(insn.getDst(0), getType(opcode, insnType)); @@ -2590,7 +2593,7 @@ namespace gbe  #define DECL_NOT_IMPLEMENTED_ONE_TO_MANY(FAMILY) \
   struct FAMILY##Pattern : public OneToManyPattern<FAMILY##Pattern, ir::FAMILY>\
   {\
-    INLINE bool emitOne(Selection::Opaque &sel, const ir::FAMILY &insn) const {\
+    INLINE bool emitOne(Selection::Opaque &sel, const ir::FAMILY &insn, 
+ bool &markChildren) const {\
       NOT_IMPLEMENTED;\
       return false;\
     }\
@@ -2601,7 +2604,7 @@ namespace gbe
   /*! Load immediate pattern */
   DECL_PATTERN(LoadImmInstruction)
   {
-    INLINE bool emitOne(Selection::Opaque &sel, const ir::LoadImmInstruction &insn) const
+    INLINE bool emitOne(Selection::Opaque &sel, const 
+ ir::LoadImmInstruction &insn, bool &markChildren) const
     {
       using namespace ir;
       const Type type = insn.getType(); @@ -2649,7 +2652,7 @@ namespace gbe
   /*! Sync instruction */
   DECL_PATTERN(SyncInstruction)
   {
-    INLINE bool emitOne(Selection::Opaque &sel, const ir::SyncInstruction &insn) const
+    INLINE bool emitOne(Selection::Opaque &sel, const 
+ ir::SyncInstruction &insn, bool &markChildren) const
     {
       using namespace ir;
       const ir::Register reg = sel.reg(FAMILY_DWORD); @@ -2821,7 +2824,7 @@ namespace gbe
       sel.INDIRECT_MOVE(dst, src);
     }
 
-    INLINE bool emitOne(Selection::Opaque &sel, const ir::LoadInstruction &insn) const {
+    INLINE bool emitOne(Selection::Opaque &sel, const 
+ ir::LoadInstruction &insn, bool &markChildren) const {
       using namespace ir;
       GenRegister address = sel.selReg(insn.getAddress(), ir::TYPE_U32);
       const AddressSpace space = insn.getAddressSpace(); @@ -2939,7 +2942,7 @@ namespace gbe
       }
     }
 
-    INLINE bool emitOne(Selection::Opaque &sel, const ir::StoreInstruction &insn) const
+    INLINE bool emitOne(Selection::Opaque &sel, const 
+ ir::StoreInstruction &insn, bool &markChildren) const
     {
       using namespace ir;
       const AddressSpace space = insn.getAddressSpace(); @@ -3045,7 +3048,7 @@ namespace gbe
   /*! Bit cast instruction pattern */
   DECL_PATTERN(BitCastInstruction)
   {
-    INLINE bool emitOne(Selection::Opaque &sel, const ir::BitCastInstruction &insn) const
+    INLINE bool emitOne(Selection::Opaque &sel, const 
+ ir::BitCastInstruction &insn, bool &markChildren) const
     {
       using namespace ir;
       const Type dstType = insn.getDstType(); @@ -3154,7 +3157,48 @@ namespace gbe
   /*! Convert instruction pattern */
   DECL_PATTERN(ConvertInstruction)
   {
-    INLINE bool emitOne(Selection::Opaque &sel, const ir::ConvertInstruction &insn) const
+
+    INLINE bool lowerI64Reg(Selection::Opaque &sel, SelectionDAG *dag, GenRegister &src, uint32_t type) const {
+      using namespace ir;
+      GBE_ASSERT(type == GEN_TYPE_UD || type == GEN_TYPE_F);
+      if (dag->insn.getOpcode() == OP_LOADI) {
+        const auto &immInsn = cast<LoadImmInstruction>(dag->insn);
+        const auto imm = immInsn.getImmediate();
+        const Type immType = immInsn.getType();
+        if (immType == TYPE_S64 &&
+          imm.data.s64 <= INT_MAX &&
+          imm.data.s64 >= INT_MIN) {
+          src = GenRegister::immd((int32_t)imm.data.s64);
+          return true;
+        } else if (immType == TYPE_U64 &&
+                   imm.data.u64 <= UINT_MAX) {
+          src = GenRegister::immud((uint32_t)imm.data.s64);
+          return true;
+        }
+      } else if (dag->insn.getOpcode() == OP_CVT) {
+        const auto cvtInsn = cast<ConvertInstruction>(dag->insn);
+        auto srcType = cvtInsn.getSrcType();
+        if (((srcType == TYPE_U32 || srcType == TYPE_S32) &&
+            (type == GEN_TYPE_UD || type == GEN_TYPE_D)) ||
+             ((srcType == TYPE_FLOAT) && type == GEN_TYPE_F)) {
+          src = GenRegister::retype(sel.selReg(cvtInsn.getSrc(0), srcType), type);
+          dag->isRoot = 1;
+          return true;
+        } else if (srcType == TYPE_FLOAT ||
+                   srcType == TYPE_U16 ||
+                   srcType == TYPE_S16 ||
+                   srcType == TYPE_U32 ||
+                   srcType == TYPE_S32) {
+          src = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32), type);
+          dag->isRoot = 1;
+          sel.MOV(src, sel.selReg(cvtInsn.getSrc(0), srcType));
+          return true;
+        }
+      }
+      return false;
+    }
+
+    INLINE bool emitOne(Selection::Opaque &sel, const 
+ ir::ConvertInstruction &insn, bool &markChildren) const
     {
       using namespace ir;
       const Type dstType = insn.getDstType(); @@ -3225,6 +3269,35 @@ namespace gbe
       } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && srcFamily == FAMILY_QWORD) {
         sel.CONVI64_TO_I(dst, src);
       } else if (dstType == ir::TYPE_FLOAT && srcFamily == FAMILY_QWORD) {
+        auto dag = sel.regDAG[src.reg()];
+        SelectionDAG *dag0, *dag1;
+        if (dag->child[0]->insn.getOpcode() == OP_LOADI) {
+          dag0 = dag->child[1];
+          dag1 = dag->child[0];
+        } else {
+          dag0 = dag->child[0];
+          dag1 = dag->child[1];
+        }
+        GBE_ASSERT(!(dag->child[0]->insn.getOpcode() == OP_LOADI &&
+                     dag->child[1]->insn.getOpcode() == OP_LOADI));
+        if (dag->insn.getOpcode() == OP_AND ||
+            dag->insn.getOpcode() == OP_OR  ||
+            dag->insn.getOpcode() == OP_XOR) {
+          GenRegister src0;
+          GenRegister src1;
+          if (lowerI64Reg(sel, dag0, src0, GEN_TYPE_UD) &&
+              lowerI64Reg(sel, dag1, src1, GEN_TYPE_UD)) {
+            switch (dag->insn.getOpcode()) {
+              default:
+              case OP_AND: sel.AND(GenRegister::retype(dst, GEN_TYPE_UD), src0, src1); break;
+              case OP_OR:  sel.OR(GenRegister::retype(dst, GEN_TYPE_UD), src0, src1); break;
+              case OP_XOR: sel.XOR(GenRegister::retype(dst, GEN_TYPE_UD), src0, src1); break;
+            }
+            sel.MOV(dst, GenRegister::retype(dst, GEN_TYPE_UD));
+            markChildren = false;
+            return true;
+          }
+        }
         GenRegister tmp[6];
         for(int i=0; i<6; i++) {
           tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); @@ -3269,7 +3342,7 @@ namespace gbe
   /*! Convert instruction pattern */
   DECL_PATTERN(AtomicInstruction)
   {
-    INLINE bool emitOne(Selection::Opaque &sel, const ir::AtomicInstruction &insn) const
+    INLINE bool emitOne(Selection::Opaque &sel, const 
+ ir::AtomicInstruction &insn, bool &markChildren) const
     {
       using namespace ir;
       const AtomicOps atomicOp = insn.getAtomicOpcode(); @@ -3346,7 +3419,7 @@ namespace gbe
 
   DECL_PATTERN(TernaryInstruction)
    {
-    INLINE bool emitOne(Selection::Opaque &sel, const ir::TernaryInstruction &insn) const {
+    INLINE bool emitOne(Selection::Opaque &sel, const 
+ ir::TernaryInstruction &insn, bool &markChildren) const {
       using namespace ir;
       const Type type = insn.getType();
       const GenRegister dst = sel.selReg(insn.getDst(0), type), @@ -3386,7 +3459,7 @@ namespace gbe
   /*! Label instruction pattern */
   DECL_PATTERN(LabelInstruction)
   {
-    INLINE bool emitOne(Selection::Opaque &sel, const ir::LabelInstruction &insn) const
+    INLINE bool emitOne(Selection::Opaque &sel, const 
+ ir::LabelInstruction &insn, bool &markChildren) const
     {
       using namespace ir;
       const LabelIndex label = insn.getLabelIndex(); @@ -3477,7 +3550,7 @@ namespace gbe
 
   DECL_PATTERN(SampleInstruction)
   {
-    INLINE bool emitOne(Selection::Opaque &sel, const ir::SampleInstruction &insn) const
+    INLINE bool emitOne(Selection::Opaque &sel, const 
+ ir::SampleInstruction &insn, bool &markChildren) const
     {
       using namespace ir;
       GenRegister msgPayloads[4];
@@ -3522,7 +3595,7 @@ namespace gbe
   /*! Typed write instruction pattern. */
   DECL_PATTERN(TypedWriteInstruction)
   {
-    INLINE bool emitOne(Selection::Opaque &sel, const ir::TypedWriteInstruction &insn) const
+    INLINE bool emitOne(Selection::Opaque &sel, const 
+ ir::TypedWriteInstruction &insn, bool &markChildren) const
     {
       using namespace ir;
       const uint32_t simdWidth = sel.ctx.getSimdWidth(); @@ -3605,7 +3678,7 @@ namespace gbe
   /*! get image info instruction pattern. */
   DECL_PATTERN(GetImageInfoInstruction)
   {
-    INLINE bool emitOne(Selection::Opaque &sel, const ir::GetImageInfoInstruction &insn) const
+    INLINE bool emitOne(Selection::Opaque &sel, const 
+ ir::GetImageInfoInstruction &insn, bool &markChildren) const
     {
       using namespace ir;
       GenRegister dst;
--
1.8.3.2

_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list