[Beignet] [PATCH 18/27] Overload I64MUL function.

junyan.he at inbox.com junyan.he at inbox.com
Tue Jan 6 02:02:07 PST 2015


From: Junyan He <junyan.he at linux.intel.com>

BDW supports a native 32-bit by 32-bit multiply instruction, so we can
use it to simplify the MUL implementation for the long (64-bit) type.

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 backend/src/backend/gen8_context.cpp       | 31 +++++++++++++++++++++++++++
 backend/src/backend/gen8_context.hpp       |  1 +
 backend/src/backend/gen_context.hpp        |  2 +-
 backend/src/backend/gen_insn_selection.cpp | 34 ++++++++++++++++++++++--------
 4 files changed, 58 insertions(+), 10 deletions(-)

diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 18a3425..85b72a5 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -102,6 +102,37 @@ namespace gbe
     }
   }
 
+  void Gen8Context::emitI64MULInstruction(const SelectionInstruction &insn)
+  { /* Emits dst(0) = src(0) * src(1) from three 32-bit partial products; dst(1) is used as scratch. */
+    GenRegister src0 = ra->genReg(insn.src(0));
+    GenRegister src1 = ra->genReg(insn.src(1));
+    GenRegister dst = ra->genReg(insn.dst(0));
+    GenRegister res = ra->genReg(insn.dst(1)); /* holds each cross product before it is added to dst */
+
+    src0.type = src1.type = GEN_TYPE_UD; /* sources are accessed as UD */
+    dst.type = GEN_TYPE_UL; /* both destinations are accessed as UL */
+    res.type = GEN_TYPE_UL;
+
+    /* Low 32 bits X low 32 bits. A stride-0 source is only retyped; otherwise an unpacked UD view is built from nr/subnr. */
+    GenRegister s0l = src0.hstride == GEN_HORIZONTAL_STRIDE_0 ?
+      GenRegister::retype(src0, GEN_TYPE_UD) : GenRegister::unpacked_ud(src0.nr, src0.subnr);
+    GenRegister s1l = src1.hstride == GEN_HORIZONTAL_STRIDE_0 ?
+      GenRegister::retype(src1, GEN_TYPE_UD)  : GenRegister::unpacked_ud(src1.nr, src1.subnr);
+    p->MUL(dst, s0l, s1l); /* this product is written directly to dst */
+
+    /* Low 32 bits X high 32 bits, shifted left by 32 and accumulated into dst. */
+    GenRegister s1h = GenRegister::offset(s1l, 0, 4); /* NOTE(review): presumably a 4-byte offset selecting the high dword - confirm */
+    p->MUL(res, s0l, s1h);
+    p->SHL(res, res, GenRegister::immud(32));
+    p->ADD(dst, dst, res);
+
+    /* High 32 bits X low 32 bits, handled the same way. No high X high product is emitted; it would only affect bits 64 and above. */
+    GenRegister s0h = GenRegister::offset(s0l, 0, 4);
+    p->MUL(res, s0h, s1l);
+    p->SHL(res, res, GenRegister::immud(32));
+    p->ADD(dst, dst, res);
+  }
+
   void Gen8Context::packLongVec(GenRegister unpacked, GenRegister packed, uint32_t simd)
   {
     GBE_ASSERT(packed.subnr == 0);
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 54cc29d..7e6d3b3 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -54,6 +54,7 @@ namespace gbe
     virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
     virtual void emitWrite64Instruction(const SelectionInstruction &insn);
     virtual void emitRead64Instruction(const SelectionInstruction &insn);
+    virtual void emitI64MULInstruction(const SelectionInstruction &insn);
   protected:
     virtual GenEncoder* generateEncoder(void) {
       return GBE_NEW(Gen8Encoder, this->simdWidth, 8, deviceID);
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 3593d66..2c97092 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -163,7 +163,7 @@ namespace gbe
     void emitSpillRegInstruction(const SelectionInstruction &insn);
     void emitUnSpillRegInstruction(const SelectionInstruction &insn);
     void emitGetImageInfoInstruction(const SelectionInstruction &insn);
-    void emitI64MULInstruction(const SelectionInstruction &insn);
+    virtual void emitI64MULInstruction(const SelectionInstruction &insn);
     void emitI64DIVREMInstruction(const SelectionInstruction &insn);
     void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
     void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 60f45f7..b63252a 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -615,7 +615,7 @@ namespace gbe
     /*! Get image information */
     void GET_IMAGE_INFO(uint32_t type, GenRegister *dst, uint32_t dst_num, uint32_t bti);
     /*! Multiply 64-bit integers */
-    void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]);
+    void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister *tmp, bool native_long);
     /*! 64-bit integer division */
     void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]);
     /*! 64-bit integer remainder of division */
@@ -1354,13 +1354,23 @@ namespace gbe
     insn->extra.function = function;
   }
 
-  void Selection::Opaque::I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) {
-    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MUL, 7, 2);
+  void Selection::Opaque::I64MUL(Reg dst, Reg src0, Reg src1, GenRegister *tmp, bool native_long) { /* reads tmp[0] if native_long, else tmp[0..5] */
+    SelectionInstruction *insn = NULL;
+    if (native_long)
+      insn = this->appendInsn(SEL_OP_I64MUL, 2, 2); /* dst(0..1) and src(0..1) are filled in below */
+    else
+      insn = this->appendInsn(SEL_OP_I64MUL, 7, 2); /* dst(0..6) and src(0..1) are filled in below */
+
     insn->dst(0) = dst;
     insn->src(0) = src0;
     insn->src(1) = src1;
-    for(int i = 0; i < 6; i++)
-      insn->dst(i + 1) = tmp[i];
+
+    if (native_long) {
+      insn->dst(1) = tmp[0]; /* single temporary on the native-long path */
+    } else {
+      for (int i = 0; i < 6; i++)
+        insn->dst(i + 1) = tmp[i]; /* six temporaries on the non-native path */
+    }
   }
 
   void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]) {
@@ -2387,10 +2397,16 @@ namespace gbe
             sel.pop();
             return false;
           } else if (type == TYPE_S64 || type == TYPE_U64) {
-            GenRegister tmp[6];
-            for(int i = 0; i < 6; i++)
-              tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
-            sel.I64MUL(dst, src0, src1, tmp);
+            if (sel.hasLongType()) {
+              GenRegister tmp;
+              tmp = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64);
+              sel.I64MUL(dst, src0, src1, &tmp, true);
+            } else {
+              GenRegister tmp[6];
+              for(int i = 0; i < 6; i++)
+                tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
+              sel.I64MUL(dst, src0, src1, tmp, false);
+            }
           } else
             sel.MUL(dst, src0, src1);
           break;
-- 
1.9.1



More information about the Beignet mailing list