[Beignet] [PATCH 23/27] Overload I64 Div and Rem function.

junyan.he at inbox.com junyan.he at inbox.com
Tue Jan 6 02:02:46 PST 2015


From: Junyan He <junyan.he at linux.intel.com>

Because the math shared function does not support 64-bit
div and rem, we can just unpack the I64 operands and use the
old function to handle them.

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 backend/src/backend/gen8_context.cpp       | 31 +++++++++++++++++++++
 backend/src/backend/gen8_context.hpp       |  2 ++
 backend/src/backend/gen_context.hpp        |  2 +-
 backend/src/backend/gen_insn_selection.cpp | 43 ++++++++++++++++++++++--------
 4 files changed, 66 insertions(+), 12 deletions(-)

diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 8960d5b..daa4182 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -531,6 +531,37 @@ namespace gbe
     p->ADD(dst, dst, tmp_dst);
   }
 
+  void Gen8Context::emitI64DIVREMInstruction(const SelectionInstruction &cnst_insn)
+  {
+    SelectionInstruction* insn = const_cast<SelectionInstruction*>(&cnst_insn);
+    GenRegister packed_src0 = ra->genReg(insn->src(0));
+    GenRegister packed_src1 = ra->genReg(insn->src(1));
+    GenRegister dst = ra->genReg(insn->dst(0));
+    int tmp_reg_n = 14;
+
+    if (packed_src0.hstride != GEN_HORIZONTAL_STRIDE_0) {
+      GenRegister unpacked_src0 = ra->genReg(insn->dst(tmp_reg_n));
+      unpackLongVec(packed_src0, unpacked_src0, p->curr.execWidth);
+      tmp_reg_n++;
+      insn->src(0) = unpacked_src0;
+    }
+    if (packed_src1.hstride != GEN_HORIZONTAL_STRIDE_0) {
+      GenRegister unpacked_src1 = ra->genReg(insn->dst(tmp_reg_n));
+      unpackLongVec(packed_src1, unpacked_src1, p->curr.execWidth);
+      tmp_reg_n++;
+      insn->src(1) = unpacked_src1;
+    }
+    GBE_ASSERT(tmp_reg_n <= insn->dstNum);
+
+    GenContext::emitI64DIVREMInstruction(*insn);
+
+    if (dst.hstride != GEN_HORIZONTAL_STRIDE_0) {
+      GenRegister dst_packed = ra->genReg(insn->dst(14));
+      packLongVec(dst, dst_packed, p->curr.execWidth);
+      p->MOV(dst, dst_packed);
+    }
+  }
+
   void Gen8Context::packLongVec(GenRegister unpacked, GenRegister packed, uint32_t simd)
   {
     GBE_ASSERT(packed.subnr == 0);
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 9f1d749..bea78b6 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -60,6 +60,8 @@ namespace gbe
     virtual void emitWrite64Instruction(const SelectionInstruction &insn);
     virtual void emitRead64Instruction(const SelectionInstruction &insn);
     virtual void emitI64MULInstruction(const SelectionInstruction &insn);
+    virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn);
+
   protected:
     virtual GenEncoder* generateEncoder(void) {
       return GBE_NEW(Gen8Encoder, this->simdWidth, 8, deviceID);
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index a366e7f..3d01f2b 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -164,7 +164,7 @@ namespace gbe
     void emitUnSpillRegInstruction(const SelectionInstruction &insn);
     void emitGetImageInfoInstruction(const SelectionInstruction &insn);
     virtual void emitI64MULInstruction(const SelectionInstruction &insn);
-    void emitI64DIVREMInstruction(const SelectionInstruction &insn);
+    virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn);
     void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
     void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
 
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 21dfdb6..36d0c73 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -617,9 +617,9 @@ namespace gbe
     /*! Multiply 64-bit integers */
     void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister *tmp, bool native_long);
     /*! 64-bit integer division */
-    void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]);
+    void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_int);
     /*! 64-bit integer remainder of division */
-    void I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]);
+    void I64REM(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_int);
     /* common functions for both binary instruction and sel_cmp and compare instruction.
        It will handle the IMM or normal register assignment, and will try to avoid LOADI
        as much as possible. */
@@ -1373,21 +1373,21 @@ namespace gbe
     }
   }
 
-  void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]) {
-    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64DIV, 14, 2);
+  void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister* tmp, int tmp_num) {
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64DIV, tmp_num + 1, 2);
     insn->dst(0) = dst;
     insn->src(0) = src0;
     insn->src(1) = src1;
-    for(int i = 0; i < 13; i++)
+    for(int i = 0; i < tmp_num; i++)
       insn->dst(i + 1) = tmp[i];
   }
 
-  void Selection::Opaque::I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]) {
-    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64REM, 14, 2);
+  void Selection::Opaque::I64REM(Reg dst, Reg src0, Reg src1, GenRegister* tmp, int tmp_num) {
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64REM, tmp_num + 1, 2);
     insn->dst(0) = dst;
     insn->src(0) = src0;
     insn->src(1) = src1;
-    for(int i = 0; i < 13; i++)
+    for(int i = 0; i < tmp_num; i++)
       insn->dst(i + 1) = tmp[i];
   }
 
@@ -2193,18 +2193,39 @@ namespace gbe
         GBE_ASSERT(op != OP_REM);
         sel.MATH(dst, GEN_MATH_FUNCTION_FDIV, src0, src1);
       } else if (type == TYPE_S64 || type == TYPE_U64) {
-        GenRegister tmp[13];
+        GenRegister tmp[15];
+        int tmp_num = 13;
         for(int i=0; i < 13; i++) {
           tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
           tmp[i].type = GEN_TYPE_UD;
         }
+
+        if (sel.hasLongType()) {
+          if (!sel.isScalarReg(insn.getSrc(0))) {
+            tmp[tmp_num] = GenRegister::retype(sel.selReg(sel.reg(FAMILY_QWORD)), src0.type);
+            tmp_num++;
+          }
+
+          if (!sel.isScalarReg(insn.getSrc(1))) {
+            tmp[tmp_num] = GenRegister::retype(sel.selReg(sel.reg(FAMILY_QWORD)), src1.type);
+            tmp_num++;
+          }
+
+          /* We need at least one tmp register to convert if dst is not scalar. */
+          if (!sel.isScalarReg(insn.getDst(0)) && sel.isScalarReg(insn.getSrc(0))
+              && sel.isScalarReg(insn.getSrc(1))) {
+            GBE_ASSERT(tmp_num == 13);
+            tmp[tmp_num] = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64);
+            tmp_num++;
+          }
+        }
         sel.push();
           sel.curr.flag = 0;
           sel.curr.subFlag = 1;
           if(op == OP_DIV)
-            sel.I64DIV(dst, src0, src1, tmp);
+            sel.I64DIV(dst, src0, src1, tmp, tmp_num);
           else
-            sel.I64REM(dst, src0, src1, tmp);
+            sel.I64REM(dst, src0, src1, tmp, tmp_num);
         sel.pop();
       }
       markAllChildren(dag);
-- 
1.9.1



More information about the Beignet mailing list