[Beignet] [PATCH 22/27] Overload the I64MADSAT function.

junyan.he at inbox.com junyan.he at inbox.com
Tue Jan 6 02:02:39 PST 2015


From: Junyan He <junyan.he at linux.intel.com>

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 backend/src/backend/gen8_context.cpp       | 148 +++++++++++++++++++++++++++++
 backend/src/backend/gen8_context.hpp       |   2 +
 backend/src/backend/gen_context.hpp        |   2 +-
 backend/src/backend/gen_insn_selection.cpp |  40 +++++---
 4 files changed, 176 insertions(+), 16 deletions(-)

diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index a816780..8960d5b 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -218,6 +218,154 @@ namespace gbe
     }
   }
 
+  /* Emit the Gen8 code for the saturated 64-bit multiply-add:
+     dst(0) = saturate(src(0) * src(1) + src(2)). dst(1)..dst(6) are scratch
+     registers; dst(1) is used as the high half (dst_h) of the full product. */
+  void Gen8Context::emitI64MADSATInstruction(const SelectionInstruction &insn)
+  {
+    GenRegister src0 = ra->genReg(insn.src(0));
+    GenRegister src1 = ra->genReg(insn.src(1));
+    GenRegister src2 = ra->genReg(insn.src(2));
+    GenRegister dst_l = ra->genReg(insn.dst(0));
+    GenRegister dst_h = ra->genReg(insn.dst(1));
+    GenRegister s0_abs = ra->genReg(insn.dst(2));
+    GenRegister s1_abs = ra->genReg(insn.dst(3));
+    GenRegister tmp0 = ra->genReg(insn.dst(4));
+    GenRegister tmp1 = ra->genReg(insn.dst(5));
+    GenRegister sign = ra->genReg(insn.dst(6));
+    GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag);
+
+    if (src0.type == GEN_TYPE_UL) {
+      /* Always should be the same long type. */
+      GBE_ASSERT(src1.type == GEN_TYPE_UL);
+      GBE_ASSERT(src2.type == GEN_TYPE_UL);
+      dst_l.type = dst_h.type = GEN_TYPE_UL;
+      tmp0.type = tmp1.type = GEN_TYPE_UL;
+      calculateFullU64MUL(p, src0, src1, dst_h, dst_l, tmp0, tmp1);
+
+      /* Implement the logic:
+      dst_l += src2;
+      if (dst_h || dst_l < src2)  // product overflow, or carry out of the add
+        dst_l = 0xFFFFFFFFFFFFFFFFULL; */
+      p->ADD(dst_l, dst_l, src2);
+
+      p->push();
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.noMask = 1;
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+      p->CMP(GEN_CONDITIONAL_NZ, dst_h, GenRegister::immud(0), tmp0);
+      p->curr.noMask = 0;
+      /* The MOV must be predicated on the CMP flag; with the predicate left
+         at NONE it would saturate every channel unconditionally. */
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(dst_l, GenRegister::immuint64(0xFFFFFFFFFFFFFFFF));
+      p->pop();
+
+      p->push();
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.noMask = 1;
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+      p->CMP(GEN_CONDITIONAL_L, dst_l, src2, tmp0);
+      p->curr.noMask = 0;
+      /* Same as above: only saturate where the add wrapped around. */
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(dst_l, GenRegister::immuint64(0xFFFFFFFFFFFFFFFF));
+      p->pop();
+    } else {
+      GBE_ASSERT(src0.type == GEN_TYPE_L);
+      GBE_ASSERT(src1.type == GEN_TYPE_L);
+      GBE_ASSERT(src2.type == GEN_TYPE_L);
+
+      calculateFullS64MUL(p, src0, src1, dst_h, dst_l, s0_abs, s1_abs, tmp0,
+                          tmp1, sign, flagReg);
+
+      /* The sign scratch register is reused to hold the 64-bit sum. */
+      GenRegister sum = sign;
+      sum.type = GEN_TYPE_UL;
+      src2.type = GEN_TYPE_L;
+      dst_l.type = GEN_TYPE_UL;
+      p->NOP();
+      p->ADD(sum, src2, dst_l);
+
+      /* Implement this logic:
+      if(src2 >= 0) {
+        if(dst_l > sum) {
+          dst_h++;
+          if(CL_LONG_MIN == dst_h) { dst_h = CL_LONG_MAX; sum = CL_ULONG_MAX; }
+        }
+      } */
+      p->push();
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.noMask = 1;
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+      p->CMP(GEN_CONDITIONAL_GE, src2, GenRegister::immud(0), tmp1);
+      p->curr.noMask = 0;
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->CMP(GEN_CONDITIONAL_G, dst_l, sum, tmp1);
+      p->ADD(dst_h, dst_h, GenRegister::immud(1));
+      p->MOV(tmp0, GenRegister::immint64(-0x7FFFFFFFFFFFFFFFLL - 1LL));
+      p->CMP(GEN_CONDITIONAL_EQ, dst_h, tmp0, tmp1);
+      p->MOV(dst_h, GenRegister::immint64(0x7FFFFFFFFFFFFFFFLL));
+      p->MOV(sum, GenRegister::immuint64(0xFFFFFFFFFFFFFFFFULL));
+      p->pop();
+      p->NOP();
+
+      /* Implement this logic:
+      else {
+        if(dst_l < sum) {
+          dst_h--;
+          if(CL_LONG_MAX == dst_h) { dst_h = CL_LONG_MIN; sum = 0; }
+        }
+      } */
+      p->push();
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.noMask = 1;
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+      p->CMP(GEN_CONDITIONAL_L, src2, GenRegister::immud(0), tmp1);
+      p->curr.noMask = 0;
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->CMP(GEN_CONDITIONAL_L, dst_l, sum, tmp1);
+      p->ADD(dst_h, dst_h, GenRegister::immd(-1));
+      p->MOV(tmp0, GenRegister::immint64(0x7FFFFFFFFFFFFFFFLL));
+      p->CMP(GEN_CONDITIONAL_EQ, dst_h, tmp0, tmp1);
+      p->MOV(dst_h, GenRegister::immint64(-0x7FFFFFFFFFFFFFFFLL - 1LL));
+      p->MOV(sum, GenRegister::immud(0));
+      p->pop();
+      p->NOP();
+
+      /* saturate logic:
+      if(dst_h > 0)
+        sum = CL_LONG_MAX;
+      else if(dst_h < -1)
+        sum = CL_LONG_MIN;
+      cl_long result = (cl_long) sum; */
+      p->MOV(dst_l, sum);
+
+      dst_h.type = GEN_TYPE_L;
+      p->push();
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.noMask = 1;
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+      p->CMP(GEN_CONDITIONAL_G, dst_h, GenRegister::immud(0), tmp1);
+      p->curr.noMask = 0;
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(dst_l, GenRegister::immint64(0x7FFFFFFFFFFFFFFFLL));
+      p->pop();
+      p->NOP();
+
+      p->push();
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.noMask = 1;
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+      p->CMP(GEN_CONDITIONAL_L, dst_h, GenRegister::immd(-1), tmp1);
+      p->curr.noMask = 0;
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(dst_l, GenRegister::immint64(-0x7FFFFFFFFFFFFFFFLL - 1LL));
+      p->pop();
+      p->NOP();
+    }
+  }
+
   void Gen8Context::emitI64MULInstruction(const SelectionInstruction &insn)
   {
     GenRegister src0 = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 6dc8afc..9f1d749 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -55,6 +55,8 @@ namespace gbe
     virtual void emitI64MULHIInstruction(const SelectionInstruction &insn);
     virtual void emitI64RHADDInstruction(const SelectionInstruction &insn);
     virtual void emitI64HADDInstruction(const SelectionInstruction &insn);
+    virtual void emitI64MADSATInstruction(const SelectionInstruction &insn);
+
     virtual void emitWrite64Instruction(const SelectionInstruction &insn);
     virtual void emitRead64Instruction(const SelectionInstruction &insn);
     virtual void emitI64MULInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 49d6017..a366e7f 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -130,7 +130,7 @@ namespace gbe
     virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
     void emitTernaryInstruction(const SelectionInstruction &insn);
     virtual void emitI64MULHIInstruction(const SelectionInstruction &insn);
-    void emitI64MADSATInstruction(const SelectionInstruction &insn);
+    virtual void emitI64MADSATInstruction(const SelectionInstruction &insn);
     virtual void emitI64HADDInstruction(const SelectionInstruction &insn);
     virtual void emitI64RHADDInstruction(const SelectionInstruction &insn);
     void emitI64ShiftInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 9c7226d..21dfdb6 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -527,7 +527,7 @@ namespace gbe
     /*! Convert 64-bit integer to 32-bit float */
     void CONVF_TO_I64(Reg dst, Reg src, GenRegister tmp[2]);
     /*! Saturated 64bit x*y + z */
-    void I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[9]);
+    void I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister* tmp, int tmp_num);
     /*! High 64bit of x*y */
     void I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_num);
     /*! (x+y)>>1 without mod. overflow */
@@ -1470,13 +1470,13 @@ namespace gbe
       insn->dst(i + 1) = tmp[i];
   }
 
-  void Selection::Opaque::I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[9]) {
-    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MADSAT, 10, 3);
+  void Selection::Opaque::I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister *tmp, int tmp_num) {
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MADSAT, tmp_num + 1, 3);
     insn->dst(0) = dst;
     insn->src(0) = src0;
     insn->src(1) = src1;
     insn->src(2) = src2;
-    for(int i = 0; i < 9; i ++)
+    for(int i = 0; i < tmp_num; i ++)
       insn->dst(i + 1) = tmp[i];
   }
 
@@ -4045,17 +4045,27 @@ namespace gbe
       switch(insn.getOpcode()) {
         case OP_I64MADSAT:
          {
-          GenRegister tmp[9];
-          for(int i=0; i<9; i++) {
-            tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
-            tmp[i].type = GEN_TYPE_UD;
-          }
-          sel.push();
-            sel.curr.flag = 0;
-            sel.curr.subFlag = 1;
-            sel.I64MADSAT(dst, src0, src1, src2, tmp);
-          sel.pop();
-          break;
+           GenRegister tmp[9];
+           int tmp_num;
+           if (!sel.hasLongType()) {
+             tmp_num = 9;
+             for(int i=0; i<9; i++) {
+               tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
+               tmp[i].type = GEN_TYPE_UD;
+             }
+           } else {
+             tmp_num = 6;
+             for(int i=0; i<6; i++) {
+               tmp[i] = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64);
+               tmp[i].type = GEN_TYPE_UL;
+             }
+           }
+           sel.push();
+           sel.curr.flag = 0;
+           sel.curr.subFlag = 1;
+           sel.I64MADSAT(dst, src0, src1, src2, tmp, tmp_num);
+           sel.pop();
+           break;
          }
         case OP_MAD:
          {
-- 
1.9.1



More information about the Beignet mailing list