[Beignet] [PATCH 21/27] Overload I64RHADD function.

junyan.he at inbox.com junyan.he at inbox.com
Tue Jan 6 02:02:29 PST 2015


From: Junyan He <junyan.he at linux.intel.com>

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 backend/src/backend/gen8_context.cpp       | 71 ++++++++++++++++++++++++++++++
 backend/src/backend/gen8_context.hpp       |  1 +
 backend/src/backend/gen_context.hpp        |  2 +-
 backend/src/backend/gen_insn_selection.cpp | 29 +++++++-----
 4 files changed, 91 insertions(+), 12 deletions(-)

diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index e2f705f..a816780 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -312,6 +312,77 @@ namespace gbe
     p->ADD(dst, dst, tmp_dst);
   }
 
+  void Gen8Context::emitI64RHADDInstruction(const SelectionInstruction &insn)
+  { /* Emit the 64-bit rounding half-add dst = (src0 + src1 + 1) >> 1, keeping the carry out of bit 63 so the sum does not wrap. */
+    GenRegister src0 = ra->genReg(insn.src(0));
+    GenRegister src1 = ra->genReg(insn.src(1));
+    GenRegister dst = ra->genReg(insn.dst(0));
+    GenRegister tmp0 = ra->genReg(insn.dst(1)); /* scratch used for carry values */
+    GenRegister tmp1 = ra->genReg(insn.dst(2)); /* scratch used for carry values */
+    GenRegister tmp_dst = ra->genReg(insn.dst(3)); /* scratch holding the sum as separate low/high dword halves */
+    int execWidth = p->curr.execWidth; /* remembered here because the width is forced to 8 below */
+
+    /* src0 and src1 must both be unsigned 64-bit (GEN_TYPE_UL); this is asserted, not converted. */
+    GBE_ASSERT(src0.type == GEN_TYPE_UL && src1.type == GEN_TYPE_UL);
+    dst.type = src0.type;
+    tmp0.type = tmp1.type = GEN_TYPE_UD; /* carries are handled as 32-bit unsigned values */
+    tmp_dst.type = GEN_TYPE_UL;
+
+    GBE_ASSERT(tmp_dst.subnr == 0); /* presumably because the dl/dh views below address whole registers -- TODO confirm */
+    GenRegister dl = tmp_dst.hstride == GEN_HORIZONTAL_STRIDE_0 ? GenRegister::retype(tmp_dst, GEN_TYPE_UD) :
+      GenRegister::retype(GenRegister::ud16grf(tmp_dst.nr, tmp_dst.subnr), GEN_TYPE_UD); /* dl: low dwords of the sum, at the start of tmp_dst */
+    GenRegister dh = tmp_dst.hstride == GEN_HORIZONTAL_STRIDE_0 ?
+      GenRegister::retype(GenRegister::offset(tmp_dst, 0, 4), GEN_TYPE_UD) :
+      GenRegister::retype(GenRegister::ud16grf(tmp_dst.nr + execWidth / 8, tmp_dst.subnr), GEN_TYPE_UD); /* dh: high dwords, execWidth / 8 registers after dl (scalar case: offset 4, presumably bytes) */
+    GenRegister s0l = src0.hstride == GEN_HORIZONTAL_STRIDE_0 ?
+      GenRegister::retype(src0, GEN_TYPE_UD) : GenRegister::unpacked_ud(src0.nr, src0.subnr); /* s0l: low dword of each src0 element -- NOTE(review): assumes unpacked_ud selects every other dword; confirm */
+    GenRegister s0h = src0.hstride == GEN_HORIZONTAL_STRIDE_0 ?
+      GenRegister::retype(GenRegister::offset(src0, 0, 4), GEN_TYPE_UD) :
+      GenRegister::unpacked_ud(src0.nr, src0.subnr + 1); /* s0h: high dword of each src0 element (same assumption as s0l) */
+    GenRegister s1l = src1.hstride == GEN_HORIZONTAL_STRIDE_0 ?
+      GenRegister::retype(src1, GEN_TYPE_UD) : GenRegister::unpacked_ud(src1.nr, src1.subnr); /* s1l: low dword of each src1 element */
+    GenRegister s1h = src1.hstride == GEN_HORIZONTAL_STRIDE_0 ?
+      GenRegister::retype(GenRegister::offset(src1, 0, 4), GEN_TYPE_UD) :
+      GenRegister::unpacked_ud(src1.nr, src1.subnr + 1); /* s1h: high dword of each src1 element */
+
+    GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D); /* accumulator; copied out after every ADDC (expected to hold that ADDC's carry-out -- see Gen ISA addc) */
+    p->push();
+    p->curr.execWidth = 8; /* work on 8 lanes per pass; SIMD16 gets a second pass below */
+    p->ADDC(dl, s0l, s1l); /* dl = low(src0) + low(src1) */
+    p->MOV(tmp0, acc0); /* tmp0 = carry of the low add */
+    p->ADDC(dl, dl, GenRegister::immud(1)); /* dl += 1, the rounding term */
+    p->MOV(tmp1, acc0); /* tmp1 = carry of the +1 */
+    p->ADD(tmp0, tmp0, tmp1); /* tmp0 = total carry to feed into the high dword */
+
+    p->ADDC(dh, s0h, s1h); /* dh = high(src0) + high(src1) */
+    p->MOV(tmp1, acc0); /* tmp1 = carry of the high add */
+    p->ADDC(dh, dh, tmp0); /* dh += carry coming from the low half */
+    p->MOV(tmp0, acc0); /* tmp0 = carry of that second high add */
+    p->ADD(tmp1, tmp0, tmp1); /* tmp1 = carry out of bit 63 of the full sum */
+
+    if (execWidth == 16) { /* repeat the exact same sequence on the second quarter (Qn(..., 1)) */
+      p->curr.quarterControl = 1;
+      p->ADDC(GenRegister::Qn(dl, 1), GenRegister::Qn(s0l, 1), GenRegister::Qn(s1l, 1));
+      p->MOV(GenRegister::Qn(tmp0, 1), acc0);
+      p->ADDC(GenRegister::Qn(dl, 1), GenRegister::Qn(dl, 1), GenRegister::immud(1));
+      p->MOV(GenRegister::Qn(tmp1, 1), acc0);
+      p->ADD(GenRegister::Qn(tmp0, 1), GenRegister::Qn(tmp0, 1), GenRegister::Qn(tmp1, 1));
+
+      p->ADDC(GenRegister::Qn(dh, 1), GenRegister::Qn(s0h, 1), GenRegister::Qn(s1h, 1));
+      p->MOV(GenRegister::Qn(tmp1, 1), acc0);
+      p->ADDC(GenRegister::Qn(dh, 1), GenRegister::Qn(dh, 1), GenRegister::Qn(tmp0, 1));
+      p->MOV(GenRegister::Qn(tmp0, 1), acc0);
+      p->ADD(GenRegister::Qn(tmp1, 1), GenRegister::Qn(tmp0, 1), GenRegister::Qn(tmp1, 1));
+    }
+    p->pop();
+
+    packLongVec(GenRegister::retype(tmp_dst, GEN_TYPE_UD), GenRegister::retype(dst, GEN_TYPE_UD), execWidth); /* arguments are (unpacked, packed, simd): moves the split sum from tmp_dst into dst */
+
+    p->SHR(dst, dst, GenRegister::immud(1)); /* halve the low 64 bits of the sum */
+    p->SHL(tmp_dst, tmp1, GenRegister::immud(63)); /* move the saved carry up to bit 63 */
+    p->ADD(dst, dst, tmp_dst); /* put the carry back as the top bit of the result */
+  }
+
   void Gen8Context::packLongVec(GenRegister unpacked, GenRegister packed, uint32_t simd)
   {
     GBE_ASSERT(packed.subnr == 0);
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 9204d10..6dc8afc 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -53,6 +53,7 @@ namespace gbe
     virtual void emitBinaryInstruction(const SelectionInstruction &insn);
     virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
     virtual void emitI64MULHIInstruction(const SelectionInstruction &insn);
+    virtual void emitI64RHADDInstruction(const SelectionInstruction &insn);
     virtual void emitI64HADDInstruction(const SelectionInstruction &insn);
     virtual void emitWrite64Instruction(const SelectionInstruction &insn);
     virtual void emitRead64Instruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index e350676..49d6017 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -132,7 +132,7 @@ namespace gbe
     virtual void emitI64MULHIInstruction(const SelectionInstruction &insn);
     void emitI64MADSATInstruction(const SelectionInstruction &insn);
     virtual void emitI64HADDInstruction(const SelectionInstruction &insn);
-    void emitI64RHADDInstruction(const SelectionInstruction &insn);
+    virtual void emitI64RHADDInstruction(const SelectionInstruction &insn);
     void emitI64ShiftInstruction(const SelectionInstruction &insn);
     void emitI64CompareInstruction(const SelectionInstruction &insn);
     void emitI64SATADDInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 071fc1f..9c7226d 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -533,7 +533,7 @@ namespace gbe
     /*! (x+y)>>1 without mod. overflow */
     void I64HADD(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_num);
     /*! (x+y+1)>>1 without mod. overflow */
-    void I64RHADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]);
+    void I64RHADD(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_num);
     /*! Shift a 64-bit integer */
     void I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[7]);
     /*! Compare 64-bit integer */
@@ -1498,12 +1498,12 @@ namespace gbe
       insn->dst(i + 1) = tmp[i];
   }
 
-  void Selection::Opaque::I64RHADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]) {
-    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64RHADD, 5, 2);
+  void Selection::Opaque::I64RHADD(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_num) { /* Append a SEL_OP_I64RHADD instruction; tmp points at tmp_num temporaries supplied by the caller. */
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64RHADD, tmp_num + 1, 2); /* tmp_num + 1 destination slots (dst plus the temporaries) and 2 sources */
     insn->dst(0) = dst;
     insn->src(0) = src0;
     insn->src(1) = src1;
-    for(int i = 0; i < 4; i ++)
+    for(int i = 0; i < tmp_num; i ++) /* temporaries occupy destination slots 1..tmp_num */
       insn->dst(i + 1) = tmp[i];
   }
 
@@ -2445,13 +2445,20 @@ namespace gbe
             break;
           }
         case OP_I64RHADD:
-         {
-          GenRegister tmp[4];
-          for(int i=0; i<4; i++)
-            tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
-          sel.I64RHADD(dst, src0, src1, tmp);
-          break;
-         }
+          {
+            GenRegister tmp[4];
+            if (!sel.hasLongType()) {
+              for(int i=0; i<4; i++)
+                tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
+              sel.I64RHADD(dst, src0, src1, tmp, 4);
+            } else {
+              tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U64);
+              tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U64);
+              tmp[2] = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64);
+              sel.I64RHADD(dst, src0, src1, tmp, 3);
+            }
+            break;
+          }
         case OP_UPSAMPLE_SHORT:
         {
           dst = GenRegister::retype(sel.unpacked_uw(dst.reg()), GEN_TYPE_B);
-- 
1.9.1



More information about the Beignet mailing list