[Beignet] [PATCH 03/13] Backend: Add FDIV64 function for gen_insn_selection.

Fri Sep 18 02:58:14 PDT 2015

From: Junyan He <junyan.he at linux.intel.com>

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 backend/src/backend/gen_context.cpp                |  4 ++
 backend/src/backend/gen_context.hpp                |  1 +
 .../src/backend/gen_insn_gen7_schedule_info.hxx    |  1 +
 backend/src/backend/gen_insn_selection.cpp         | 52 +++++++++++++++++++++-
 backend/src/backend/gen_insn_selection.hxx         |  1 +
 5 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 25fdf08..9e2fd03 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -1679,6 +1679,10 @@ namespace gbe
     }
   }
 
+  void GenContext::emitF64DIVInstruction(const SelectionInstruction &insn) { // declared virtual in gen_context.hpp
+    GBE_ASSERT(0); // No support for double on Gen7; this implementation unconditionally asserts
+  }
+
   void GenContext::emitTernaryInstruction(const SelectionInstruction &insn) {
     const GenRegister dst = ra->genReg(insn.dst(0));
     const GenRegister src0 = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 34f9293..57eb0a6 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -173,6 +173,7 @@ namespace gbe
     void emitGetImageInfoInstruction(const SelectionInstruction &insn);
     virtual void emitI64MULInstruction(const SelectionInstruction &insn);
     virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn);
+    virtual void emitF64DIVInstruction(const SelectionInstruction &insn);
     void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
     void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
     unsigned beforeMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister flagTemp, unsigned desc);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index d073770..9b60c17 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -43,3 +43,4 @@ DECL_GEN7_SCHEDULE(Atomic,          80,        1,        1)
 DECL_GEN7_SCHEDULE(I64MUL,          20,        40,      20)
 DECL_GEN7_SCHEDULE(I64SATADD,       20,        40,      20)
 DECL_GEN7_SCHEDULE(I64SATSUB,       20,        40,      20)
+DECL_GEN7_SCHEDULE(F64DIV,          20,        40,      20)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 596d828..457a629 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -361,8 +361,10 @@ namespace gbe
     bool has32X32Mul() const { return bHas32X32Mul; }
     void setHas32X32Mul(bool b) { bHas32X32Mul = b; }
     bool hasLongType() const { return bHasLongType; }
+    bool hasDoubleType() const { return bHasDoubleType; }
     bool hasHalfType() const { return bHasHalfType; }
     void setHasLongType(bool b) { bHasLongType = b; }
+    void setHasDoubleType(bool b) { bHasDoubleType = b; }
     void setHasHalfType(bool b) { bHasHalfType = b; }
     bool hasLongRegRestrict() { return bLongRegRestrict; }
     void setLongRegRestrict(bool b) { bLongRegRestrict = b; }
@@ -669,6 +671,8 @@ namespace gbe
     void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_int);
     /*! 64-bit integer remainder of division */
     void I64REM(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_int);
+    /*! double division */
+    void F64DIV(Reg dst, Reg src0, Reg src1, GenRegister* tmp, int tmpNum);
     /* common functions for both binary instruction and sel_cmp and compare instruction.
        It will handle the IMM or normal register assignment, and will try to avoid LOADI
        as much as possible. */
@@ -745,6 +749,7 @@ namespace gbe
     uint32_t currAuxLabel;
     bool bHas32X32Mul;
     bool bHasLongType;
+    bool bHasDoubleType;
     bool bHasHalfType;
     bool bLongRegRestrict;
     uint32_t ldMsgOrder;
@@ -788,7 +793,7 @@ namespace gbe
     curr(ctx.getSimdWidth()), file(ctx.getFunction().getRegisterFile()),
     maxInsnNum(ctx.getFunction().getLargestBlockSize()), dagPool(maxInsnNum),
     stateNum(0), vectorNum(0), bwdCodeGeneration(false), currAuxLabel(ctx.getFunction().labelNum()),
-    bHas32X32Mul(false), bHasLongType(false), bHasHalfType(false), bLongRegRestrict(false),
+    bHas32X32Mul(false), bHasLongType(false), bHasDoubleType(false), bHasHalfType(false), bLongRegRestrict(false),
     ldMsgOrder(LD_MSG_ORDER_IVB), slowByteGather(false)
   {
     const ir::Function &fn = ctx.getFunction();
@@ -1618,6 +1623,15 @@ namespace gbe
       insn->dst(i + 1) = tmp[i];
   }
 
+  void Selection::Opaque::F64DIV(Reg dst, Reg src0, Reg src1, GenRegister* tmp, int tmpNum) { // append one SEL_OP_F64DIV instruction
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_F64DIV, tmpNum + 1, 2); // presumably (opcode, dst count, src count): result + temporaries, two operands
+    insn->dst(0) = dst; // dst slot 0 is the caller's result register
+    insn->src(0) = src0;
+    insn->src(1) = src1;
+    for(int i = 0; i < tmpNum; i++)
+      insn->dst(i + 1) = tmp[i]; // temporaries are recorded in dst slots 1..tmpNum
+  }
+
   void Selection::Opaque::ALU1(SelectionOpcode opcode, Reg dst, Reg src) {
     SelectionInstruction *insn = this->appendInsn(opcode, 1, 1);
     insn->dst(0) = dst;
@@ -2076,6 +2090,7 @@ namespace gbe
   Selection8::Selection8(GenContext &ctx) : Selection(ctx) {
     this->opaque->setHas32X32Mul(true);
     this->opaque->setHasLongType(true);
+    this->opaque->setHasDoubleType(true);
     this->opaque->setSlowByteGather(true);
     this->opaque->setHasHalfType(true);
   }
@@ -2083,6 +2098,7 @@ namespace gbe
   SelectionChv::SelectionChv(GenContext &ctx) : Selection(ctx) {
     this->opaque->setHas32X32Mul(true);
     this->opaque->setHasLongType(true);
+    this->opaque->setHasDoubleType(true);
     this->opaque->setLongRegRestrict(true);
     this->opaque->setSlowByteGather(true);
     this->opaque->setHasHalfType(true);
@@ -2091,6 +2107,7 @@ namespace gbe
   Selection9::Selection9(GenContext &ctx) : Selection(ctx) {
     this->opaque->setHas32X32Mul(true);
     this->opaque->setHasLongType(true);
+    this->opaque->setHasDoubleType(true);
     this->opaque->setLdMsgOrder(LD_MSG_ORDER_SKL);
     this->opaque->setSlowByteGather(true);
     this->opaque->setHasHalfType(true);
@@ -2580,6 +2597,39 @@ namespace gbe
           else
             sel.I64REM(dst, src0, src1, tmp, tmp_num);
         sel.pop();
+      } else if (type == TYPE_DOUBLE) {
+        if (!sel.hasDoubleType())
+          GBE_ASSERT(0);
+
+        GenRegister tmp[10];
+        int tmpNum = 7;
+        ir::RegisterFamily fm;
+        if (sel.ctx.getSimdWidth() == 16) {
+          fm = FAMILY_WORD;
+        } else {
+          fm = FAMILY_DWORD;
+        }
+
+        /* madm and invm need special accumulator support, which requires align16
+           mode. If any src is uniform, we need another tmp register and must MOV
+           the uniform one to it. Because madm and invm work in align16 mode, the
+           channel mask is different from align1 mode, so we cannot write the
+           result directly to dst; we need a tmp register to hold the result and
+           MOV it to dst later. */
+        tmpNum++; //For the dst.
+        if (src0.hstride == GEN_HORIZONTAL_STRIDE_0) tmpNum++;
+        if (src1.hstride == GEN_HORIZONTAL_STRIDE_0) tmpNum++;
+
+        for (int i = 0; i < tmpNum; i++)
+          tmp[i] = GenRegister::df8grf(sel.reg(fm));
+
+        sel.push();
+          sel.curr.flag = 0;
+          sel.curr.subFlag = 1;
+          sel.F64DIV(dst, src0, src1, tmp, tmpNum);
+        sel.pop();
+      } else {
+        GBE_ASSERT(0);
       }
       markAllChildren(dag);
       return true;
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index adbb137..479398b 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -90,3 +90,4 @@ DECL_SELECTION_IR(ENDIF, UnaryInstruction)
 DECL_SELECTION_IR(ELSE, UnaryInstruction)
 DECL_SELECTION_IR(READ_ARF, UnaryInstruction)
 DECL_SELECTION_IR(WHILE, UnaryInstruction)
+DECL_SELECTION_IR(F64DIV, F64DIVInstruction)
-- 
1.9.1