[Beignet] [PATCH] support 64-bit division and remainder

Homer Hsing homer.xing at intel.com
Sun Sep 22 18:11:58 PDT 2013


Support both unsigned and signed 64-bit integer types for the
division ("/") and remainder ("%") operators.

Tested with piglit.

Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
 backend/src/backend/gen_context.cpp                | 153 ++++++++++++++++++++-
 backend/src/backend/gen_context.hpp                |   2 +
 backend/src/backend/gen_encoder.hpp                |   1 +
 .../src/backend/gen_insn_gen7_schedule_info.hxx    |   1 +
 backend/src/backend/gen_insn_selection.cpp         |  35 ++++-
 backend/src/backend/gen_insn_selection.hxx         |   2 +
 6 files changed, 187 insertions(+), 7 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 345d37f..949ef2d 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -463,6 +463,14 @@ namespace gbe
     I64FullAdd(e, f, b, d);
   }
 
+  void GenContext::I64Neg(GenRegister high, GenRegister low, GenRegister tmp) { // emits ~(high,low) + 1 in place, i.e. a 64-bit negate; tmp is overwritten
+    p->NOT(high, high);                  // high <- ~high
+    p->NOT(low, low);                    // low  <- ~low
+    p->MOV(tmp, GenRegister::immud(1));  // tmp  <- 1
+    addWithCarry(low, low, tmp);         // low  <- low + 1; presumably leaves the carry-out in tmp -- confirm against addWithCarry
+    p->ADD(high, high, tmp);             // high <- high + tmp (the carry, under the assumption above)
+  }
+
   void GenContext::I64ABS(GenRegister sign, GenRegister high, GenRegister low, GenRegister tmp, GenRegister flagReg) {
     p->SHR(sign, high, GenRegister::immud(31));
     p->push();
@@ -470,11 +478,7 @@ namespace gbe
     p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
     p->CMP(GEN_CONDITIONAL_NZ, sign, GenRegister::immud(0));
     p->curr.predicate = GEN_PREDICATE_NORMAL;
-    p->NOT(high, high);
-    p->NOT(low, low);
-    p->MOV(tmp, GenRegister::immud(1));
-    addWithCarry(low, low, tmp);
-    p->ADD(high, high, tmp);
+    I64Neg(high, low, tmp);
     p->pop();
   }
 
@@ -1175,6 +1179,145 @@ namespace gbe
     storeBottomHalf(dest, a);
   }
 
+  void GenContext::emitI64DIVREMInstruction(const SelectionInstruction &insn) { // emits SEL_OP_I64DIV / SEL_OP_I64REM as a 64-iteration shift-and-subtract loop
+    GenRegister dest = ra->genReg(insn.dst(0));   // receives the quotient (DIV) or the remainder (REM)
+    GenRegister x = ra->genReg(insn.src(0));      // first operand: the value being divided
+    GenRegister y = ra->genReg(insn.src(1));      // second operand: the divisor
+    GenRegister a = ra->genReg(insn.dst(1));      // (a,b): top/bottom half of x, reduced in place to the remainder
+    GenRegister b = ra->genReg(insn.dst(2));
+    GenRegister c = ra->genReg(insn.dst(3));      // (c,d,e,f): four-dword value, starts as y in (c,d) with (e,f) = 0
+    GenRegister d = ra->genReg(insn.dst(4));
+    GenRegister e = ra->genReg(insn.dst(5));      // e is also used as the scratch register for I64ABS before the loop
+    GenRegister f = ra->genReg(insn.dst(6));
+    GenRegister g = ra->genReg(insn.dst(7));      // (g,h): single quotient bit tested in the current iteration
+    GenRegister h = ra->genReg(insn.dst(8));
+    GenRegister i = ra->genReg(insn.dst(9));      // (i,j): accumulated quotient
+    GenRegister j = ra->genReg(insn.dst(10));
+    GenRegister k = ra->genReg(insn.dst(11));     // k: sign flag used to negate the result at the end (signed case only)
+    GenRegister l = ra->genReg(insn.dst(12));     // l: sign of y first, then general scratch inside the loop
+    GenRegister m = ra->genReg(insn.dst(13));     // m: loop iteration counter
+    GenRegister flagReg = ra->genReg(insn.dst(14)); // flag register used for every CMP / predicate below
+    GenRegister zero = GenRegister::immud(0),
+                one = GenRegister::immud(1),
+                imm31 = GenRegister::immud(31);
+    // (a,b) <- x   (a = top half, b = bottom half)
+    loadTopHalf(a, x);
+    loadBottomHalf(b, x);
+    // (c,d) <- y   (c = top half, d = bottom half)
+    loadTopHalf(c, y);
+    loadBottomHalf(d, y);
+    // k <- sign_of_result (only computed for signed operands; operands are replaced by their absolute values)
+    if(x.is_signed_int()) {
+      GBE_ASSERT(y.is_signed_int());
+      GBE_ASSERT(dest.is_signed_int());
+      I64ABS(k, a, b, e, flagReg);        // k <- sign bit of x, (a,b) <- |x|
+      I64ABS(l, c, d, e, flagReg);        // l <- sign bit of y, (c,d) <- |y|
+      if(insn.opcode == SEL_OP_I64DIV)    // quotient sign is sign(x) ^ sign(y); for REM k stays the sign of x
+        p->XOR(k, k, l);
+    }
+    // (e,f) <- 0   so that (c,d,e,f) holds y shifted left by 64 bits
+    p->MOV(e, zero);
+    p->MOV(f, zero);
+    // (g,h) <- 2**63   (highest quotient bit is tried first)
+    p->MOV(g, GenRegister::immud(0x80000000));
+    p->MOV(h, zero);
+    // (i,j) <- 0   (quotient accumulator)
+    p->MOV(i, zero);
+    p->MOV(j, zero);
+    // m <- 0   (iteration counter)
+    p->MOV(m, zero);
+    {
+      uint32_t loop_start = p->n_instruction(); // encoder position of the first loop instruction, target of the backward jump
+      // (c,d,e,f) <- (c,d,e,f) / 2   (logical right shift by one across four dwords, lowest dword first)
+      p->SHR(f, f, one);
+      p->SHL(l, e, imm31);                // l <- bit 0 of e moved to bit 31
+      p->OR(f, f, l);
+      p->SHR(e, e, one);
+      p->SHL(l, d, imm31);
+      p->OR(e, e, l);
+      p->SHR(d, d, one);
+      p->SHL(l, c, imm31);
+      p->OR(d, d, l);
+      p->SHR(c, c, one);
+      // condition <- (c,d)==0 && (a,b)>=(e,f)   i.e. shifted divisor fits in (e,f) and does not exceed the running remainder
+      p->push();
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->MOV(l, zero);                    // l <- 0, becomes 1 where (a,b) >= (e,f)
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+      p->CMP(GEN_CONDITIONAL_EQ, a, e);   // flag <- (a == e)
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->CMP(GEN_CONDITIONAL_GE, b, f);   // predicated compare: presumably narrows the flag to (a == e && b >= f) -- confirm
+      p->MOV(l, one);
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->CMP(GEN_CONDITIONAL_G, a, e);    // flag <- (a > e)
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->MOV(l, one);
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->CMP(GEN_CONDITIONAL_NEQ, l, zero); // flag <- (l != 0)
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      p->CMP(GEN_CONDITIONAL_EQ, c, zero); // predicated compares: presumably AND (c == 0) and (d == 0) into the flag -- confirm
+      p->CMP(GEN_CONDITIONAL_EQ, d, zero);
+      // under condition, (a,b) <- (a,b) - (e,f)
+      p->MOV(l, f);
+      subWithBorrow(b, b, l);             // b <- b - f; presumably leaves the borrow in l -- confirm against subWithBorrow
+      subWithBorrow(a, a, l);             // a <- a - l (the borrow, under the assumption above)
+      p->MOV(l, e);
+      subWithBorrow(a, a, l);             // a <- a - e
+      // under condition, (i,j) <- (i,j) | (g,h)   (record this quotient bit)
+      p->OR(i, i, g);
+      p->OR(j, j, h);
+      p->pop();
+      // (g,h) /= 2   (move on to the next lower quotient bit)
+      p->SHR(h, h, one);
+      p->SHL(l, g, imm31);
+      p->OR(h, h, l);
+      p->SHR(g, g, one);
+      // condition: m < 64   (m is incremented first, so the loop body runs 64 times)
+      p->ADD(m, m, one);
+      p->push();
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+      p->CMP(GEN_CONDITIONAL_L, m, GenRegister::immud(64));
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      // under condition, jump back to start point
+      if (simdWidth == 8)
+        p->curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;  // NOTE(review): presumably "any of 8 channels set" -- confirm
+      else if (simdWidth == 16)
+        p->curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H; // NOTE(review): presumably "any of 16 channels set" -- confirm
+      else
+        NOT_IMPLEMENTED;
+      p->curr.execWidth = 1;
+      p->curr.noMask = 1;
+      int jip = -(int)(p->n_instruction() - loop_start + 1) * 2; // negative offset back to loop_start; the "+ 1" and "* 2" presumably cover the JMPI itself and the jump unit size -- confirm
+      p->JMPI(zero);                      // emitted with a placeholder target...
+      p->patchJMPI(p->n_instruction()-1, jip); // ...then the just-emitted JMPI is patched with the real offset
+      p->pop();
+      // end of loop
+    }
+    // adjust sign of result (signed operands only): negate where k != 0
+    if(x.is_signed_int()) {
+      p->push();
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+      p->CMP(GEN_CONDITIONAL_NEQ, k, zero);
+      p->curr.predicate = GEN_PREDICATE_NORMAL;
+      if(insn.opcode == SEL_OP_I64DIV)
+        I64Neg(i, j, l);                  // negate the quotient
+      else
+        I64Neg(a, b, l);                  // negate the remainder
+      p->pop();
+    }
+    // write dest: quotient (i,j) for DIV, remainder (a,b) for REM
+    if(insn.opcode == SEL_OP_I64DIV) {
+      storeTopHalf(dest, i);
+      storeBottomHalf(dest, j);
+    } else {
+      GBE_ASSERT(insn.opcode == SEL_OP_I64REM);
+      storeTopHalf(dest, a);
+      storeBottomHalf(dest, b);
+    }
+  }
+
   void GenContext::emitTernaryInstruction(const SelectionInstruction &insn) {
     const GenRegister dst = ra->genReg(insn.dst(0));
     const GenRegister src0 = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index e24c126..10e0603 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -86,6 +86,7 @@ namespace gbe
 
     void addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1);
     void subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1);
+    void I64Neg(GenRegister high, GenRegister low, GenRegister tmp);
     void I64ABS(GenRegister sign, GenRegister high, GenRegister low, GenRegister tmp, GenRegister flagReg);
     void I64FullAdd(GenRegister high1, GenRegister low1, GenRegister high2, GenRegister low2);
     void I32FullMult(GenRegister high, GenRegister low, GenRegister src0, GenRegister src1);
@@ -132,6 +133,7 @@ namespace gbe
     void emitUnSpillRegInstruction(const SelectionInstruction &insn);
     void emitGetImageInfoInstruction(const SelectionInstruction &insn);
     void emitI64MULInstruction(const SelectionInstruction &insn);
+    void emitI64DIVREMInstruction(const SelectionInstruction &insn);
     void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
     void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
 
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index bb88484..fe5245e 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -193,6 +193,7 @@ namespace gbe
     void setSrc0(GenInstruction *insn, GenRegister reg);
     void setSrc1(GenInstruction *insn, GenRegister reg);
     GenInstruction *next(uint32_t opcode);
+    uint32_t n_instruction(void) const { return store.size(); } //!< Current size of store, returned as a 32-bit count
     GBE_CLASS(GenEncoder); //!< Use custom allocators
   };
 
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index b94f235..b33112c 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -13,6 +13,7 @@ DECL_GEN7_SCHEDULE(I64MULHI,        20,        4,        2)
 DECL_GEN7_SCHEDULE(I64MADSAT,       20,        4,        2)
 DECL_GEN7_SCHEDULE(Compare,         20,        4,        2)
 DECL_GEN7_SCHEDULE(I64Compare,      20,        4,        2)
+DECL_GEN7_SCHEDULE(I64DIVREM,       20,        4,        2)
 DECL_GEN7_SCHEDULE(Jump,            14,        1,        1)
 DECL_GEN7_SCHEDULE(IndirectMove,    20,        2,        2)
 DECL_GEN7_SCHEDULE(Eot,             20,        1,        1)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 83b4f1b..3e539a2 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -545,6 +545,10 @@ namespace gbe
     void GET_IMAGE_INFO(uint32_t type, GenRegister *dst, uint32_t dst_num, uint32_t bti);
     /*! Multiply 64-bit integers */
     void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]);
+    /*! 64-bit integer division */
+    void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]);
+    /*! 64-bit integer remainder of division */
+    void I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]);
     /*! Use custom allocators */
     GBE_CLASS(Opaque);
     friend class SelectionBlock;
@@ -1042,6 +1046,24 @@ namespace gbe
       insn->dst(i + 1) = tmp[i];
   }
 
+  void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]) { // appends one SEL_OP_I64DIV instruction: dst, two sources, 14 temporaries
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64DIV, 15, 2); // 15 and 2 match the dst(0..14) and src(0..1) slots filled below
+    insn->dst(0) = dst;                 // slot 0 is the real result register
+    insn->src(0) = src0;
+    insn->src(1) = src1;
+    for(int i = 0; i < 14; i++)         // temporaries are passed as extra destinations dst(1)..dst(14)
+      insn->dst(i + 1) = tmp[i];
+  }
+
+  void Selection::Opaque::I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]) { // appends one SEL_OP_I64REM instruction: dst, two sources, 14 temporaries
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64REM, 15, 2); // 15 and 2 match the dst(0..14) and src(0..1) slots filled below
+    insn->dst(0) = dst;                 // slot 0 is the real result register
+    insn->src(0) = src0;
+    insn->src(1) = src1;
+    for(int i = 0; i < 14; i++)         // temporaries are passed as extra destinations dst(1)..dst(14)
+      insn->dst(i + 1) = tmp[i];
+  }
+
   void Selection::Opaque::ALU1(SelectionOpcode opcode, Reg dst, Reg src) {
     SelectionInstruction *insn = this->appendInsn(opcode, 1, 1);
     insn->dst(0) = dst;
@@ -1577,8 +1599,17 @@ namespace gbe
       } else if(type == TYPE_FLOAT) {
         GBE_ASSERT(op != OP_REM);
         sel.MATH(dst, GEN_MATH_FUNCTION_FDIV, src0, src1);
-      } else {
-        NOT_IMPLEMENTED;
+      } else if (type == TYPE_S64 || type == TYPE_U64) {
+        GenRegister tmp[14];
+        for(int i=0; i<13; i++) {
+          tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
+          tmp[i].type = GEN_TYPE_UD;
+        }
+        tmp[13] = sel.selReg(sel.reg(FAMILY_BOOL));
+        if(op == OP_DIV)
+          sel.I64DIV(dst, src0, src1, tmp);
+        else
+          sel.I64REM(dst, src0, src1, tmp);
       }
       markAllChildren(dag);
       return true;
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 89b878d..21b0a43 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -33,6 +33,8 @@ DECL_SELECTION_IR(I64SUB, BinaryWithTempInstruction)
 DECL_SELECTION_IR(I64SATSUB, I64SATSUBInstruction)
 DECL_SELECTION_IR(MUL, BinaryInstruction)
 DECL_SELECTION_IR(I64MUL, I64MULInstruction)
+DECL_SELECTION_IR(I64DIV, I64DIVREMInstruction)
+DECL_SELECTION_IR(I64REM, I64DIVREMInstruction)
 DECL_SELECTION_IR(ATOMIC, AtomicInstruction)
 DECL_SELECTION_IR(MACH, BinaryInstruction)
 DECL_SELECTION_IR(CMP, CompareInstruction)
-- 
1.8.1.2



More information about the Beignet mailing list