[Beignet] [PATCH] support 64-bit division and remainder
Song, Ruiling
ruiling.song at intel.com
Thu Sep 26 00:06:31 PDT 2013
Test pass, the patch LGTM.
-----Original Message-----
From: beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org [mailto:beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org] On Behalf Of Homer Hsing
Sent: Monday, September 23, 2013 9:12 AM
To: beignet at lists.freedesktop.org
Subject: [Beignet] [PATCH] support 64-bit division and remainder
support both unsigned and signed type,
and division ("/") and remainder ("%") arithmetic
tested by piglit
Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
backend/src/backend/gen_context.cpp | 153 ++++++++++++++++++++-
backend/src/backend/gen_context.hpp | 2 +
backend/src/backend/gen_encoder.hpp | 1 +
.../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
backend/src/backend/gen_insn_selection.cpp | 35 ++++-
backend/src/backend/gen_insn_selection.hxx | 2 +
6 files changed, 187 insertions(+), 7 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 345d37f..949ef2d 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -463,6 +463,14 @@ namespace gbe
I64FullAdd(e, f, b, d);
}
+ void GenContext::I64Neg(GenRegister high, GenRegister low, GenRegister tmp) { // Negates, in place, the 64-bit value held as a (high, low) register pair: invert both halves, then add 1. tmp is overwritten as scratch.
+ p->NOT(high, high); // invert the high half
+ p->NOT(low, low); // invert the low half
+ p->MOV(tmp, GenRegister::immud(1)); // tmp <- 1, the value added to the low half
+ addWithCarry(low, low, tmp); // low <- low + tmp; presumably leaves the carry-out in tmp (helper body not shown here -- confirm)
+ p->ADD(high, high, tmp); // high <- high + tmp
+ }
+
void GenContext::I64ABS(GenRegister sign, GenRegister high, GenRegister low, GenRegister tmp, GenRegister flagReg) {
p->SHR(sign, high, GenRegister::immud(31));
p->push();
@@ -470,11 +478,7 @@ namespace gbe
p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
p->CMP(GEN_CONDITIONAL_NZ, sign, GenRegister::immud(0));
p->curr.predicate = GEN_PREDICATE_NORMAL;
- p->NOT(high, high);
- p->NOT(low, low);
- p->MOV(tmp, GenRegister::immud(1));
- addWithCarry(low, low, tmp);
- p->ADD(high, high, tmp);
+ I64Neg(high, low, tmp);
p->pop();
}
@@ -1175,6 +1179,145 @@ namespace gbe
storeBottomHalf(dest, a);
}
+ void GenContext::emitI64DIVREMInstruction(const SelectionInstruction &insn) { // Emits the code for SEL_OP_I64DIV / SEL_OP_I64REM: a 64-iteration shift-and-subtract division carried out on 32-bit register halves.
+ GenRegister dest = ra->genReg(insn.dst(0)); // dst(0): where the quotient (DIV) or remainder (REM) is stored
+ GenRegister x = ra->genReg(insn.src(0)); // src(0): dividend
+ GenRegister y = ra->genReg(insn.src(1)); // src(1): divisor
+ GenRegister a = ra->genReg(insn.dst(1)); // dst(1)..dst(13): thirteen temporaries a..m
+ GenRegister b = ra->genReg(insn.dst(2));
+ GenRegister c = ra->genReg(insn.dst(3));
+ GenRegister d = ra->genReg(insn.dst(4));
+ GenRegister e = ra->genReg(insn.dst(5));
+ GenRegister f = ra->genReg(insn.dst(6));
+ GenRegister g = ra->genReg(insn.dst(7));
+ GenRegister h = ra->genReg(insn.dst(8));
+ GenRegister i = ra->genReg(insn.dst(9));
+ GenRegister j = ra->genReg(insn.dst(10));
+ GenRegister k = ra->genReg(insn.dst(11));
+ GenRegister l = ra->genReg(insn.dst(12));
+ GenRegister m = ra->genReg(insn.dst(13));
+ GenRegister flagReg = ra->genReg(insn.dst(14)); // dst(14): flag register selected via useFlag() for every compare below
+ GenRegister zero = GenRegister::immud(0),
+ one = GenRegister::immud(1),
+ imm31 = GenRegister::immud(31);
+ // (a,b) <- x  (top half in a, bottom half in b); this pair is reduced in the loop and is what REM stores
+ loadTopHalf(a, x);
+ loadBottomHalf(b, x);
+ // (c,d) <- y  (top half in c, bottom half in d)
+ loadTopHalf(c, y);
+ loadBottomHalf(d, y);
+ // k <- sign_of_result: sign(x) XOR sign(y) for DIV, sign(x) alone for REM
+ if(x.is_signed_int()) {
+ GBE_ASSERT(y.is_signed_int()); // operands and destination are expected to agree on signedness
+ GBE_ASSERT(dest.is_signed_int());
+ I64ABS(k, a, b, e, flagReg); // k <- sign bit of x, (a,b) <- |x|; e is passed as scratch
+ I64ABS(l, c, d, e, flagReg); // l <- sign bit of y, (c,d) <- |y|; e is passed as scratch
+ if(insn.opcode == SEL_OP_I64DIV)
+ p->XOR(k, k, l); // quotient sign: set when exactly one operand was negative
+ }
+ // (e,f) <- 0: low 64 bits of the 128-bit value (c,d,e,f), c being the most significant word
+ p->MOV(e, zero);
+ p->MOV(f, zero);
+ // (g,h) <- 2**63: the quotient bit tested in the current iteration, starting from the top bit
+ p->MOV(g, GenRegister::immud(0x80000000));
+ p->MOV(h, zero);
+ // (i,j) <- 0: quotient accumulator
+ p->MOV(i, zero);
+ p->MOV(j, zero);
+ // m <- 0: iteration counter
+ p->MOV(m, zero);
+ {
+ uint32_t loop_start = p->n_instruction(); // instruction count at loop entry, used below to compute the backward jump distance
+ // (c,d,e,f) <- (c,d,e,f) / 2: 128-bit logical right shift by one; l carries the bit crossing each 32-bit boundary
+ p->SHR(f, f, one);
+ p->SHL(l, e, imm31);
+ p->OR(f, f, l);
+ p->SHR(e, e, one);
+ p->SHL(l, d, imm31);
+ p->OR(e, e, l);
+ p->SHR(d, d, one);
+ p->SHL(l, c, imm31);
+ p->OR(d, d, l);
+ p->SHR(c, c, one);
+ // condition <- (c,d)==0 && (a,b)>=(e,f)
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->MOV(l, zero); // l <- 0; set to 1 below where (a,b) >= (e,f)
+ p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+ p->CMP(GEN_CONDITIONAL_EQ, a, e); // flag <- (a == e)
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->CMP(GEN_CONDITIONAL_GE, b, f); // predicated compare: where the high words are equal, test b >= f
+ p->MOV(l, one); // predicated: l <- 1
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->CMP(GEN_CONDITIONAL_G, a, e); // flag <- (a > e)
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->MOV(l, one); // predicated: l <- 1
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->CMP(GEN_CONDITIONAL_NEQ, l, zero); // flag <- (l != 0)
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->CMP(GEN_CONDITIONAL_EQ, c, zero); // predicated compares: intended to narrow the flag to c == 0 ...
+ p->CMP(GEN_CONDITIONAL_EQ, d, zero); // ... and d == 0, giving the condition stated above
+ // under condition, (a,b) <- (a,b) - (e,f)
+ p->MOV(l, f);
+ subWithBorrow(b, b, l); // b <- b - f; presumably leaves the borrow in l (helper body not shown here -- confirm)
+ subWithBorrow(a, a, l); // a <- a - l
+ p->MOV(l, e);
+ subWithBorrow(a, a, l); // a <- a - e
+ // under condition, (i,j) <- (i,j) | (g,h): record the current quotient bit
+ p->OR(i, i, g);
+ p->OR(j, j, h);
+ p->pop();
+ // (g,h) /= 2: move to the next lower quotient bit
+ p->SHR(h, h, one);
+ p->SHL(l, g, imm31);
+ p->OR(h, h, l);
+ p->SHR(g, g, one);
+ // condition: m < 64 (m is incremented first, so the loop body is emitted once and executed 64 times)
+ p->ADD(m, m, one);
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+ p->CMP(GEN_CONDITIONAL_L, m, GenRegister::immud(64));
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ // under condition, jump back to start point
+ if (simdWidth == 8)
+ p->curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H; // NOTE(review): presumably "any of the 8 channels has the flag set" -- confirm against the ISA docs
+ else if (simdWidth == 16)
+ p->curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H; // NOTE(review): presumably the 16-channel equivalent -- confirm
+ else
+ NOT_IMPLEMENTED; // only SIMD8 and SIMD16 are handled
+ p->curr.execWidth = 1;
+ p->curr.noMask = 1;
+ int jip = -(int)(p->n_instruction() - loop_start + 1) * 2; // negative distance back to loop_start; the "+ 1" and "* 2" presumably follow the JMPI offset encoding -- confirm
+ p->JMPI(zero); // emit the jump with a zero placeholder operand
+ p->patchJMPI(p->n_instruction()-1, jip); // patch the JMPI just emitted (last instruction) with jip
+ p->pop();
+ // end of loop
+ }
+ // adjust sign of result: where k != 0, negate the quotient (DIV) or the remainder (REM)
+ if(x.is_signed_int()) {
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+ p->CMP(GEN_CONDITIONAL_NEQ, k, zero); // flag <- (k != 0)
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ if(insn.opcode == SEL_OP_I64DIV)
+ I64Neg(i, j, l); // l is passed as scratch
+ else
+ I64Neg(a, b, l); // l is passed as scratch
+ p->pop();
+ }
+ // write dest: quotient (i,j) for DIV, remainder (a,b) for REM
+ if(insn.opcode == SEL_OP_I64DIV) {
+ storeTopHalf(dest, i);
+ storeBottomHalf(dest, j);
+ } else {
+ GBE_ASSERT(insn.opcode == SEL_OP_I64REM);
+ storeTopHalf(dest, a);
+ storeBottomHalf(dest, b);
+ }
+ }
+
void GenContext::emitTernaryInstruction(const SelectionInstruction &insn) {
const GenRegister dst = ra->genReg(insn.dst(0));
const GenRegister src0 = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index e24c126..10e0603 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -86,6 +86,7 @@ namespace gbe
void addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1);
void subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1);
+ void I64Neg(GenRegister high, GenRegister low, GenRegister tmp);
void I64ABS(GenRegister sign, GenRegister high, GenRegister low, GenRegister tmp, GenRegister flagReg);
void I64FullAdd(GenRegister high1, GenRegister low1, GenRegister high2, GenRegister low2);
void I32FullMult(GenRegister high, GenRegister low, GenRegister src0, GenRegister src1);
@@ -132,6 +133,7 @@ namespace gbe
void emitUnSpillRegInstruction(const SelectionInstruction &insn);
void emitGetImageInfoInstruction(const SelectionInstruction &insn);
void emitI64MULInstruction(const SelectionInstruction &insn);
+ void emitI64DIVREMInstruction(const SelectionInstruction &insn);
void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index bb88484..fe5245e 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -193,6 +193,7 @@ namespace gbe
void setSrc0(GenInstruction *insn, GenRegister reg);
void setSrc1(GenInstruction *insn, GenRegister reg);
GenInstruction *next(uint32_t opcode);
+ uint32_t n_instruction(void) const { return store.size(); } // returns the current size of store, narrowed to uint32_t
GBE_CLASS(GenEncoder); //!< Use custom allocators
};
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index b94f235..b33112c 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -13,6 +13,7 @@ DECL_GEN7_SCHEDULE(I64MULHI, 20, 4, 2)
DECL_GEN7_SCHEDULE(I64MADSAT, 20, 4, 2)
DECL_GEN7_SCHEDULE(Compare, 20, 4, 2)
DECL_GEN7_SCHEDULE(I64Compare, 20, 4, 2)
+DECL_GEN7_SCHEDULE(I64DIVREM, 20, 4, 2)
DECL_GEN7_SCHEDULE(Jump, 14, 1, 1)
DECL_GEN7_SCHEDULE(IndirectMove, 20, 2, 2)
DECL_GEN7_SCHEDULE(Eot, 20, 1, 1)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 83b4f1b..3e539a2 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -545,6 +545,10 @@ namespace gbe
void GET_IMAGE_INFO(uint32_t type, GenRegister *dst, uint32_t dst_num, uint32_t bti);
/*! Multiply 64-bit integers */
void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]);
+ /*! 64-bit integer division */
+ void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]);
+ /*! 64-bit integer remainder of division */
+ void I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]);
/*! Use custom allocators */
GBE_CLASS(Opaque);
friend class SelectionBlock;
@@ -1042,6 +1046,24 @@ namespace gbe
insn->dst(i + 1) = tmp[i];
}
+ void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]) { // Appends a SEL_OP_I64DIV selection instruction: dst(0) is the result, src(0)/src(1) the operands, and the 14 entries of tmp fill dst(1)..dst(14).
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64DIV, 15, 2); // 15 and 2 match the 15 dst slots and 2 src slots filled below
+ insn->dst(0) = dst;
+ insn->src(0) = src0;
+ insn->src(1) = src1;
+ for(int i = 0; i < 14; i++) // temporaries go after the real destination, hence the i + 1 offset
+ insn->dst(i + 1) = tmp[i];
+ }
+
+ void Selection::Opaque::I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[14]) { // Appends a SEL_OP_I64REM selection instruction: dst(0) is the result, src(0)/src(1) the operands, and the 14 entries of tmp fill dst(1)..dst(14).
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64REM, 15, 2); // 15 and 2 match the 15 dst slots and 2 src slots filled below
+ insn->dst(0) = dst;
+ insn->src(0) = src0;
+ insn->src(1) = src1;
+ for(int i = 0; i < 14; i++) // temporaries go after the real destination, hence the i + 1 offset
+ insn->dst(i + 1) = tmp[i];
+ }
+
void Selection::Opaque::ALU1(SelectionOpcode opcode, Reg dst, Reg src) {
SelectionInstruction *insn = this->appendInsn(opcode, 1, 1);
insn->dst(0) = dst;
@@ -1577,8 +1599,17 @@ namespace gbe
} else if(type == TYPE_FLOAT) {
GBE_ASSERT(op != OP_REM);
sel.MATH(dst, GEN_MATH_FUNCTION_FDIV, src0, src1);
- } else {
- NOT_IMPLEMENTED;
+ } else if (type == TYPE_S64 || type == TYPE_U64) {
+ GenRegister tmp[14];
+ for(int i=0; i<13; i++) {
+ tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
+ tmp[i].type = GEN_TYPE_UD;
+ }
+ tmp[13] = sel.selReg(sel.reg(FAMILY_BOOL));
+ if(op == OP_DIV)
+ sel.I64DIV(dst, src0, src1, tmp);
+ else
+ sel.I64REM(dst, src0, src1, tmp);
}
markAllChildren(dag);
return true;
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 89b878d..21b0a43 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -33,6 +33,8 @@ DECL_SELECTION_IR(I64SUB, BinaryWithTempInstruction)
DECL_SELECTION_IR(I64SATSUB, I64SATSUBInstruction)
DECL_SELECTION_IR(MUL, BinaryInstruction)
DECL_SELECTION_IR(I64MUL, I64MULInstruction)
+DECL_SELECTION_IR(I64DIV, I64DIVREMInstruction)
+DECL_SELECTION_IR(I64REM, I64DIVREMInstruction)
DECL_SELECTION_IR(ATOMIC, AtomicInstruction)
DECL_SELECTION_IR(MACH, BinaryInstruction)
DECL_SELECTION_IR(CMP, CompareInstruction)
--
1.8.1.2
_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list