[Beignet] [PATCH 23/27] Overload I64 Div and Rem function.
junyan.he at inbox.com
junyan.he at inbox.com
Tue Jan 6 02:02:46 PST 2015
From: Junyan He <junyan.he at linux.intel.com>
Because the shared math function does not support 64-bit
div and rem, we can just unpack the I64 operands and use the
old function to handle them.
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/backend/gen8_context.cpp | 31 +++++++++++++++++++++
backend/src/backend/gen8_context.hpp | 2 ++
backend/src/backend/gen_context.hpp | 2 +-
backend/src/backend/gen_insn_selection.cpp | 43 ++++++++++++++++++++++--------
4 files changed, 66 insertions(+), 12 deletions(-)
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 8960d5b..daa4182 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -531,6 +531,37 @@ namespace gbe
p->ADD(dst, dst, tmp_dst);
}
+ void Gen8Context::emitI64DIVREMInstruction(const SelectionInstruction &cnst_insn)
+ {
+ SelectionInstruction* insn = const_cast<SelectionInstruction*>(&cnst_insn);
+ GenRegister packed_src0 = ra->genReg(insn->src(0));
+ GenRegister packed_src1 = ra->genReg(insn->src(1));
+ GenRegister dst = ra->genReg(insn->dst(0));
+ int tmp_reg_n = 14;
+
+ if (packed_src0.hstride != GEN_HORIZONTAL_STRIDE_0) {
+ GenRegister unpacked_src0 = ra->genReg(insn->dst(tmp_reg_n));
+ unpackLongVec(packed_src0, unpacked_src0, p->curr.execWidth);
+ tmp_reg_n++;
+ insn->src(0) = unpacked_src0;
+ }
+ if (packed_src1.hstride != GEN_HORIZONTAL_STRIDE_0) {
+ GenRegister unpacked_src1 = ra->genReg(insn->dst(tmp_reg_n));
+ unpackLongVec(packed_src1, unpacked_src1, p->curr.execWidth);
+ tmp_reg_n++;
+ insn->src(1) = unpacked_src1;
+ }
+ GBE_ASSERT(tmp_reg_n <= insn->dstNum);
+
+ GenContext::emitI64DIVREMInstruction(*insn);
+
+ if (dst.hstride != GEN_HORIZONTAL_STRIDE_0) {
+ GenRegister dst_packed = ra->genReg(insn->dst(14));
+ packLongVec(dst, dst_packed, p->curr.execWidth);
+ p->MOV(dst, dst_packed);
+ }
+ }
+
void Gen8Context::packLongVec(GenRegister unpacked, GenRegister packed, uint32_t simd)
{
GBE_ASSERT(packed.subnr == 0);
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 9f1d749..bea78b6 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -60,6 +60,8 @@ namespace gbe
virtual void emitWrite64Instruction(const SelectionInstruction &insn);
virtual void emitRead64Instruction(const SelectionInstruction &insn);
virtual void emitI64MULInstruction(const SelectionInstruction &insn);
+ virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn);
+
protected:
virtual GenEncoder* generateEncoder(void) {
return GBE_NEW(Gen8Encoder, this->simdWidth, 8, deviceID);
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index a366e7f..3d01f2b 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -164,7 +164,7 @@ namespace gbe
void emitUnSpillRegInstruction(const SelectionInstruction &insn);
void emitGetImageInfoInstruction(const SelectionInstruction &insn);
virtual void emitI64MULInstruction(const SelectionInstruction &insn);
- void emitI64DIVREMInstruction(const SelectionInstruction &insn);
+ virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn);
void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 21dfdb6..36d0c73 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -617,9 +617,9 @@ namespace gbe
/*! Multiply 64-bit integers */
void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister *tmp, bool native_long);
/*! 64-bit integer division */
- void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]);
+ void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_int);
/*! 64-bit integer remainder of division */
- void I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]);
+ void I64REM(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_int);
/* common functions for both binary instruction and sel_cmp and compare instruction.
It will handle the IMM or normal register assignment, and will try to avoid LOADI
as much as possible. */
@@ -1373,21 +1373,21 @@ namespace gbe
}
}
- void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]) {
- SelectionInstruction *insn = this->appendInsn(SEL_OP_I64DIV, 14, 2);
+ void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister* tmp, int tmp_num) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64DIV, tmp_num + 1, 2);
insn->dst(0) = dst;
insn->src(0) = src0;
insn->src(1) = src1;
- for(int i = 0; i < 13; i++)
+ for(int i = 0; i < tmp_num; i++)
insn->dst(i + 1) = tmp[i];
}
- void Selection::Opaque::I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]) {
- SelectionInstruction *insn = this->appendInsn(SEL_OP_I64REM, 14, 2);
+ void Selection::Opaque::I64REM(Reg dst, Reg src0, Reg src1, GenRegister* tmp, int tmp_num) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64REM, tmp_num + 1, 2);
insn->dst(0) = dst;
insn->src(0) = src0;
insn->src(1) = src1;
- for(int i = 0; i < 13; i++)
+ for(int i = 0; i < tmp_num; i++)
insn->dst(i + 1) = tmp[i];
}
@@ -2193,18 +2193,39 @@ namespace gbe
GBE_ASSERT(op != OP_REM);
sel.MATH(dst, GEN_MATH_FUNCTION_FDIV, src0, src1);
} else if (type == TYPE_S64 || type == TYPE_U64) {
- GenRegister tmp[13];
+ GenRegister tmp[15];
+ int tmp_num = 13;
for(int i=0; i < 13; i++) {
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
tmp[i].type = GEN_TYPE_UD;
}
+
+ if (sel.hasLongType()) {
+ if (!sel.isScalarReg(insn.getSrc(0))) {
+ tmp[tmp_num] = GenRegister::retype(sel.selReg(sel.reg(FAMILY_QWORD)), src0.type);
+ tmp_num++;
+ }
+
+ if (!sel.isScalarReg(insn.getSrc(1))) {
+ tmp[tmp_num] = GenRegister::retype(sel.selReg(sel.reg(FAMILY_QWORD)), src1.type);
+ tmp_num++;
+ }
+
+ /* We need at least one tmp register to convert if dst is not scalar. */
+ if (!sel.isScalarReg(insn.getDst(0)) && sel.isScalarReg(insn.getSrc(0))
+ && sel.isScalarReg(insn.getSrc(1))) {
+ GBE_ASSERT(tmp_num == 13);
+ tmp[tmp_num] = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64);
+ tmp_num++;
+ }
+ }
sel.push();
sel.curr.flag = 0;
sel.curr.subFlag = 1;
if(op == OP_DIV)
- sel.I64DIV(dst, src0, src1, tmp);
+ sel.I64DIV(dst, src0, src1, tmp, tmp_num);
else
- sel.I64REM(dst, src0, src1, tmp);
+ sel.I64REM(dst, src0, src1, tmp, tmp_num);
sel.pop();
}
markAllChildren(dag);
--
1.9.1
More information about the Beignet
mailing list