[Beignet] [PATCH 18/27] Override I64MUL function for Gen8.
junyan.he at inbox.com
junyan.he at inbox.com
Tue Jan 6 02:02:07 PST 2015
From: Junyan He <junyan.he at linux.intel.com>
BDW supports a 32-bit by 32-bit MUL instruction, so we can refine
the MUL implementation for long (64-bit) integers by using it.
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/backend/gen8_context.cpp | 31 +++++++++++++++++++++++++++
backend/src/backend/gen8_context.hpp | 1 +
backend/src/backend/gen_context.hpp | 2 +-
backend/src/backend/gen_insn_selection.cpp | 34 ++++++++++++++++++++++--------
4 files changed, 58 insertions(+), 10 deletions(-)
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 18a3425..85b72a5 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -102,6 +102,37 @@ namespace gbe
}
}
+ void Gen8Context::emitI64MULInstruction(const SelectionInstruction &insn)
+ {
+ GenRegister src0 = ra->genReg(insn.src(0));
+ GenRegister src1 = ra->genReg(insn.src(1));
+ GenRegister dst = ra->genReg(insn.dst(0));
+ GenRegister res = ra->genReg(insn.dst(1));
+
+ src0.type = src1.type = GEN_TYPE_UD;
+ dst.type = GEN_TYPE_UL;
+ res.type = GEN_TYPE_UL;
+
+ /* Low 32 bits X low 32 bits. */
+ GenRegister s0l = src0.hstride == GEN_HORIZONTAL_STRIDE_0 ?
+ GenRegister::retype(src0, GEN_TYPE_UD) : GenRegister::unpacked_ud(src0.nr, src0.subnr);
+ GenRegister s1l = src1.hstride == GEN_HORIZONTAL_STRIDE_0 ?
+ GenRegister::retype(src1, GEN_TYPE_UD) : GenRegister::unpacked_ud(src1.nr, src1.subnr);
+ p->MUL(dst, s0l, s1l);
+
+ /* Low 32 bits X high 32 bits. */
+ GenRegister s1h = GenRegister::offset(s1l, 0, 4);
+ p->MUL(res, s0l, s1h);
+ p->SHL(res, res, GenRegister::immud(32));
+ p->ADD(dst, dst, res);
+
+ /* High 32 bits X low 32 bits. */
+ GenRegister s0h = GenRegister::offset(s0l, 0, 4);
+ p->MUL(res, s0h, s1l);
+ p->SHL(res, res, GenRegister::immud(32));
+ p->ADD(dst, dst, res);
+ }
+
void Gen8Context::packLongVec(GenRegister unpacked, GenRegister packed, uint32_t simd)
{
GBE_ASSERT(packed.subnr == 0);
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 54cc29d..7e6d3b3 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -54,6 +54,7 @@ namespace gbe
virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
virtual void emitWrite64Instruction(const SelectionInstruction &insn);
virtual void emitRead64Instruction(const SelectionInstruction &insn);
+ virtual void emitI64MULInstruction(const SelectionInstruction &insn);
protected:
virtual GenEncoder* generateEncoder(void) {
return GBE_NEW(Gen8Encoder, this->simdWidth, 8, deviceID);
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 3593d66..2c97092 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -163,7 +163,7 @@ namespace gbe
void emitSpillRegInstruction(const SelectionInstruction &insn);
void emitUnSpillRegInstruction(const SelectionInstruction &insn);
void emitGetImageInfoInstruction(const SelectionInstruction &insn);
- void emitI64MULInstruction(const SelectionInstruction &insn);
+ virtual void emitI64MULInstruction(const SelectionInstruction &insn);
void emitI64DIVREMInstruction(const SelectionInstruction &insn);
void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 60f45f7..b63252a 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -615,7 +615,7 @@ namespace gbe
/*! Get image information */
void GET_IMAGE_INFO(uint32_t type, GenRegister *dst, uint32_t dst_num, uint32_t bti);
/*! Multiply 64-bit integers */
- void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]);
+ void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister *tmp, bool native_long);
/*! 64-bit integer division */
void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]);
/*! 64-bit integer remainder of division */
@@ -1354,13 +1354,23 @@ namespace gbe
insn->extra.function = function;
}
- void Selection::Opaque::I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) {
- SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MUL, 7, 2);
+ void Selection::Opaque::I64MUL(Reg dst, Reg src0, Reg src1, GenRegister *tmp, bool native_long) {
+ SelectionInstruction *insn = NULL;
+ if (native_long)
+ insn = this->appendInsn(SEL_OP_I64MUL, 2, 2);
+ else
+ insn = this->appendInsn(SEL_OP_I64MUL, 7, 2);
+
insn->dst(0) = dst;
insn->src(0) = src0;
insn->src(1) = src1;
- for(int i = 0; i < 6; i++)
- insn->dst(i + 1) = tmp[i];
+
+ if (native_long) {
+ insn->dst(1) = tmp[0];
+ } else {
+ for (int i = 0; i < 6; i++)
+ insn->dst(i + 1) = tmp[i];
+ }
}
void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]) {
@@ -2387,10 +2397,16 @@ namespace gbe
sel.pop();
return false;
} else if (type == TYPE_S64 || type == TYPE_U64) {
- GenRegister tmp[6];
- for(int i = 0; i < 6; i++)
- tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
- sel.I64MUL(dst, src0, src1, tmp);
+ if (sel.hasLongType()) {
+ GenRegister tmp;
+ tmp = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64);
+ sel.I64MUL(dst, src0, src1, &tmp, true);
+ } else {
+ GenRegister tmp[6];
+ for(int i = 0; i < 6; i++)
+ tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
+ sel.I64MUL(dst, src0, src1, tmp, false);
+ }
} else
sel.MUL(dst, src0, src1);
break;
--
1.9.1
More information about the Beignet mailing list