[Beignet] [PATCH 1/2] support 64bit-integer shifting
Homer Hsing
homer.xing at intel.com
Wed Aug 7 00:05:12 PDT 2013
Support left shift (<<), logical right shift (>> on unsigned operands),
and arithmetic right shift (>> on signed operands) for 64-bit integers.
Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
backend/src/backend/gen_context.cpp | 149 ++++++++++++++++++++-
backend/src/backend/gen_context.hpp | 5 +
.../src/backend/gen_insn_gen7_schedule_info.hxx | 3 +
backend/src/backend/gen_insn_selection.cpp | 86 +++++++++++-
backend/src/backend/gen_insn_selection.hxx | 3 +
backend/src/backend/gen_register.hpp | 6 +
6 files changed, 248 insertions(+), 4 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 570e1f0..03f7309 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -140,7 +140,7 @@ namespace gbe
switch (insn.opcode) {
case SEL_OP_MOV: p->MOV(dst, src); break;
case SEL_OP_MOV_INT64:
- {
+ if (src.isint64()) {
GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL),
xsrc = GenRegister::retype(src, GEN_TYPE_UL);
int execWidth = p->curr.execWidth;
@@ -154,6 +154,22 @@ namespace gbe
xsrc = GenRegister::suboffset(xsrc, 4);
}
p->pop();
+ } else {
+ int execWidth = p->curr.execWidth;
+ GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL), xsrc = src;
+ p->push();
+ p->curr.execWidth = 8;
+ for (int nib = 0; nib < execWidth / 4; nib ++) {
+ p->curr.chooseNib(nib);
+ p->MOV(xdst.bottom_half(), xsrc);
+ if(xsrc.issi())
+ p->ASR(xdst.top_half(), xsrc, GenRegister::immud(31));
+ else
+ p->MOV(xdst.top_half(), GenRegister::immd(0));
+ xdst = GenRegister::suboffset(xdst, 4);
+ xsrc = GenRegister::suboffset(xsrc, 4);
+ }
+ p->pop();
}
break;
case SEL_OP_FBH: p->FBH(dst, src); break;
@@ -335,6 +351,137 @@ namespace gbe
p->pop();
}
+ void GenContext::collectShifter(GenRegister dest, GenRegister src) {
+ int execWidth = p->curr.execWidth;
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.execWidth = 8;
+ for (int nib = 0; nib < execWidth / 4; nib ++) {
+ p->AND(dest, src.bottom_half(), GenRegister::immud(63));
+ dest = GenRegister::suboffset(dest, 4);
+ src = GenRegister::suboffset(src, 4);
+ }
+ p->pop();
+ }
+
+ void GenContext::emitI64SHLInstruction(const SelectionInstruction &insn) {
+ GenRegister dest = ra->genReg(insn.dst(0));
+ GenRegister x = ra->genReg(insn.src(0));
+ GenRegister y = ra->genReg(insn.src(1));
+ GenRegister a = ra->genReg(insn.src(2));
+ GenRegister b = ra->genReg(insn.src(3));
+ GenRegister c = ra->genReg(insn.src(4));
+ GenRegister d = ra->genReg(insn.src(5));
+ GenRegister e = ra->genReg(insn.src(6));
+ GenRegister f = ra->genReg(insn.src(7));
+ a.type = b.type = c.type = d.type = e.type = f.type = GEN_TYPE_UD;
+ GenRegister zero = GenRegister::immud(0);
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ collectShifter(a, y);
+ loadBottomHalf(e, x);
+ loadTopHalf(f, x);
+ p->SHR(b, e, GenRegister::negate(a));
+ p->SHL(c, e, a);
+ p->SHL(d, f, a);
+ p->OR(e, d, b);
+ p->MOV(GenRegister::flag(1, 1), GenRegister::immuw(0xFFFF));
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->curr.physicalFlag = 1, p->curr.flag = 1, p->curr.subFlag = 1;
+ p->CMP(GEN_CONDITIONAL_Z, a, zero);
+ p->SEL(d, d, e);
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->AND(a, a, GenRegister::immud(32));
+ p->MOV(GenRegister::flag(1, 1), GenRegister::immuw(0xFFFF));
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->curr.physicalFlag = 1, p->curr.flag = 1, p->curr.subFlag = 1;
+ p->CMP(GEN_CONDITIONAL_Z, a, zero);
+ p->SEL(d, d, c);
+ p->SEL(c, c, zero);
+ p->pop();
+ storeBottomHalf(dest, c);
+ storeTopHalf(dest, d);
+ }
+
+ void GenContext::emitI64SHRInstruction(const SelectionInstruction &insn) {
+ GenRegister dest = ra->genReg(insn.dst(0));
+ GenRegister x = ra->genReg(insn.src(0));
+ GenRegister y = ra->genReg(insn.src(1));
+ GenRegister a = ra->genReg(insn.src(2));
+ GenRegister b = ra->genReg(insn.src(3));
+ GenRegister c = ra->genReg(insn.src(4));
+ GenRegister d = ra->genReg(insn.src(5));
+ GenRegister e = ra->genReg(insn.src(6));
+ GenRegister f = ra->genReg(insn.src(7));
+ a.type = b.type = c.type = d.type = e.type = f.type = GEN_TYPE_UD;
+ GenRegister zero = GenRegister::immud(0);
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ collectShifter(a, y);
+ loadBottomHalf(e, x);
+ loadTopHalf(f, x);
+ p->SHL(b, f, GenRegister::negate(a));
+ p->SHR(c, f, a);
+ p->SHR(d, e, a);
+ p->OR(e, d, b);
+ p->MOV(GenRegister::flag(1, 1), GenRegister::immuw(0xFFFF));
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->curr.physicalFlag = 1, p->curr.flag = 1, p->curr.subFlag = 1;
+ p->CMP(GEN_CONDITIONAL_Z, a, zero);
+ p->SEL(d, d, e);
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->AND(a, a, GenRegister::immud(32));
+ p->MOV(GenRegister::flag(1, 1), GenRegister::immuw(0xFFFF));
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->curr.physicalFlag = 1, p->curr.flag = 1, p->curr.subFlag = 1;
+ p->CMP(GEN_CONDITIONAL_Z, a, zero);
+ p->SEL(d, d, c);
+ p->SEL(c, c, zero);
+ p->pop();
+ storeBottomHalf(dest, d);
+ storeTopHalf(dest, c);
+ }
+
+ void GenContext::emitI64ASRInstruction(const SelectionInstruction &insn) {
+ GenRegister dest = ra->genReg(insn.dst(0));
+ GenRegister x = ra->genReg(insn.src(0));
+ GenRegister y = ra->genReg(insn.src(1));
+ GenRegister a = ra->genReg(insn.src(2));
+ GenRegister b = ra->genReg(insn.src(3));
+ GenRegister c = ra->genReg(insn.src(4));
+ GenRegister d = ra->genReg(insn.src(5));
+ GenRegister e = ra->genReg(insn.src(6));
+ GenRegister f = ra->genReg(insn.src(7));
+ a.type = b.type = c.type = d.type = e.type = GEN_TYPE_UD;
+ f.type = GEN_TYPE_D;
+ GenRegister zero = GenRegister::immud(0);
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ collectShifter(a, y);
+ loadBottomHalf(e, x);
+ loadTopHalf(f, x);
+ p->SHL(b, f, GenRegister::negate(a));
+ p->ASR(c, f, a);
+ p->SHR(d, e, a);
+ p->OR(e, d, b);
+ p->MOV(GenRegister::flag(1, 1), GenRegister::immuw(0xFFFF));
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->curr.physicalFlag = 1, p->curr.flag = 1, p->curr.subFlag = 1;
+ p->CMP(GEN_CONDITIONAL_Z, a, zero);
+ p->SEL(d, d, e);
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->AND(a, a, GenRegister::immud(32));
+ p->MOV(GenRegister::flag(1, 1), GenRegister::immuw(0xFFFF));
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->curr.physicalFlag = 1, p->curr.flag = 1, p->curr.subFlag = 1;
+ p->CMP(GEN_CONDITIONAL_Z, a, zero);
+ p->SEL(d, d, c);
+ p->SEL(c, c, GenRegister::immd(-1));
+ p->pop();
+ storeBottomHalf(dest, d);
+ storeTopHalf(dest, c);
+ }
+
void GenContext::addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1) {
int execWidth = p->curr.execWidth;
GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D);
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index dc5dc45..ca64cd3 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -75,6 +75,8 @@ namespace gbe
return this->liveness->getLiveOut(bb);
}
+ void collectShifter(GenRegister dest, GenRegister src);
+
void loadTopHalf(GenRegister dest, GenRegister src);
void storeTopHalf(GenRegister dest, GenRegister src);
@@ -89,6 +91,9 @@ namespace gbe
void emitUnaryInstruction(const SelectionInstruction &insn);
void emitBinaryInstruction(const SelectionInstruction &insn);
void emitTernaryInstruction(const SelectionInstruction &insn);
+ void emitI64SHLInstruction(const SelectionInstruction &insn);
+ void emitI64ASRInstruction(const SelectionInstruction &insn);
+ void emitI64SHRInstruction(const SelectionInstruction &insn);
void emitCompareInstruction(const SelectionInstruction &insn);
void emitJumpInstruction(const SelectionInstruction &insn);
void emitIndirectMoveInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index 6f37c3d..30f05c2 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -3,6 +3,9 @@ DECL_GEN7_SCHEDULE(Label, 0, 0, 0)
DECL_GEN7_SCHEDULE(Unary, 20, 4, 2)
DECL_GEN7_SCHEDULE(Binary, 20, 4, 2)
DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2)
+DECL_GEN7_SCHEDULE(I64ASR, 20, 4, 2)
+DECL_GEN7_SCHEDULE(I64SHR, 20, 4, 2)
+DECL_GEN7_SCHEDULE(I64SHL, 20, 4, 2)
DECL_GEN7_SCHEDULE(Compare, 20, 4, 2)
DECL_GEN7_SCHEDULE(Jump, 14, 1, 1)
DECL_GEN7_SCHEDULE(IndirectMove, 20, 2, 2)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 6cc012e..e3191f4 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -453,6 +453,12 @@ namespace gbe
#undef ALU1
#undef ALU2
#undef ALU3
+ /*! Left-shifting a 64-bit integer */
+ void I64SHL(Reg dst, Reg src0, Reg src1, Reg a, Reg b, Reg c, Reg d, Reg e, Reg f);
+ /*! Right-shifting a 64-bit integer */
+ void I64SHR(Reg dst, Reg src0, Reg src1, Reg a, Reg b, Reg c, Reg d, Reg e, Reg f);
+ /*! Arithmetic right-shifting a 64-bit integer */
+ void I64ASR(Reg dst, Reg src0, Reg src1, Reg a, Reg b, Reg c, Reg d, Reg e, Reg f);
/*! Encode a barrier instruction */
void BARRIER(GenRegister src);
/*! Encode a barrier instruction */
@@ -947,6 +953,45 @@ namespace gbe
insn->src(2) = src2;
}
+ void Selection::Opaque::I64SHL(Reg dst, Reg src0, Reg src1, Reg a, Reg b, Reg c, Reg d, Reg e, Reg f) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64SHL, 1, 8);
+ insn->dst(0) = dst;
+ insn->src(0) = src0;
+ insn->src(1) = src1;
+ insn->src(2) = a;
+ insn->src(3) = b;
+ insn->src(4) = c;
+ insn->src(5) = d;
+ insn->src(6) = e;
+ insn->src(7) = f;
+ }
+
+ void Selection::Opaque::I64SHR(Reg dst, Reg src0, Reg src1, Reg a, Reg b, Reg c, Reg d, Reg e, Reg f) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64SHR, 1, 8);
+ insn->dst(0) = dst;
+ insn->src(0) = src0;
+ insn->src(1) = src1;
+ insn->src(2) = a;
+ insn->src(3) = b;
+ insn->src(4) = c;
+ insn->src(5) = d;
+ insn->src(6) = e;
+ insn->src(7) = f;
+ }
+
+ void Selection::Opaque::I64ASR(Reg dst, Reg src0, Reg src1, Reg a, Reg b, Reg c, Reg d, Reg e, Reg f) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64ASR, 1, 8);
+ insn->dst(0) = dst;
+ insn->src(0) = src0;
+ insn->src(1) = src1;
+ insn->src(2) = a;
+ insn->src(3) = b;
+ insn->src(4) = c;
+ insn->src(5) = d;
+ insn->src(6) = e;
+ insn->src(7) = f;
+ }
+
// Boiler plate to initialize the selection library at c++ pre-main
static SelectionLibrary *selLib = NULL;
static void destroySelectionLibrary(void) { GBE_DELETE(selLib); }
@@ -1470,9 +1515,42 @@ namespace gbe
sel.ADD(dst, src0, GenRegister::negate(src1));
sel.pop();
break;
- case OP_SHL: sel.SHL(dst, src0, src1); break;
- case OP_SHR: sel.SHR(dst, src0, src1); break;
- case OP_ASR: sel.ASR(dst, src0, src1); break;
+ case OP_SHL:
+ if (type == TYPE_S64 || type == TYPE_U64) {
+ GenRegister a = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD)),
+ b = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD)),
+ c = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD)),
+ d = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD)),
+ e = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD)),
+ f = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD));
+ sel.I64SHL(dst, src0, src1, a, b, c, d, e, f);
+ } else
+ sel.SHL(dst, src0, src1);
+ break;
+ case OP_SHR:
+ if (type == TYPE_S64 || type == TYPE_U64) {
+ GenRegister a = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD)),
+ b = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD)),
+ c = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD)),
+ d = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD)),
+ e = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD)),
+ f = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD));
+ sel.I64SHR(dst, src0, src1, a, b, c, d, e, f);
+ } else
+ sel.SHR(dst, src0, src1);
+ break;
+ case OP_ASR:
+ if (type == TYPE_S64 || type == TYPE_U64) {
+ GenRegister a = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD)),
+ b = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD)),
+ c = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD)),
+ d = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD)),
+ e = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD)),
+ f = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD));
+ sel.I64ASR(dst, src0, src1, a, b, c, d, e, f);
+ } else
+ sel.ASR(dst, src0, src1);
+ break;
case OP_MUL_HI: {
GenRegister temp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_UD);
sel.MUL_HI(dst, src0, src1, temp);
@@ -2169,6 +2247,8 @@ namespace gbe
} else if (dst.isdf()) {
ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
sel.MOV_DF(dst, src, sel.selReg(r));
+ } else if (dst.isint64()) {
+ sel.MOV_INT64(dst, src);
} else
sel.MOV(dst, src);
return true;
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index d2e9db3..d57ea24 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -20,10 +20,13 @@ DECL_SELECTION_IR(I64AND, BinaryInstruction)
DECL_SELECTION_IR(I64OR, BinaryInstruction)
DECL_SELECTION_IR(I64XOR, BinaryInstruction)
DECL_SELECTION_IR(SHR, BinaryInstruction)
+DECL_SELECTION_IR(I64SHR, I64SHRInstruction)
DECL_SELECTION_IR(SHL, BinaryInstruction)
+DECL_SELECTION_IR(I64SHL, I64SHLInstruction)
DECL_SELECTION_IR(RSR, BinaryInstruction)
DECL_SELECTION_IR(RSL, BinaryInstruction)
DECL_SELECTION_IR(ASR, BinaryInstruction)
+DECL_SELECTION_IR(I64ASR, I64ASRInstruction)
DECL_SELECTION_IR(ADD, BinaryInstruction)
DECL_SELECTION_IR(I64ADD, TernaryInstruction)
DECL_SELECTION_IR(I64SUB, TernaryInstruction)
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 2cad4c0..91268c0 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -279,6 +279,12 @@ namespace gbe
return false;
}
+ INLINE bool issi(void) const {
+ if ((type == GEN_TYPE_B || type == GEN_TYPE_W || type == GEN_TYPE_D) && file == GEN_GENERAL_REGISTER_FILE)
+ return true;
+ return false;
+ }
+
static INLINE GenRegister h2(GenRegister reg) {
GenRegister r = reg;
r.hstride = GEN_HORIZONTAL_STRIDE_2;
--
1.8.1.2
More information about the Beignet
mailing list