[Beignet] [PATCH 22/27] Overload the I64MADSAT function.
junyan.he at inbox.com
junyan.he at inbox.com
Tue Jan 6 02:02:39 PST 2015
From: Junyan He <junyan.he at linux.intel.com>
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/backend/gen8_context.cpp | 148 +++++++++++++++++++++++++++++
backend/src/backend/gen8_context.hpp | 2 +
backend/src/backend/gen_context.hpp | 2 +-
backend/src/backend/gen_insn_selection.cpp | 40 +++++---
4 files changed, 176 insertions(+), 16 deletions(-)
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index a816780..8960d5b 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -218,6 +218,154 @@ namespace gbe
}
}
+ /* Emit dst = sat(src0 * src1 + src2) for 64-bit operands using native long registers.
+    insn carries 3 sources and 7 destinations: the result (dst 0) plus six scratch registers. */
+ void Gen8Context::emitI64MADSATInstruction(const SelectionInstruction &insn)
+ {
+   GenRegister src0 = ra->genReg(insn.src(0));
+   GenRegister src1 = ra->genReg(insn.src(1));
+   GenRegister src2 = ra->genReg(insn.src(2));
+   GenRegister dst_l = ra->genReg(insn.dst(0)); // final result; holds the low product half meanwhile
+   GenRegister dst_h = ra->genReg(insn.dst(1)); // high product half, used for the overflow checks
+   GenRegister s0_abs = ra->genReg(insn.dst(2)); // scratch, only handed to calculateFullS64MUL
+   GenRegister s1_abs = ra->genReg(insn.dst(3)); // scratch, only handed to calculateFullS64MUL
+   GenRegister tmp0 = ra->genReg(insn.dst(4));
+   GenRegister tmp1 = ra->genReg(insn.dst(5));
+   GenRegister sign = ra->genReg(insn.dst(6)); // scratch; reused as `sum` in the signed path
+   GenRegister flagReg = GenRegister::flag(insn.state.flag, insn.state.subFlag);
+
+   if (src0.type == GEN_TYPE_UL) {
+     /* All three operands must share the same unsigned long type. */
+     GBE_ASSERT(src1.type == GEN_TYPE_UL);
+     GBE_ASSERT(src2.type == GEN_TYPE_UL);
+     dst_l.type = dst_h.type = GEN_TYPE_UL;
+     tmp0.type = tmp1.type = GEN_TYPE_UL;
+     calculateFullU64MUL(p, src0, src1, dst_h, dst_l, tmp0, tmp1);
+
+     /* Implement the logic:
+          dst_l += src2;
+          if (dst_h)        dst_l = 0xFFFFFFFFFFFFFFFFULL; // product does not fit in 64 bits
+          if (dst_l < src2) dst_l = 0xFFFFFFFFFFFFFFFFULL; // the addition wrapped around */
+     p->ADD(dst_l, dst_l, src2);
+
+     p->push();
+     p->curr.predicate = GEN_PREDICATE_NONE;
+     p->curr.noMask = 1;
+     p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+     p->CMP(GEN_CONDITIONAL_NZ, dst_h, GenRegister::immud(0), tmp0);
+     p->curr.noMask = 0;
+     p->curr.predicate = GEN_PREDICATE_NORMAL; // gate the MOV on the compare, as the signed path does
+     p->MOV(dst_l, GenRegister::immuint64(0xFFFFFFFFFFFFFFFF));
+     p->pop();
+
+     p->push();
+     p->curr.predicate = GEN_PREDICATE_NONE;
+     p->curr.noMask = 1;
+     p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+     p->CMP(GEN_CONDITIONAL_L, dst_l, src2, tmp0);
+     p->curr.noMask = 0;
+     p->curr.predicate = GEN_PREDICATE_NORMAL; // without this the MOV would saturate unconditionally
+     p->MOV(dst_l, GenRegister::immuint64(0xFFFFFFFFFFFFFFFF));
+     p->pop();
+   } else {
+     GBE_ASSERT(src0.type == GEN_TYPE_L);
+     GBE_ASSERT(src1.type == GEN_TYPE_L);
+     GBE_ASSERT(src2.type == GEN_TYPE_L);
+
+     calculateFullS64MUL(p, src0, src1, dst_h, dst_l, s0_abs, s1_abs, tmp0, tmp1, sign, flagReg);
+
+     GenRegister sum = sign; // reuse the `sign` scratch register to hold low-half + src2
+     sum.type = GEN_TYPE_UL;
+     src2.type = GEN_TYPE_L;
+     dst_l.type = GEN_TYPE_UL;
+     p->NOP(); // NOTE(review): the NOPs are kept as submitted; their purpose is not evident here
+     p->ADD(sum, src2, dst_l);
+
+     /* Implement this logic (carry from the low half into the high half):
+        if(src2 >= 0) {
+          if(dst_l > sum) {
+            dst_h++;
+            if(CL_LONG_MIN == dst_h) {
+              dst_h = CL_LONG_MAX;
+              sum = CL_ULONG_MAX;
+            }
+          }
+        } */
+     p->push();
+     p->curr.predicate = GEN_PREDICATE_NONE;
+     p->curr.noMask = 1;
+     p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+     p->CMP(GEN_CONDITIONAL_GE, src2, GenRegister::immud(0), tmp1);
+     p->curr.noMask = 0;
+     p->curr.predicate = GEN_PREDICATE_NORMAL; // everything up to pop() is predicated on the flag
+     p->CMP(GEN_CONDITIONAL_G, dst_l, sum, tmp1); // predicated compare: plays the nested if
+     p->ADD(dst_h, dst_h, GenRegister::immud(1));
+     p->MOV(tmp0, GenRegister::immint64(-0x7FFFFFFFFFFFFFFFLL - 1LL)); // CL_LONG_MIN
+     p->CMP(GEN_CONDITIONAL_EQ, dst_h, tmp0, tmp1);
+     p->MOV(dst_h, GenRegister::immint64(0x7FFFFFFFFFFFFFFFLL)); // CL_LONG_MAX
+     p->MOV(sum, GenRegister::immuint64(0xFFFFFFFFFFFFFFFFULL)); // CL_ULONG_MAX
+     p->pop();
+     p->NOP();
+
+     /* Implement this logic (borrow from the high half when src2 is negative):
+        else {
+          if(dst_l < sum) {
+            dst_h--;
+            if(CL_LONG_MAX == dst_h) {
+              dst_h = CL_LONG_MIN;
+              sum = 0;
+            }
+          }
+        } */
+     p->push();
+     p->curr.predicate = GEN_PREDICATE_NONE;
+     p->curr.noMask = 1;
+     p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+     p->CMP(GEN_CONDITIONAL_L, src2, GenRegister::immud(0), tmp1);
+     p->curr.noMask = 0;
+     p->curr.predicate = GEN_PREDICATE_NORMAL; // everything up to pop() is predicated on the flag
+     p->CMP(GEN_CONDITIONAL_L, dst_l, sum, tmp1); // predicated compare: plays the nested if
+     p->ADD(dst_h, dst_h, GenRegister::immd(-1));
+     p->MOV(tmp0, GenRegister::immint64(0x7FFFFFFFFFFFFFFFLL)); // CL_LONG_MAX
+     p->CMP(GEN_CONDITIONAL_EQ, dst_h, tmp0, tmp1);
+     p->MOV(dst_h, GenRegister::immint64(-0x7FFFFFFFFFFFFFFFLL - 1LL)); // CL_LONG_MIN
+     p->MOV(sum, GenRegister::immud(0));
+     p->pop();
+     p->NOP();
+
+     /* Saturate logic, applied to the adjusted high half:
+        if(dst_h > 0)
+          sum = CL_LONG_MAX;
+        else if(dst_h < -1)
+          sum = CL_LONG_MIN;
+        cl_long result = (cl_long) sum; */
+     p->MOV(dst_l, sum); // default result; overwritten below where saturation applies
+
+     dst_h.type = GEN_TYPE_L; // the two compares below read dst_h as signed
+     p->push();
+     p->curr.predicate = GEN_PREDICATE_NONE;
+     p->curr.noMask = 1;
+     p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+     p->CMP(GEN_CONDITIONAL_G, dst_h, GenRegister::immud(0), tmp1);
+     p->curr.noMask = 0;
+     p->curr.predicate = GEN_PREDICATE_NORMAL;
+     p->MOV(dst_l, GenRegister::immint64(0x7FFFFFFFFFFFFFFFLL)); // CL_LONG_MAX
+     p->pop();
+     p->NOP();
+
+     p->push();
+     p->curr.predicate = GEN_PREDICATE_NONE;
+     p->curr.noMask = 1;
+     p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+     p->CMP(GEN_CONDITIONAL_L, dst_h, GenRegister::immd(-1), tmp1);
+     p->curr.noMask = 0;
+     p->curr.predicate = GEN_PREDICATE_NORMAL;
+     p->MOV(dst_l, GenRegister::immint64(-0x7FFFFFFFFFFFFFFFLL - 1LL)); // CL_LONG_MIN
+     p->pop();
+     p->NOP();
+   }
+ }
+
void Gen8Context::emitI64MULInstruction(const SelectionInstruction &insn)
{
GenRegister src0 = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 6dc8afc..9f1d749 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -55,6 +55,8 @@ namespace gbe
virtual void emitI64MULHIInstruction(const SelectionInstruction &insn);
virtual void emitI64RHADDInstruction(const SelectionInstruction &insn);
virtual void emitI64HADDInstruction(const SelectionInstruction &insn);
+ virtual void emitI64MADSATInstruction(const SelectionInstruction &insn);
+
virtual void emitWrite64Instruction(const SelectionInstruction &insn);
virtual void emitRead64Instruction(const SelectionInstruction &insn);
virtual void emitI64MULInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 49d6017..a366e7f 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -130,7 +130,7 @@ namespace gbe
virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
void emitTernaryInstruction(const SelectionInstruction &insn);
virtual void emitI64MULHIInstruction(const SelectionInstruction &insn);
- void emitI64MADSATInstruction(const SelectionInstruction &insn);
+ virtual void emitI64MADSATInstruction(const SelectionInstruction &insn);
virtual void emitI64HADDInstruction(const SelectionInstruction &insn);
virtual void emitI64RHADDInstruction(const SelectionInstruction &insn);
void emitI64ShiftInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 9c7226d..21dfdb6 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -527,7 +527,7 @@ namespace gbe
/*! Convert 64-bit integer to 32-bit float */
void CONVF_TO_I64(Reg dst, Reg src, GenRegister tmp[2]);
/*! Saturated 64bit x*y + z */
- void I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[9]);
+ void I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister* tmp, int tmp_num);
/*! High 64bit of x*y */
void I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_num);
/*! (x+y)>>1 without mod. overflow */
@@ -1470,13 +1470,13 @@ namespace gbe
insn->dst(i + 1) = tmp[i];
}
- void Selection::Opaque::I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister tmp[9]) {
- SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MADSAT, 10, 3);
+ void Selection::Opaque::I64MADSAT(Reg dst, Reg src0, Reg src1, Reg src2, GenRegister *tmp, int tmp_num) { // tmp_num: entries in tmp (9 or 6 at the OP_I64MADSAT call site)
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MADSAT, tmp_num + 1, 3); // dst plus tmp_num temporaries, and the 3 sources set below
insn->dst(0) = dst;
insn->src(0) = src0;
insn->src(1) = src1;
insn->src(2) = src2;
- for(int i = 0; i < 9; i ++)
+ for(int i = 0; i < tmp_num; i ++) // temporaries occupy destination slots 1..tmp_num, after dst
insn->dst(i + 1) = tmp[i];
}
@@ -4045,17 +4045,27 @@ namespace gbe
switch(insn.getOpcode()) {
case OP_I64MADSAT:
{
- GenRegister tmp[9];
- for(int i=0; i<9; i++) {
- tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
- tmp[i].type = GEN_TYPE_UD;
- }
- sel.push();
- sel.curr.flag = 0;
- sel.curr.subFlag = 1;
- sel.I64MADSAT(dst, src0, src1, src2, tmp);
- sel.pop();
- break;
+ GenRegister tmp[9];
+ int tmp_num;
+ if (!sel.hasLongType()) {
+ tmp_num = 9;
+ for(int i=0; i<9; i++) {
+ tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
+ tmp[i].type = GEN_TYPE_UD;
+ }
+ } else {
+ tmp_num = 6;
+ for(int i=0; i<6; i++) {
+ tmp[i] = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64);
+ tmp[i].type = GEN_TYPE_UL;
+ }
+ }
+ sel.push();
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ sel.I64MADSAT(dst, src0, src1, src2, tmp, tmp_num);
+ sel.pop();
+ break;
}
case OP_MAD:
{
--
1.9.1
More information about the Beignet
mailing list