[Beignet] [PATCH 03/13] Backend: Add FDIV64 function for gen_insn_selection.
junyan.he at inbox.com
junyan.he at inbox.com
Fri Sep 18 02:58:14 PDT 2015
From: Junyan He <junyan.he at linux.intel.com>
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/backend/gen_context.cpp | 4 ++
backend/src/backend/gen_context.hpp | 1 +
.../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
backend/src/backend/gen_insn_selection.cpp | 52 +++++++++++++++++++++-
backend/src/backend/gen_insn_selection.hxx | 1 +
5 files changed, 58 insertions(+), 1 deletion(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 25fdf08..9e2fd03 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -1679,6 +1679,10 @@ namespace gbe
}
}
+ void GenContext::emitF64DIVInstruction(const SelectionInstruction &insn) { // Emitter for the F64DIV selection op; declared virtual in gen_context.hpp so it can be overridden.
+ GBE_ASSERT(0); // No support for double on Gen7
+ }
+
void GenContext::emitTernaryInstruction(const SelectionInstruction &insn) {
const GenRegister dst = ra->genReg(insn.dst(0));
const GenRegister src0 = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 34f9293..57eb0a6 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -173,6 +173,7 @@ namespace gbe
void emitGetImageInfoInstruction(const SelectionInstruction &insn);
virtual void emitI64MULInstruction(const SelectionInstruction &insn);
virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn);
+ virtual void emitF64DIVInstruction(const SelectionInstruction &insn);
void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
unsigned beforeMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister flagTemp, unsigned desc);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index d073770..9b60c17 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -43,3 +43,4 @@ DECL_GEN7_SCHEDULE(Atomic, 80, 1, 1)
DECL_GEN7_SCHEDULE(I64MUL, 20, 40, 20)
DECL_GEN7_SCHEDULE(I64SATADD, 20, 40, 20)
DECL_GEN7_SCHEDULE(I64SATSUB, 20, 40, 20)
+DECL_GEN7_SCHEDULE(F64DIV, 20, 40, 20)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 596d828..457a629 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -361,8 +361,10 @@ namespace gbe
bool has32X32Mul() const { return bHas32X32Mul; }
void setHas32X32Mul(bool b) { bHas32X32Mul = b; }
bool hasLongType() const { return bHasLongType; }
+ bool hasDoubleType() const { return bHasDoubleType; } // Queried by the TYPE_DOUBLE division path, which asserts when this is false.
bool hasHalfType() const { return bHasHalfType; }
void setHasLongType(bool b) { bHasLongType = b; }
+ void setHasDoubleType(bool b) { bHasDoubleType = b; } // Defaults to false in the constructor; Selection8/SelectionChv/Selection9 set it to true.
void setHasHalfType(bool b) { bHasHalfType = b; }
bool hasLongRegRestrict() { return bLongRegRestrict; }
void setLongRegRestrict(bool b) { bLongRegRestrict = b; }
@@ -669,6 +671,8 @@ namespace gbe
void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_int);
/*! 64-bit integer remainder of division */
void I64REM(Reg dst, Reg src0, Reg src1, GenRegister *tmp, int tmp_int);
+ /*! double division */
+ void F64DIV(Reg dst, Reg src0, Reg src1, GenRegister* tmp, int tmpNum);
/* common functions for both binary instruction and sel_cmp and compare instruction.
It will handle the IMM or normal register assignment, and will try to avoid LOADI
as much as possible. */
@@ -745,6 +749,7 @@ namespace gbe
uint32_t currAuxLabel;
bool bHas32X32Mul;
bool bHasLongType;
+ bool bHasDoubleType;
bool bHasHalfType;
bool bLongRegRestrict;
uint32_t ldMsgOrder;
@@ -788,7 +793,7 @@ namespace gbe
curr(ctx.getSimdWidth()), file(ctx.getFunction().getRegisterFile()),
maxInsnNum(ctx.getFunction().getLargestBlockSize()), dagPool(maxInsnNum),
stateNum(0), vectorNum(0), bwdCodeGeneration(false), currAuxLabel(ctx.getFunction().labelNum()),
- bHas32X32Mul(false), bHasLongType(false), bHasHalfType(false), bLongRegRestrict(false),
+ bHas32X32Mul(false), bHasLongType(false), bHasDoubleType(false), bHasHalfType(false), bLongRegRestrict(false),
ldMsgOrder(LD_MSG_ORDER_IVB), slowByteGather(false)
{
const ir::Function &fn = ctx.getFunction();
@@ -1618,6 +1623,15 @@ namespace gbe
insn->dst(i + 1) = tmp[i];
}
+ void Selection::Opaque::F64DIV(Reg dst, Reg src0, Reg src1, GenRegister* tmp, int tmpNum) { // Appends one SEL_OP_F64DIV instruction: dst = f(src0, src1), with tmpNum scratch registers.
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_F64DIV, tmpNum + 1, 2); // tmpNum + 1 destinations (result plus temporaries), 2 sources.
+ insn->dst(0) = dst; // Destination slot 0 is the real result.
+ insn->src(0) = src0;
+ insn->src(1) = src1;
+ for(int i = 0; i < tmpNum; i++) // Caller must supply at least tmpNum entries in tmp.
+ insn->dst(i + 1) = tmp[i]; // Temporaries are recorded as destination slots 1..tmpNum.
+ }
+
void Selection::Opaque::ALU1(SelectionOpcode opcode, Reg dst, Reg src) {
SelectionInstruction *insn = this->appendInsn(opcode, 1, 1);
insn->dst(0) = dst;
@@ -2076,6 +2090,7 @@ namespace gbe
Selection8::Selection8(GenContext &ctx) : Selection(ctx) {
this->opaque->setHas32X32Mul(true);
this->opaque->setHasLongType(true);
+ this->opaque->setHasDoubleType(true); // Lets TYPE_DOUBLE division pass the hasDoubleType() check instead of asserting.
this->opaque->setSlowByteGather(true);
this->opaque->setHasHalfType(true);
}
@@ -2083,6 +2098,7 @@ namespace gbe
SelectionChv::SelectionChv(GenContext &ctx) : Selection(ctx) {
this->opaque->setHas32X32Mul(true);
this->opaque->setHasLongType(true);
+ this->opaque->setHasDoubleType(true);
this->opaque->setLongRegRestrict(true);
this->opaque->setSlowByteGather(true);
this->opaque->setHasHalfType(true);
@@ -2091,6 +2107,7 @@ namespace gbe
Selection9::Selection9(GenContext &ctx) : Selection(ctx) {
this->opaque->setHas32X32Mul(true);
this->opaque->setHasLongType(true);
+ this->opaque->setHasDoubleType(true);
this->opaque->setLdMsgOrder(LD_MSG_ORDER_SKL);
this->opaque->setSlowByteGather(true);
this->opaque->setHasHalfType(true);
@@ -2580,6 +2597,39 @@ namespace gbe
else
sel.I64REM(dst, src0, src1, tmp, tmp_num);
sel.pop();
+ } else if (type == TYPE_DOUBLE) {
+ if (!sel.hasDoubleType())
+ GBE_ASSERT(0);
+
+ GenRegister tmp[10];
+ int tmpNum = 7;
+ ir::RegisterFamily fm;
+ if (sel.ctx.getSimdWidth() == 16) {
+ fm = FAMILY_WORD;
+ } else {
+ fm = FAMILY_DWORD;
+ }
+
+ /* madm and invm need special accumulator support, which requires us to be in
+ align16 mode. If any src is uniform, we need another tmp register and MOV the
+ uniform one to it. Because madm and invm work in align16 mode, the channel
+ mask is different from align1 mode. So we cannot directly write the result
+ to the dst; we need a tmp register to hold the result and MOV it to dst
+ later. */
+ tmpNum++; //For the dst.
+ if (src0.hstride == GEN_HORIZONTAL_STRIDE_0) tmpNum++;
+ if (src1.hstride == GEN_HORIZONTAL_STRIDE_0) tmpNum++;
+
+ for (int i = 0; i < tmpNum; i++)
+ tmp[i] = GenRegister::df8grf(sel.reg(fm));
+
+ sel.push();
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ sel.F64DIV(dst, src0, src1, tmp, tmpNum);
+ sel.pop();
+ } else {
+ GBE_ASSERT(0);
}
markAllChildren(dag);
return true;
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index adbb137..479398b 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -90,3 +90,4 @@ DECL_SELECTION_IR(ENDIF, UnaryInstruction)
DECL_SELECTION_IR(ELSE, UnaryInstruction)
DECL_SELECTION_IR(READ_ARF, UnaryInstruction)
DECL_SELECTION_IR(WHILE, UnaryInstruction)
+DECL_SELECTION_IR(F64DIV, F64DIVInstruction)
--
1.9.1
More information about the Beignet
mailing list