[Beignet] [PATCH v2 1/2] Support 64-bit float
Homer Hsing
homer.xing at intel.com
Thu Jun 6 18:59:41 PDT 2013
support 64-bit arithmetic, store, load, and immediate value
example:
kernel void f(global double *src, global double *dst) {
int i = get_global_id(0);
double d = 1.234567890123456789;
dst[i] = d * (src[i] + d);
}
Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
backend/src/backend/gen_context.cpp | 16 ++++-
backend/src/backend/gen_defs.hpp | 1 +
backend/src/backend/gen_encoder.cpp | 107 +++++++++++++++++++++++++++--
backend/src/backend/gen_encoder.hpp | 6 +-
backend/src/backend/gen_insn_selection.cpp | 46 +++++++++----
backend/src/backend/gen_insn_selection.hpp | 4 ++
backend/src/backend/gen_reg_allocation.cpp | 10 ++-
backend/src/backend/gen_register.hpp | 80 +++++++++++++++++++++
backend/src/ir/profile.cpp | 2 +
backend/src/ir/profile.hpp | 3 +-
10 files changed, 249 insertions(+), 26 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 055c8fc..e6080b2 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -138,7 +138,15 @@ namespace gbe
const GenRegister dst = ra->genReg(insn.dst(0));
const GenRegister src = ra->genReg(insn.src(0));
switch (insn.opcode) {
- case SEL_OP_MOV: p->MOV(dst, src); break;
+ case SEL_OP_MOV:
+ if (dst.isdf() && !src.isdf()) {
+ bool doubleio = true;
+ const GenRegister r = ra->genReg(GenRegister::f16grf(ir::ocl::doubleio));
+ p->MOV(dst, src, doubleio, r);
+ } else {
+ p->MOV(dst, src);
+ }
+ break;
case SEL_OP_NOT: p->NOT(dst, src); break;
case SEL_OP_RNDD: p->RNDD(dst, src); break;
case SEL_OP_RNDU: p->RNDU(dst, src); break;
@@ -263,14 +271,16 @@ namespace gbe
const GenRegister src = ra->genReg(insn.src(0));
const uint32_t bti = insn.extra.function;
const uint32_t elemNum = insn.extra.elem;
- p->UNTYPED_READ(dst, src, bti, elemNum);
+ const GenRegister r = ra->genReg(GenRegister::ud16grf(ir::ocl::doubleio));
+ p->UNTYPED_READ(dst, src, bti, elemNum, insn.doubleio, r);
}
void GenContext::emitUntypedWriteInstruction(const SelectionInstruction &insn) {
const GenRegister src = ra->genReg(insn.src(0));
const uint32_t bti = insn.extra.function;
const uint32_t elemNum = insn.extra.elem;
- p->UNTYPED_WRITE(src, bti, elemNum);
+ const GenRegister r = ra->genReg(GenRegister::ud16grf(ir::ocl::doubleio));
+ p->UNTYPED_WRITE(src, bti, elemNum, insn.doubleio, r);
}
void GenContext::emitByteGatherInstruction(const SelectionInstruction &insn) {
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index c7a1581..63f98f5 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -215,6 +215,7 @@ enum GenMessageTarget {
#define GEN_TYPE_VF 5 /* packed float vector, immediates only? */
#define GEN_TYPE_HF 6
#define GEN_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
+#define GEN_TYPE_DF 6
#define GEN_TYPE_F 7
#define GEN_ARF_NULL 0x00
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index b65cc94..db08e90 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -355,7 +355,29 @@ namespace gbe
0
};
- void GenEncoder::UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum) {
+ void GenEncoder::UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum, bool doubleio, GenRegister r) {
+ if (doubleio) {
+ GenRegister hdr = GenRegister::h2(r);
+ GenRegister imm4 = GenRegister::immud(4);
+ push();
+ curr.execWidth = 8;
+ MOV(hdr, GenRegister::ud8grf(src.nr, 0));
+ ADD(GenRegister::offset(hdr, 0, 4), hdr, imm4);
+ MOV(GenRegister::offset(hdr, 1), GenRegister::ud8grf(src.nr, 4));
+ ADD(GenRegister::offset(hdr, 1, 4), GenRegister::offset(hdr, 1), imm4);
+ pop();
+ UNTYPED_READ(dst, hdr, bti, elemNum, false, r);
+
+ push();
+ curr.execWidth = 8;
+ MOV(hdr, GenRegister::ud8grf(src.nr + 1, 0));
+ ADD(GenRegister::offset(hdr, 0, 4), hdr, imm4);
+ MOV(GenRegister::offset(hdr, 1), GenRegister::ud8grf(src.nr + 1, 4));
+ ADD(GenRegister::offset(hdr, 1, 4), GenRegister::offset(hdr, 1), imm4);
+ pop();
+ UNTYPED_READ(GenRegister::offset(dst, 2), hdr, bti, elemNum, false, r);
+ return;
+ }
GenInstruction *insn = this->next(GEN_OPCODE_SEND);
assert(elemNum >= 1 || elemNum <= 4);
uint32_t msg_length = 0;
@@ -382,7 +404,32 @@ namespace gbe
response_length);
}
- void GenEncoder::UNTYPED_WRITE(GenRegister msg, uint32_t bti, uint32_t elemNum) {
+ void GenEncoder::UNTYPED_WRITE(GenRegister msg, uint32_t bti, uint32_t elemNum, bool doubleio, GenRegister r) {
+ if (doubleio) {
+ GenRegister hdr = GenRegister::h2(r);
+ GenRegister data = GenRegister::offset(r, 2);
+ GenRegister imm4 = GenRegister::immud(4);
+ push();
+ curr.execWidth = 8;
+ MOV(hdr, GenRegister::ud8grf(msg.nr, 0));
+ ADD(GenRegister::offset(hdr, 0, 4), hdr, imm4);
+ MOV(GenRegister::offset(hdr, 1), GenRegister::ud8grf(msg.nr, 4));
+ ADD(GenRegister::offset(hdr, 1, 4), GenRegister::offset(hdr, 1), imm4);
+ pop();
+ MOV(data, GenRegister::ud16grf(msg.nr+2, 0));
+ UNTYPED_WRITE(hdr, bti, elemNum, false, r);
+
+ push();
+ curr.execWidth = 8;
+ MOV(hdr, GenRegister::ud8grf(msg.nr+1, 0));
+ ADD(GenRegister::offset(hdr, 0, 4), hdr, imm4);
+ MOV(GenRegister::offset(hdr, 1), GenRegister::ud8grf(msg.nr+1, 4));
+ ADD(GenRegister::offset(hdr, 1, 4), GenRegister::offset(hdr, 1), imm4);
+ pop();
+ MOV(data, GenRegister::ud16grf(msg.nr+4, 0));
+ UNTYPED_WRITE(hdr, bti, elemNum, false, r);
+ return;
+ }
GenInstruction *insn = this->next(GEN_OPCODE_SEND);
assert(elemNum >= 1 || elemNum <= 4);
uint32_t msg_length = 0;
@@ -467,7 +514,17 @@ namespace gbe
}
INLINE void alu1(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src) {
- if (needToSplitAlu1(p, dst, src) == false) {
+ if (opcode != GEN_OPCODE_MOV && dst.isdf() && src.isdf()) {
+ GenInstruction *insn = p->next(opcode);
+ p->setHeader(insn);
+ p->setDst(insn, dst);
+ p->setSrc0(insn, src);
+ int w = p->curr.execWidth / 4;
+ insn = p->next(opcode);
+ p->setHeader(insn);
+ p->setDst(insn, GenRegister::QnPhysical(dst, w));
+ p->setSrc0(insn, GenRegister::QnPhysical(src, w));
+ } else if (needToSplitAlu1(p, dst, src) == false) {
GenInstruction *insn = p->next(opcode);
p->setHeader(insn);
p->setDst(insn, dst);
@@ -499,7 +556,19 @@ namespace gbe
GenRegister src0,
GenRegister src1)
{
- if (needToSplitAlu2(p, dst, src0, src1) == false) {
+ if (dst.isdf() && src0.isdf() && src1.isdf()) {
+ GenInstruction *insn = p->next(opcode);
+ p->setHeader(insn);
+ p->setDst(insn, dst);
+ p->setSrc0(insn, src0);
+ p->setSrc1(insn, src1);
+ int w = p->curr.execWidth / 4;
+ insn = p->next(opcode);
+ p->setHeader(insn);
+ p->setDst(insn, GenRegister::QnPhysical(dst, w));
+ p->setSrc0(insn, GenRegister::QnPhysical(src0, w));
+ p->setSrc1(insn, GenRegister::QnPhysical(src1, w));
+ } else if (needToSplitAlu2(p, dst, src0, src1) == false) {
GenInstruction *insn = p->next(opcode);
p->setHeader(insn);
p->setDst(insn, dst);
@@ -620,7 +689,35 @@ namespace gbe
alu3(this, GEN_OPCODE_##OP, dest, src0, src1, src2); \
}
- ALU1(MOV)
+ void GenEncoder::MOV(GenRegister dest, GenRegister src0, bool doubleio, GenRegister r) {
+ if (doubleio) {
+ GenRegister r0 = GenRegister::h2(r);
+ push();
+ curr.execWidth = 8;
+ if(src0.isimmdf()) {
+ union { double d; unsigned u[2]; } u;
+ u.d = src0.value.df;
+ r0 = GenRegister::retype(r0, GEN_TYPE_UD);
+ GenRegister imm0 = GenRegister::immud(u.u[0]);
+ GenRegister imm1 = GenRegister::immud(u.u[1]);
+ MOV(r0, imm0);
+ MOV(GenRegister::suboffset(r0, 1), imm1);
+ MOV(GenRegister::offset(r0, 1), imm0);
+ MOV(GenRegister::suboffset(GenRegister::offset(r0, 1), 1), imm1);
+ } else {
+ MOV(r0, src0);
+ MOV(GenRegister::offset(r0, 1), GenRegister::offset(src0, 0, 16));
+ }
+ pop();
+ if(src0.isimmdf())
+ r = GenRegister::retype(r, GEN_TYPE_DF);
+ MOV(dest, r);
+ MOV(GenRegister::offset(dest, 2), r);
+ } else {
+ alu1(this, GEN_OPCODE_MOV, dest, src0);
+ }
+ }
+
ALU1(RNDZ)
ALU1(RNDE)
ALU1(RNDD)
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 83d83d2..3dc55b6 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -89,7 +89,7 @@ namespace gbe
#define ALU1(OP) void OP(GenRegister dest, GenRegister src0);
#define ALU2(OP) void OP(GenRegister dest, GenRegister src0, GenRegister src1);
#define ALU3(OP) void OP(GenRegister dest, GenRegister src0, GenRegister src1, GenRegister src2);
- ALU1(MOV)
+ void MOV(GenRegister dest, GenRegister src0, bool doubleio = false, GenRegister r = GenRegister());
ALU1(RNDZ)
ALU1(RNDE)
ALU1(RNDD)
@@ -131,9 +131,9 @@ namespace gbe
/*! Wait instruction (used for the barrier) */
void WAIT(void);
/*! Untyped read (upto 4 channels) */
- void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
+ void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum, bool doubleio, GenRegister r);
/*! Untyped write (upto 4 channels) */
- void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum);
+ void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum, bool doubleio, GenRegister r);
/*! Byte gather (for unaligned bytes, shorts and ints) */
void BYTE_GATHER(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemSize);
/*! Byte scatter (for unaligned bytes, shorts and ints) */
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 88f9e94..6a845cf 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -129,6 +129,7 @@ namespace gbe
case TYPE_S32: return GEN_TYPE_D;
case TYPE_U32: return GEN_TYPE_UD;
case TYPE_FLOAT: return GEN_TYPE_F;
+ case TYPE_DOUBLE: return GEN_TYPE_DF;
default: NOT_SUPPORTED; return GEN_TYPE_F;
}
}
@@ -151,7 +152,7 @@ namespace gbe
///////////////////////////////////////////////////////////////////////////
SelectionInstruction::SelectionInstruction(SelectionOpcode op, uint32_t dst, uint32_t src) :
- parent(NULL), opcode(op), dstNum(dst), srcNum(src)
+ parent(NULL), opcode(op), dstNum(dst), srcNum(src), doubleio(false)
{}
void SelectionInstruction::prepend(SelectionInstruction &other) {
@@ -448,9 +449,9 @@ namespace gbe
/*! Wait instruction (used for the barrier) */
void WAIT(void);
/*! Untyped read (up to 4 elements) */
- void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti);
+ void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti, bool doubleio);
/*! Untyped write (up to 4 elements) */
- void UNTYPED_WRITE(Reg addr, const GenRegister *src, uint32_t elemNum, uint32_t bti);
+ void UNTYPED_WRITE(Reg addr, const GenRegister *src, uint32_t elemNum, uint32_t bti, bool doubleio);
/*! Byte gather (for unaligned bytes, shorts and ints) */
void BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, uint32_t bti);
/*! Byte scatter (for unaligned bytes, shorts and ints) */
@@ -655,6 +656,7 @@ namespace gbe
case FAMILY_WORD: SEL_REG(uw16grf, uw8grf, uw1grf); break;
case FAMILY_BYTE: SEL_REG(ub16grf, ub8grf, ub1grf); break;
case FAMILY_DWORD: SEL_REG(f16grf, f8grf, f1grf); break;
+ case FAMILY_QWORD: SEL_REG(df16grf, df8grf, df1grf); break;
default: NOT_SUPPORTED;
}
GBE_ASSERT(false);
@@ -715,7 +717,8 @@ namespace gbe
void Selection::Opaque::UNTYPED_READ(Reg addr,
const GenRegister *dst,
uint32_t elemNum,
- uint32_t bti)
+ uint32_t bti,
+ bool doubleio)
{
SelectionInstruction *insn = this->appendInsn(SEL_OP_UNTYPED_READ, elemNum, 1);
SelectionVector *srcVector = this->appendVector();
@@ -727,11 +730,13 @@ namespace gbe
insn->src(0) = addr;
insn->extra.function = bti;
insn->extra.elem = elemNum;
+ insn->doubleio = doubleio;
// Sends require contiguous allocation
dstVector->regNum = elemNum;
dstVector->isSrc = 0;
dstVector->reg = &insn->dst(0);
+ dstVector->doubleio = doubleio;
// Source cannot be scalar (yet)
srcVector->regNum = 1;
@@ -742,7 +747,8 @@ namespace gbe
void Selection::Opaque::UNTYPED_WRITE(Reg addr,
const GenRegister *src,
uint32_t elemNum,
- uint32_t bti)
+ uint32_t bti,
+ bool doubleio)
{
SelectionInstruction *insn = this->appendInsn(SEL_OP_UNTYPED_WRITE, 0, elemNum+1);
SelectionVector *vector = this->appendVector();
@@ -753,11 +759,13 @@ namespace gbe
insn->src(elemID+1) = src[elemID];
insn->extra.function = bti;
insn->extra.elem = elemNum;
+ insn->doubleio = doubleio;
// Sends require contiguous allocation for the sources
vector->regNum = elemNum+1;
vector->reg = &insn->src(0);
vector->isSrc = 1;
+ vector->doubleio = doubleio;
}
void Selection::Opaque::BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, uint32_t bti) {
@@ -1085,6 +1093,15 @@ namespace gbe
// Implementation of all patterns
///////////////////////////////////////////////////////////////////////////
+ bool canGetRegisterFromImmediate(const ir::Instruction &insn) {
+ using namespace ir;
+ const auto &childInsn = cast<LoadImmInstruction>(insn);
+ const auto &imm = childInsn.getImmediate();
+ if(imm.type != TYPE_DOUBLE)
+ return true;
+ return false;
+ }
+
GenRegister getRegisterFromImmediate(ir::Immediate imm)
{
using namespace ir;
@@ -1096,6 +1113,7 @@ namespace gbe
case TYPE_S16: return GenRegister::immw(imm.data.s16);
case TYPE_U8: return GenRegister::immuw(imm.data.u8);
case TYPE_S8: return GenRegister::immw(imm.data.s8);
+ case TYPE_DOUBLE: return GenRegister::immdf(imm.data.f64);
default: NOT_SUPPORTED; return GenRegister::immuw(0);
}
}
@@ -1218,14 +1236,14 @@ namespace gbe
SelectionDAG *dag1 = dag.child[1];
// Right source can always be an immediate
- if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI) {
+ if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag1->insn)) {
const auto &childInsn = cast<LoadImmInstruction>(dag1->insn);
src0 = sel.selReg(insn.getSrc(0), type);
src1 = getRegisterFromImmediate(childInsn.getImmediate());
if (dag0) dag0->isRoot = 1;
}
// Left source cannot be immediate but it is OK if we can commute
- else if (OCL_OPTIMIZE_IMMEDIATE && dag0 != NULL && insn.commutes() && dag0->insn.getOpcode() == OP_LOADI) {
+ else if (OCL_OPTIMIZE_IMMEDIATE && dag0 != NULL && insn.commutes() && dag0->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag0->insn)) {
const auto &childInsn = cast<LoadImmInstruction>(dag0->insn);
src0 = sel.selReg(insn.getSrc(1), type);
src1 = getRegisterFromImmediate(childInsn.getImmediate());
@@ -1261,7 +1279,7 @@ namespace gbe
case OP_SHR: sel.SHR(dst, src0, src1); break;
case OP_ASR: sel.ASR(dst, src0, src1); break;
case OP_MUL:
- if (type == TYPE_FLOAT)
+ if (type == TYPE_FLOAT || type == TYPE_DOUBLE)
sel.MUL(dst, src0, src1);
else if (type == TYPE_U32 || type == TYPE_S32) {
sel.pop();
@@ -1592,6 +1610,7 @@ namespace gbe
case TYPE_S16: sel.MOV(dst, GenRegister::immw(imm.data.s16)); break;
case TYPE_U8: sel.MOV(dst, GenRegister::immuw(imm.data.u8)); break;
case TYPE_S8: sel.MOV(dst, GenRegister::immw(imm.data.s8)); break;
+ case TYPE_DOUBLE: sel.MOV(dst, GenRegister::immdf(imm.data.f64)); break;
default: NOT_SUPPORTED;
}
sel.pop();
@@ -1639,6 +1658,7 @@ namespace gbe
INLINE uint32_t getByteScatterGatherSize(ir::Type type) {
using namespace ir;
switch (type) {
+ case TYPE_DOUBLE:
case TYPE_FLOAT:
case TYPE_U32:
case TYPE_S32:
@@ -1665,9 +1685,10 @@ namespace gbe
using namespace ir;
const uint32_t valueNum = insn.getValueNum();
vector<GenRegister> dst(valueNum);
+ bool doubleio = insn.getValueType() == TYPE_DOUBLE ? true : false;
for (uint32_t dstID = 0; dstID < valueNum; ++dstID)
dst[dstID] = GenRegister::retype(sel.selReg(insn.getValue(dstID)), GEN_TYPE_F);
- sel.UNTYPED_READ(addr, dst.data(), valueNum, bti);
+ sel.UNTYPED_READ(addr, dst.data(), valueNum, bti, doubleio);
}
void emitByteGather(Selection::Opaque &sel,
@@ -1744,11 +1765,12 @@ namespace gbe
const uint32_t addrID = ir::StoreInstruction::addressIndex;
GenRegister addr;
vector<GenRegister> value(valueNum);
+ bool doubleio = insn.getValueType() == TYPE_DOUBLE ? true : false;
addr = GenRegister::retype(sel.selReg(insn.getSrc(addrID)), GEN_TYPE_F);;
for (uint32_t valueID = 0; valueID < valueNum; ++valueID)
value[valueID] = GenRegister::retype(sel.selReg(insn.getValue(valueID)), GEN_TYPE_F);
- sel.UNTYPED_WRITE(addr, value.data(), valueNum, bti);
+ sel.UNTYPED_WRITE(addr, value.data(), valueNum, bti, doubleio);
}
void emitByteScatter(Selection::Opaque &sel,
@@ -1828,7 +1850,7 @@ namespace gbe
SelectionDAG *dag1 = dag.child[1];
// Right source can always be an immediate
- if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI) {
+ if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI && canGetRegisterFromImmediate(dag1->insn)) {
const auto &childInsn = cast<LoadImmInstruction>(dag1->insn);
src0 = sel.selReg(insn.getSrc(0), type);
src1 = getRegisterFromImmediate(childInsn.getImmediate());
@@ -1862,7 +1884,7 @@ namespace gbe
const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
// We need two instructions to make the conversion
- if (dstFamily != FAMILY_DWORD && srcFamily == FAMILY_DWORD) {
+ if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && srcFamily == FAMILY_DWORD) {
GenRegister unpacked;
if (dstFamily == FAMILY_WORD) {
const uint32_t type = TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 778eb1f..5f7289d 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -116,6 +116,8 @@ namespace gbe
uint8_t srcNum:5;
/*! To store various indices */
uint16_t index;
+ /*! Double size I/O ? */
+ bool doubleio;
/*! Variable sized. Destinations and sources go here */
GenRegister regs[0];
private:
@@ -138,6 +140,8 @@ namespace gbe
uint16_t regNum;
/*! Indicate if this a destination or a source vector */
uint16_t isSrc;
+ /*! "double IO" requires two more register */
+ bool doubleio;
};
// Owns the selection block
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 469be12..6b63c41 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -454,7 +454,6 @@ namespace gbe
}
bool GenRegAllocator::Opaque::allocateGRFs(Selection &selection) {
-
// Perform the linear scan allocator
const uint32_t regNum = ctx.sel->getRegNum();
for (uint32_t startID = 0; startID < regNum; ++startID) {
@@ -472,7 +471,9 @@ namespace gbe
const SelectionVector *vector = it->second.first;
const uint32_t simdWidth = ctx.getSimdWidth();
const uint32_t alignment = simdWidth * sizeof(uint32_t);
- const uint32_t size = vector->regNum * alignment;
+ uint32_t size = vector->regNum * alignment;
+ if (vector->doubleio)
+ size += alignment;
uint32_t grfOffset;
while ((grfOffset = ctx.allocate(size, alignment)) == 0) {
const bool success = this->expireGRF(interval);
@@ -667,6 +668,11 @@ namespace gbe
// First we try to put all booleans registers into flags
this->allocateFlags(selection);
+ int w = ctx.getSimdWidth();
+ int offst = ctx.allocate(w * sizeof(int) * 2, w * sizeof(int));
+ GBE_ASSERT(offst != 0);
+ RA.insert(std::make_pair(ocl::doubleio, offst));
+
// Allocate all the GRFs now (regular register and boolean that are not in
// flag registers)
return this->allocateGRFs(selection);
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index d772b0d..5870b07 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -192,6 +192,7 @@ namespace gbe
/*! For immediates or virtual register */
union {
+ double df;
float f;
int32_t d;
uint32_t ud;
@@ -211,6 +212,31 @@ namespace gbe
uint32_t quarter:1; //!< To choose which part we want (Q1 / Q2)
uint32_t address_mode:1; //!< direct or indirect
+ static INLINE GenRegister offset(GenRegister reg, int nr, int subnr = 0) {
+ GenRegister r = reg;
+ r.nr += nr;
+ r.subnr += subnr;
+ return r;
+ }
+
+ INLINE bool isimmdf(void) const {
+ if (type == GEN_TYPE_DF && file == GEN_IMMEDIATE_VALUE)
+ return true;
+ return false;
+ }
+
+ INLINE bool isdf(void) const {
+ if (type == GEN_TYPE_DF && file == GEN_GENERAL_REGISTER_FILE)
+ return true;
+ return false;
+ }
+
+ static INLINE GenRegister h2(GenRegister reg) {
+ GenRegister r = reg;
+ r.hstride = GEN_HORIZONTAL_STRIDE_2;
+ return r;
+ }
+
static INLINE GenRegister QnVirtual(GenRegister reg, uint32_t quarter) {
GBE_ASSERT(reg.physical == 0);
if (reg.hstride == GEN_HORIZONTAL_STRIDE_0) // scalar register
@@ -293,6 +319,18 @@ namespace gbe
return reg;
}
+ static INLINE GenRegister df16(uint32_t file, ir::Register reg) {
+ return retype(vec16(file, reg), GEN_TYPE_DF);
+ }
+
+ static INLINE GenRegister df8(uint32_t file, ir::Register reg) {
+ return retype(vec8(file, reg), GEN_TYPE_DF);
+ }
+
+ static INLINE GenRegister df1(uint32_t file, ir::Register reg) {
+ return retype(vec1(file, reg), GEN_TYPE_DF);
+ }
+
static INLINE GenRegister ud16(uint32_t file, ir::Register reg) {
return retype(vec16(file, reg), GEN_TYPE_UD);
}
@@ -371,6 +409,12 @@ namespace gbe
GEN_HORIZONTAL_STRIDE_0);
}
+ static INLINE GenRegister immdf(double df) {
+ GenRegister immediate = imm(GEN_TYPE_DF);
+ immediate.value.df = df;
+ return immediate;
+ }
+
static INLINE GenRegister immf(float f) {
GenRegister immediate = imm(GEN_TYPE_F);
immediate.value.f = f;
@@ -448,6 +492,18 @@ namespace gbe
return vec16(GEN_GENERAL_REGISTER_FILE, reg);
}
+ static INLINE GenRegister df1grf(ir::Register reg) {
+ return df1(GEN_GENERAL_REGISTER_FILE, reg);
+ }
+
+ static INLINE GenRegister df8grf(ir::Register reg) {
+ return df8(GEN_GENERAL_REGISTER_FILE, reg);
+ }
+
+ static INLINE GenRegister df16grf(ir::Register reg) {
+ return df16(GEN_GENERAL_REGISTER_FILE, reg);
+ }
+
static INLINE GenRegister ud16grf(ir::Register reg) {
return ud16(GEN_GENERAL_REGISTER_FILE, reg);
}
@@ -613,6 +669,18 @@ namespace gbe
return reg;
}
+ static INLINE GenRegister df16(uint32_t file, uint32_t nr, uint32_t subnr) {
+ return retype(vec16(file, nr, subnr), GEN_TYPE_DF);
+ }
+
+ static INLINE GenRegister df8(uint32_t file, uint32_t nr, uint32_t subnr) {
+ return retype(vec8(file, nr, subnr), GEN_TYPE_DF);
+ }
+
+ static INLINE GenRegister df1(uint32_t file, uint32_t nr, uint32_t subnr) {
+ return retype(vec1(file, nr, subnr), GEN_TYPE_DF);
+ }
+
static INLINE GenRegister ud16(uint32_t file, uint32_t nr, uint32_t subnr) {
return retype(vec16(file, nr, subnr), GEN_TYPE_UD);
}
@@ -685,6 +753,18 @@ namespace gbe
return vec16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
}
+ static INLINE GenRegister df16grf(uint32_t nr, uint32_t subnr) {
+ return df16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+ }
+
+ static INLINE GenRegister df8grf(uint32_t nr, uint32_t subnr) {
+ return df8(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+ }
+
+ static INLINE GenRegister df1grf(uint32_t nr, uint32_t subnr) {
+ return df1(GEN_GENERAL_REGISTER_FILE, nr, subnr);
+ }
+
static INLINE GenRegister ud16grf(uint32_t nr, uint32_t subnr) {
return ud16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
}
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index 99cd06c..9a17f1d 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -41,6 +41,7 @@ namespace ir {
"block_ip",
"barrier_id", "thread_number",
"const_curbe_offset",
+ "double_io",
};
#if GBE_DEBUG
@@ -77,6 +78,7 @@ namespace ir {
DECL_NEW_REG(FAMILY_DWORD, threadn);
DECL_NEW_REG(FAMILY_DWORD, constoffst);
DECL_NEW_REG(FAMILY_DWORD, workdim);
+ DECL_NEW_REG(FAMILY_DWORD, doubleio);
}
#undef DECL_NEW_REG
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
index 4b0ef5e..029b47c 100644
--- a/backend/src/ir/profile.hpp
+++ b/backend/src/ir/profile.hpp
@@ -65,7 +65,8 @@ namespace ir {
static const Register threadn = Register(21); // number of threads
static const Register constoffst = Register(22); // offset of global constant array's curbe
static const Register workdim = Register(23); // work dimention.
- static const uint32_t regNum = 24; // number of special registers
+ static const Register doubleio = Register(24); // work dimention.
+ static const uint32_t regNum = 25; // number of special registers
extern const char *specialRegMean[]; // special register name.
} /* namespace ocl */
--
1.8.1.2
More information about the Beignet
mailing list