[Beignet] [ocl2.0 3/4] GBE: Add 64bit data stateless messages
Ruiling Song
ruiling.song at intel.com
Thu Oct 29 00:19:17 PDT 2015
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
backend/src/backend/gen8_context.cpp | 34 +++++++++++
backend/src/backend/gen8_context.hpp | 2 +
backend/src/backend/gen_context.cpp | 6 ++
backend/src/backend/gen_context.hpp | 2 +
.../src/backend/gen_insn_gen7_schedule_info.hxx | 2 +
backend/src/backend/gen_insn_selection.cpp | 71 ++++++++++++++++++++++
backend/src/backend/gen_insn_selection.hxx | 2 +
7 files changed, 119 insertions(+)
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 898907a..3ede9c8 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -995,6 +995,40 @@ namespace gbe
afterMessage(insn, bti, tmp, btiTmp, jip0);
}
}
+ void Gen8Context::emitRead64A64Instruction(const SelectionInstruction &insn) { // Gen8: emit a 64-bit load as an untyped A64 read of DWORD pairs
+ const uint32_t elemNum = insn.extra.elem;
+ GBE_ASSERT(elemNum == 1); // only one 64-bit element per instruction is handled
+
+ const GenRegister dst = ra->genReg(insn.dst(0));
+ const GenRegister src = ra->genReg(insn.src(0));
+
+ /* BDW's 64-bit load/store send instructions require the bti to be surfaceless,
+ which we cannot accept, so we fall back to an untyped read of 2 DWORDs per element.
+ NOTE(review): this call takes no bti operand; confirm the rationale still applies to the A64 path. */
+ p->UNTYPED_READA64(dst, src, 2*elemNum);
+
+ for (uint32_t elemID = 0; elemID < elemNum; elemID++) {
+ GenRegister long_tmp = ra->genReg(insn.dst(elemID)); // for elemID == 0 this is the same slot the read above targets
+ GenRegister the_long = ra->genReg(insn.dst(elemID + elemNum)); // slot taken from the second half of the dst list
+ this->packLongVec(long_tmp, the_long, p->curr.execWidth); // presumably packs the DWORD pairs in long_tmp into the_long -- confirm
+ }
+ }
+
+ void Gen8Context::emitWrite64A64Instruction(const SelectionInstruction &insn) // Gen8: emit a 64-bit store as an untyped A64 write of DWORD pairs
+ {
+ const uint32_t elemNum = insn.extra.elem;
+ GBE_ASSERT(elemNum == 1); // only one 64-bit element per instruction is handled
+ const GenRegister addr = ra->genReg(insn.src(elemNum)); // the address is read from the slot right after the first elemNum sources
+
+ /* BDW's 64-bit load/store send instructions require the bti to be surfaceless,
+ which we cannot accept, so we fall back to an untyped write of 2 DWORDs per element.
+ NOTE(review): the write below takes no bti operand; confirm the rationale still applies to the A64 path. */
+ for (uint32_t elemID = 0; elemID < elemNum; elemID++) {
+ GenRegister the_long = ra->genReg(insn.src(elemID)); // sources 0..elemNum-1
+ GenRegister long_tmp = ra->genReg(insn.src(elemNum + 1 + elemID)); // sources located after the address slot
+ this->unpackLongVec(the_long, long_tmp, p->curr.execWidth); // presumably splits the_long into DWORD pairs in long_tmp -- confirm
+ }
+
+ p->UNTYPED_WRITEA64(addr, elemNum*2); // only addr is passed; presumably the data registers follow it contiguously -- confirm
+ }
void Gen8Context::emitPackLongInstruction(const SelectionInstruction &insn) {
const GenRegister src = ra->genReg(insn.src(0));
const GenRegister dst = ra->genReg(insn.dst(0));
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index c076080..c1ede13 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -72,6 +72,8 @@ namespace gbe
virtual void emitByteScatterA64Instruction(const SelectionInstruction &insn);
virtual void emitWrite64Instruction(const SelectionInstruction &insn);
virtual void emitRead64Instruction(const SelectionInstruction &insn);
+ virtual void emitWrite64A64Instruction(const SelectionInstruction &insn);
+ virtual void emitRead64A64Instruction(const SelectionInstruction &insn);
virtual void emitI64MULInstruction(const SelectionInstruction &insn);
virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 73f5c12..42bd198 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2223,6 +2223,12 @@ namespace gbe
void GenContext::emitByteScatterA64Instruction(const SelectionInstruction &insn) {
assert(0);
}
+ void GenContext::emitRead64A64Instruction(const SelectionInstruction &insn) { // base-class stub, insn is unused
+ assert(0); // no implementation here; Gen8Context defines its own emitRead64A64Instruction
+ }
+ void GenContext::emitWrite64A64Instruction(const SelectionInstruction &insn) { // base-class stub, insn is unused
+ assert(0); // no implementation here; Gen8Context defines its own emitWrite64A64Instruction
+ }
void GenContext::emitUnpackByteInstruction(const SelectionInstruction &insn) {
const GenRegister src = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index b2f4a85..1e0d959 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -156,6 +156,8 @@ namespace gbe
void emitMathInstruction(const SelectionInstruction &insn);
virtual void emitRead64Instruction(const SelectionInstruction &insn);
virtual void emitWrite64Instruction(const SelectionInstruction &insn);
+ virtual void emitRead64A64Instruction(const SelectionInstruction &insn);
+ virtual void emitWrite64A64Instruction(const SelectionInstruction &insn);
void emitUntypedReadInstruction(const SelectionInstruction &insn);
void emitUntypedWriteInstruction(const SelectionInstruction &insn);
virtual void emitUntypedReadA64Instruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index 87dce49..63e2fd3 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -26,6 +26,8 @@ DECL_GEN7_SCHEDULE(Barrier, 80, 1, 1)
DECL_GEN7_SCHEDULE(Fence, 80, 1, 1)
DECL_GEN7_SCHEDULE(Read64, 80, 1, 1)
DECL_GEN7_SCHEDULE(Write64, 80, 1, 1)
+DECL_GEN7_SCHEDULE(Read64A64, 80, 1, 1)
+DECL_GEN7_SCHEDULE(Write64A64, 80, 1, 1)
DECL_GEN7_SCHEDULE(UntypedRead, 160, 1, 1)
DECL_GEN7_SCHEDULE(UntypedWrite, 160, 1, 1)
DECL_GEN7_SCHEDULE(UntypedReadA64, 160, 1, 1)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index ef5174e..b160db9 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -185,6 +185,7 @@ namespace gbe
return this->opcode == SEL_OP_UNTYPED_READ ||
this->opcode == SEL_OP_UNTYPED_READA64 ||
this->opcode == SEL_OP_READ64 ||
+ this->opcode == SEL_OP_READ64A64 ||
this->opcode == SEL_OP_ATOMIC ||
this->opcode == SEL_OP_BYTE_GATHER ||
this->opcode == SEL_OP_BYTE_GATHERA64 ||
@@ -210,6 +211,7 @@ namespace gbe
return this->opcode == SEL_OP_UNTYPED_WRITE ||
this->opcode == SEL_OP_UNTYPED_WRITEA64 ||
this->opcode == SEL_OP_WRITE64 ||
+ this->opcode == SEL_OP_WRITE64A64 ||
this->opcode == SEL_OP_ATOMIC ||
this->opcode == SEL_OP_BYTE_SCATTER ||
this->opcode == SEL_OP_BYTE_SCATTERA64 ||
@@ -629,6 +631,10 @@ namespace gbe
void READ64(Reg addr, const GenRegister *dst, const GenRegister *tmp, uint32_t elemNum, const GenRegister bti, bool native_long, vector<GenRegister> temps);
/*! Write 64 bits float/int array */
void WRITE64(Reg addr, const GenRegister *src, const GenRegister *tmp, uint32_t srcNum, GenRegister bti, bool native_long, vector<GenRegister> temps);
+ /*! Read 64 bits data, A64 variant of READ64 */
+ void READ64A64(Reg addr, const GenRegister *dst, const GenRegister *tmp, uint32_t elemNum);
+ /*! Write 64 bits data, A64 variant of WRITE64 */
+ void WRITE64A64(Reg addr, const GenRegister *src, const GenRegister *tmp, uint32_t srcNum);
/*! Untyped read (up to 4 elements) */
void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, GenRegister bti, vector<GenRegister> temps);
/*! Untyped write (up to 4 elements) */
@@ -1363,6 +1369,39 @@ namespace gbe
srcVector->reg = &insn->src(0);
}
+ void Selection::Opaque::READ64A64(Reg addr, // appends a SEL_OP_READ64A64 instruction; addr becomes src(0)
+ const GenRegister *dst, // elemNum registers, stored in dst(elemNum .. 2*elemNum-1)
+ const GenRegister *tmp, // elemNum registers, stored in dst(0 .. elemNum-1)
+ uint32_t elemNum) // number of entries read from dst[] and from tmp[]
+ {
+ SelectionInstruction *insn = NULL;
+ SelectionVector *srcVector = NULL;
+ SelectionVector *dstVector = NULL;
+ insn = this->appendInsn(SEL_OP_READ64A64,elemNum*2, 1); // 2*elemNum dst slots and 1 src slot are filled below
+ srcVector = this->appendVector();
+ dstVector = this->appendVector();
+
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+ insn->dst(elemID) = tmp[elemID]; // temporaries occupy the first half of the dst list
+
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+ insn->dst(elemID + elemNum) = dst[elemID]; // caller's destinations occupy the second half
+
+ insn->src(0) = addr;
+
+ insn->extra.elem = elemNum; // read back by the emit function as its element count
+
+ dstVector->regNum = elemNum; // the dst vector spans only the tmp registers dst(0 .. elemNum-1)
+ dstVector->isSrc = 0;
+ dstVector->offsetID = 0;
+ dstVector->reg = &insn->dst(0);
+
+ srcVector->regNum = 1; // the src vector holds just the address
+ srcVector->offsetID = 0;
+ srcVector->isSrc = 1;
+ srcVector->reg = &insn->src(0);
+ }
+
void Selection::Opaque::UNTYPED_READ(Reg addr,
const GenRegister *dst,
uint32_t elemNum,
@@ -1504,6 +1543,38 @@ namespace gbe
}
}
+ void Selection::Opaque::WRITE64A64(Reg addr, // appends a SEL_OP_WRITE64A64 instruction; addr becomes src(srcNum)
+ const GenRegister *src, // srcNum registers, stored in src(0 .. srcNum-1)
+ const GenRegister *tmp, // srcNum registers, stored in src(srcNum+1 ..) and also in dst(0 ..)
+ uint32_t srcNum) // number of entries read from src[] and from tmp[]
+ {
+ SelectionVector *vector = NULL;
+ SelectionInstruction *insn = NULL;
+
+ const uint32_t dstNum = srcNum; // one dst slot per tmp register
+ insn = this->appendInsn(SEL_OP_WRITE64A64, dstNum, srcNum*2 + 1); // sources: srcNum values + 1 address + srcNum temporaries
+ vector = this->appendVector();
+
+ for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+ insn->src(elemID) = src[elemID];
+
+ insn->src(srcNum) = addr;
+ for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+ insn->src(srcNum + 1 + elemID) = tmp[elemID];
+
+ /* We also need to add the tmp register to dst, in order
+ to avoid the post schedule error. */
+ for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+ insn->dst(elemID) = tmp[elemID];
+
+ insn->extra.elem = srcNum; // read back by the emit function as its element count
+
+ vector->regNum = srcNum + 1; // the vector spans the address followed by the tmp registers
+ vector->offsetID = srcNum;
+ vector->reg = &insn->src(srcNum);
+ vector->isSrc = 1;
+ }
+
void Selection::Opaque::UNTYPED_WRITE(Reg addr,
const GenRegister *src,
uint32_t elemNum,
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 0ba9fd6..426362a 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -59,6 +59,8 @@ DECL_SELECTION_IR(UNTYPED_READA64, UntypedReadA64Instruction)
DECL_SELECTION_IR(UNTYPED_WRITEA64, UntypedWriteA64Instruction)
DECL_SELECTION_IR(READ64, Read64Instruction)
DECL_SELECTION_IR(WRITE64, Write64Instruction)
+DECL_SELECTION_IR(READ64A64, Read64A64Instruction)
+DECL_SELECTION_IR(WRITE64A64, Write64A64Instruction)
DECL_SELECTION_IR(BYTE_GATHER, ByteGatherInstruction)
DECL_SELECTION_IR(BYTE_SCATTER, ByteScatterInstruction)
DECL_SELECTION_IR(BYTE_GATHERA64, ByteGatherA64Instruction)
--
2.3.1
More information about the Beignet
mailing list