[Beignet] [PATCH 12/27] Overload the READ64 and WRITE64 function for Gen8
junyan.he at inbox.com
junyan.he at inbox.com
Tue Jan 6 02:01:27 PST 2015
From: Junyan He <junyan.he at linux.intel.com>
We still read/write long type data the old way,
as 2 elements of DW data. After a read and before
a write, we use the pack/unpack functions to
convert it.
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/backend/gen8_context.cpp | 38 +++++++++++
backend/src/backend/gen8_context.hpp | 2 +
backend/src/backend/gen_context.hpp | 4 +-
backend/src/backend/gen_insn_selection.cpp | 105 ++++++++++++++++++++++-------
4 files changed, 124 insertions(+), 25 deletions(-)
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 85896df..276b8c5 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -109,4 +109,42 @@ namespace gbe
p->pop();
}
+ void Gen8Context::emitRead64Instruction(const SelectionInstruction &insn)
+ {
+ const uint32_t bti = insn.getbti();
+ const uint32_t elemNum = insn.extra.elem;
+ GBE_ASSERT(elemNum == 1);
+
+ const GenRegister addr = ra->genReg(insn.src(0));
+ const GenRegister tmp_dst = ra->genReg(insn.dst(0));
+
+ /* BDW's 64-bit load and store send instructions require the bti to be surfaceless,
+ which we cannot accept, so we just fall back to a 2 DW untyped read here. */
+ p->UNTYPED_READ(tmp_dst, addr, bti, elemNum*2);
+
+ for (uint32_t elemID = 0; elemID < elemNum; elemID++) {
+ GenRegister long_tmp = ra->genReg(insn.dst(elemID));
+ GenRegister the_long = ra->genReg(insn.dst(elemID + elemNum));
+ this->packLongVec(long_tmp, the_long, p->curr.execWidth);
+ }
+ }
+
+ void Gen8Context::emitWrite64Instruction(const SelectionInstruction &insn)
+ {
+ const uint32_t bti = insn.getbti();
+ const uint32_t elemNum = insn.extra.elem;
+ GBE_ASSERT(elemNum == 1);
+
+ const GenRegister addr = ra->genReg(insn.src(elemNum));
+
+ /* BDW's 64-bit load and store send instructions require the bti to be surfaceless,
+ which we cannot accept, so we just fall back to a 2 DW untyped write here. */
+ for (uint32_t elemID = 0; elemID < elemNum; elemID++) {
+ GenRegister the_long = ra->genReg(insn.src(elemID));
+ GenRegister long_tmp = ra->genReg(insn.src(elemNum + 1 + elemID));
+ this->unpackLongVec(the_long, long_tmp, p->curr.execWidth);
+ }
+
+ p->UNTYPED_WRITE(addr, bti, elemNum*2);
+ }
}
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 1b9125b..d9e547b 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -48,6 +48,8 @@ namespace gbe
/*! Get the pointer argument size for curbe alloc */
virtual uint32_t getPointerSize(void) { return 8; }
+ virtual void emitWrite64Instruction(const SelectionInstruction &insn);
+ virtual void emitRead64Instruction(const SelectionInstruction &insn);
protected:
virtual GenEncoder* generateEncoder(void) {
return GBE_NEW(Gen8Encoder, this->simdWidth, 8, deviceID);
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 45347b9..317257b 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -148,8 +148,8 @@ namespace gbe
void emitBarrierInstruction(const SelectionInstruction &insn);
void emitFenceInstruction(const SelectionInstruction &insn);
void emitMathInstruction(const SelectionInstruction &insn);
- void emitRead64Instruction(const SelectionInstruction &insn);
- void emitWrite64Instruction(const SelectionInstruction &insn);
+ virtual void emitRead64Instruction(const SelectionInstruction &insn);
+ virtual void emitWrite64Instruction(const SelectionInstruction &insn);
void emitUntypedReadInstruction(const SelectionInstruction &insn);
void emitUntypedWriteInstruction(const SelectionInstruction &insn);
void emitAtomicInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index bf3613d..f78b049 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -569,9 +569,9 @@ namespace gbe
/*! Atomic instruction */
void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg src1, Reg src2, uint32_t bti);
/*! Read 64 bits float/int array */
- void READ64(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti);
+ void READ64(Reg addr, const GenRegister *dst, const GenRegister *tmp, uint32_t elemNum, uint32_t bti, bool native_long);
/*! Write 64 bits float/int array */
- void WRITE64(Reg addr, const GenRegister *src, uint32_t srcNum, uint32_t bti);
+ void WRITE64(Reg addr, const GenRegister *src, const GenRegister *tmp, uint32_t srcNum, uint32_t bti, bool native_long);
/*! Untyped read (up to 4 elements) */
void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti);
/*! Untyped write (up to 4 elements) */
@@ -1127,16 +1127,29 @@ namespace gbe
void Selection::Opaque::READ64(Reg addr,
const GenRegister *dst,
+ const GenRegister *tmp,
uint32_t elemNum,
- uint32_t bti)
+ uint32_t bti,
+ bool native_long)
{
- SelectionInstruction *insn = this->appendInsn(SEL_OP_READ64, elemNum, 1);
+ SelectionInstruction *insn = NULL;
SelectionVector *srcVector = this->appendVector();
SelectionVector *dstVector = this->appendVector();
- // Regular instruction to encode
- for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
- insn->dst(elemID) = dst[elemID];
+ if (!native_long) {
+ insn = this->appendInsn(SEL_OP_READ64, elemNum, 1);
+ // Regular instruction to encode
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+ insn->dst(elemID) = dst[elemID];
+ } else {
+ insn = this->appendInsn(SEL_OP_READ64, elemNum*2, 1);
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+ insn->dst(elemID) = tmp[elemID];
+
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+ insn->dst(elemID + elemNum) = dst[elemID];
+ }
+
insn->src(0) = addr;
insn->setbti(bti);
insn->extra.elem = elemNum;
@@ -1179,23 +1192,50 @@ namespace gbe
void Selection::Opaque::WRITE64(Reg addr,
const GenRegister *src,
+ const GenRegister *tmp,
uint32_t srcNum,
- uint32_t bti)
+ uint32_t bti,
+ bool native_long)
{
- SelectionInstruction *insn = this->appendInsn(SEL_OP_WRITE64, 0, srcNum + 1);
SelectionVector *vector = this->appendVector();
-
- // Regular instruction to encode
- insn->src(0) = addr;
- for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
- insn->src(elemID + 1) = src[elemID];
-
- insn->setbti(bti);
- insn->extra.elem = srcNum;
-
- vector->regNum = srcNum + 1;
- vector->reg = &insn->src(0);
- vector->isSrc = 1;
+ SelectionInstruction *insn = NULL;
+
+ if (!native_long) {
+ insn = this->appendInsn(SEL_OP_WRITE64, 0, srcNum + 1);
+ // Regular instruction to encode
+ insn->src(0) = addr;
+ for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+ insn->src(elemID + 1) = src[elemID];
+
+ insn->setbti(bti);
+ insn->extra.elem = srcNum;
+
+ vector->regNum = srcNum + 1;
+ vector->reg = &insn->src(0);
+ vector->isSrc = 1;
+ } else { // handle the native long case
+ insn = this->appendInsn(SEL_OP_WRITE64, srcNum, srcNum*2 + 1);
+
+ insn->src(0) = addr;
+ for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+ insn->src(elemID) = src[elemID];
+
+ insn->src(srcNum) = addr;
+ for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+ insn->src(srcNum + 1 + elemID) = tmp[0];
+
+ /* We also need to add the tmp register to dst, in order
+ to avoid the post-schedule error. */
+ for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+ insn->dst(elemID) = tmp[0];
+
+ insn->setbti(bti);
+ insn->extra.elem = srcNum;
+
+ vector->regNum = srcNum + 1;
+ vector->reg = &insn->src(srcNum);
+ vector->isSrc = 1;
+ }
}
void Selection::Opaque::UNTYPED_WRITE(Reg addr,
@@ -2931,7 +2971,17 @@ namespace gbe
GenRegister tmpAddr = getRelativeAddress(sel, addr, bti.bti[0]);
for ( uint32_t dstID = 0; dstID < valueNum; ++dstID)
dst[dstID] = sel.selReg(insn.getValue(dstID), ir::TYPE_U64);
- sel.READ64(tmpAddr, dst.data(), valueNum, bti.bti[0]);
+
+ if (sel.hasLongType()) {
+ vector<GenRegister> tmp(valueNum);
+ for (uint32_t valueID = 0; valueID < valueNum; ++valueID) {
+ tmp[valueID] = GenRegister::retype(sel.selReg(sel.reg(ir::FAMILY_QWORD), ir::TYPE_U64), GEN_TYPE_UL);
+ }
+
+ sel.READ64(tmpAddr, dst.data(), tmp.data(), valueNum, bti.bti[0], true);
+ } else {
+ sel.READ64(tmpAddr, dst.data(), NULL, valueNum, bti.bti[0], false);
+ }
}
void readByteAsDWord(Selection::Opaque &sel,
@@ -3246,7 +3296,16 @@ namespace gbe
for (uint32_t valueID = 0; valueID < valueNum; ++valueID)
src[valueID] = sel.selReg(insn.getValue(valueID), ir::TYPE_U64);
- sel.WRITE64(addr, src.data(), valueNum, bti);
+
+ if (sel.hasLongType()) {
+ vector<GenRegister> tmp(valueNum);
+ for (uint32_t valueID = 0; valueID < valueNum; ++valueID) {
+ tmp[valueID] = GenRegister::retype(sel.selReg(sel.reg(ir::FAMILY_QWORD), ir::TYPE_U64), GEN_TYPE_UL);
+ }
+ sel.WRITE64(addr, src.data(), tmp.data(), valueNum, bti, true);
+ } else {
+ sel.WRITE64(addr, src.data(), NULL, valueNum, bti, false);
+ }
}
void emitByteScatter(Selection::Opaque &sel,
--
1.9.1
More information about the Beignet
mailing list