[Beignet] [PATCH 2/4] support sends (split send) for untyped write
Guo, Yejun
yejun.guo at intel.com
Tue Nov 22 06:42:38 UTC 2016
sends is a new instruction, available starting from gen9, that splits the
registers of address and data for a write; the register pressure can be relaxed
since they no longer need to be contiguous.
More patches for sends will be sent out.
We can choose send or sends based on hasSends() in the selection stage;
it is only enabled by default for Skylake now.
Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
---
backend/src/backend/gen75_encoder.cpp | 2 +-
backend/src/backend/gen75_encoder.hpp | 2 +-
backend/src/backend/gen8_context.cpp | 21 +++++++----
backend/src/backend/gen8_encoder.cpp | 2 +-
backend/src/backend/gen8_encoder.hpp | 2 +-
backend/src/backend/gen9_encoder.cpp | 58 ++++++++++++++++++++++++++++++
backend/src/backend/gen9_encoder.hpp | 3 +-
backend/src/backend/gen_context.cpp | 41 ++++++++++++---------
backend/src/backend/gen_encoder.cpp | 12 ++++++-
backend/src/backend/gen_encoder.hpp | 4 ++-
backend/src/backend/gen_insn_selection.cpp | 22 ++++++++++--
backend/src/backend/gen_insn_selection.hpp | 1 +
12 files changed, 137 insertions(+), 33 deletions(-)
diff --git a/backend/src/backend/gen75_encoder.cpp b/backend/src/backend/gen75_encoder.cpp
index fc37991..9cafaa7 100644
--- a/backend/src/backend/gen75_encoder.cpp
+++ b/backend/src/backend/gen75_encoder.cpp
@@ -199,7 +199,7 @@ namespace gbe
return insn->bits3.ud;
}
- void Gen75Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti, uint32_t elemNum) {
+ void Gen75Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, GenRegister bti, uint32_t elemNum) {
GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
assert(elemNum >= 1 || elemNum <= 4);
this->setHeader(insn);
diff --git a/backend/src/backend/gen75_encoder.hpp b/backend/src/backend/gen75_encoder.hpp
index d06f393..517afff 100644
--- a/backend/src/backend/gen75_encoder.hpp
+++ b/backend/src/backend/gen75_encoder.hpp
@@ -44,7 +44,7 @@ namespace gbe
virtual void patchJMPI(uint32_t insnID, int32_t jip, int32_t uip);
virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum);
virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum);
- virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t elemNum);
+ virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, GenRegister bti, uint32_t elemNum);
virtual void setHeader(GenNativeInstruction *insn);
virtual void setDPUntypedRW(GenNativeInstruction *insn, uint32_t bti, uint32_t rgba,
uint32_t msg_type, uint32_t msg_length, uint32_t response_length);
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 71c54fb..95b1013 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -968,6 +968,9 @@ namespace gbe
GBE_ASSERT(elemNum == 1);
const GenRegister addr = ra->genReg(insn.src(elemNum));
const GenRegister bti = ra->genReg(insn.src(elemNum*2+1));
+ GenRegister data = ra->genReg(insn.src(elemNum+1));
+ if (!insn.extra.splitSend)
+ data = addr;
/* Because BDW's store and load send instructions for 64 bits require the bti to be surfaceless,
which we can not accept. We just fallback to 2 DW untypewrite here. */
@@ -978,11 +981,15 @@ namespace gbe
}
if (bti.file == GEN_IMMEDIATE_VALUE) {
- p->UNTYPED_WRITE(addr, bti, elemNum*2);
+ p->UNTYPED_WRITE(addr, data, bti, elemNum*2);
} else {
const GenRegister tmp = ra->genReg(insn.dst(elemNum));
const GenRegister btiTmp = ra->genReg(insn.dst(elemNum + 1));
- unsigned desc = p->generateUntypedWriteMessageDesc(0, elemNum*2);
+ unsigned desc = 0;
+ if (insn.extra.splitSend)
+ desc = p->generateUntypedWriteSendsMessageDesc(0, elemNum*2);
+ else
+ desc = p->generateUntypedWriteMessageDesc(0, elemNum*2);
unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
@@ -990,7 +997,7 @@ namespace gbe
p->push();
p->curr.predicate = GEN_PREDICATE_NORMAL;
p->curr.useFlag(insn.state.flag, insn.state.subFlag);
- p->UNTYPED_WRITE(addr, GenRegister::addr1(0), elemNum*2);
+ p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum*2);
p->pop();
afterMessage(insn, bti, tmp, btiTmp, jip0);
}
@@ -1351,7 +1358,7 @@ namespace gbe
nextDst = GenRegister::Qn(tempDst, 1);
p->MOV(nextDst, nextSrc);
p->pop();
- p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
+ p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
p->push();
@@ -1367,7 +1374,7 @@ namespace gbe
nextDst = GenRegister::Qn(tempDst, 1);
p->MOV(nextDst, nextSrc);
p->pop();
- p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
+ p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
}
@@ -1794,7 +1801,7 @@ namespace gbe
p->curr.execWidth = 8;
p->MUL(msgAddr, threadId, GenRegister::immd(0x8));
p->ADD(msgAddr, msgAddr, msgSlmOff);
- p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 2);
+ p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2);
}
else
{
@@ -1802,7 +1809,7 @@ namespace gbe
p->MOV(msgData, threadData);
p->MUL(msgAddr, threadId, GenRegister::immd(0x4));
p->ADD(msgAddr, msgAddr, msgSlmOff);
- p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 1);
+ p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1);
}
/* init partialData register, it will hold the final result */
diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp
index 6638805..4239e84 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -268,7 +268,7 @@ namespace gbe
return insn->bits3.ud;
}
- void Gen8Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti, uint32_t elemNum) {
+ void Gen8Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, GenRegister bti, uint32_t elemNum) {
GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
assert(elemNum >= 1 || elemNum <= 4);
this->setHeader(insn);
diff --git a/backend/src/backend/gen8_encoder.hpp b/backend/src/backend/gen8_encoder.hpp
index b73beb3..f6a91a0 100644
--- a/backend/src/backend/gen8_encoder.hpp
+++ b/backend/src/backend/gen8_encoder.hpp
@@ -47,7 +47,7 @@ namespace gbe
virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum);
virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum);
virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum);
- virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t elemNum);
+ virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, GenRegister bti, uint32_t elemNum);
virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t elemNum);
virtual void UNTYPED_WRITEA64(GenRegister src, uint32_t elemNum);
virtual void BYTE_GATHERA64(GenRegister dst, GenRegister src, uint32_t elemSize);
diff --git a/backend/src/backend/gen9_encoder.cpp b/backend/src/backend/gen9_encoder.cpp
index 80df50d..351788c 100644
--- a/backend/src/backend/gen9_encoder.cpp
+++ b/backend/src/backend/gen9_encoder.cpp
@@ -26,6 +26,14 @@
**********************************************************************/
#include "backend/gen9_encoder.hpp"
+#include "backend/gen9_instruction.hpp"
+static const uint32_t untypedRWMask[] = {
+ GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN|GEN_UNTYPED_RED,
+ GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN,
+ GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE,
+ GEN_UNTYPED_ALPHA,
+ 0
+};
namespace gbe
{
@@ -65,4 +73,54 @@ namespace gbe
header_present,
simd_mode, return_format);
}
+ unsigned Gen9Encoder::setUntypedWriteSendsMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum)
+ {
+ Gen9NativeInstruction *gen9_insn = &insn->gen9_insn;
+ gen9_insn->bits3.sends_untyped_rw.header_present = 0;
+ gen9_insn->bits3.sends_untyped_rw.response_length = 0;
+ gen9_insn->bits3.sends_untyped_rw.end_of_thread = 0;
+ gen9_insn->bits3.sends_untyped_rw.msg_type = GEN75_P1_UNTYPED_SURFACE_WRITE;
+ gen9_insn->bits3.sends_untyped_rw.bti = bti;
+ gen9_insn->bits3.sends_untyped_rw.rgba = untypedRWMask[elemNum];
+ if (this->curr.execWidth == 8) {
+ gen9_insn->bits3.sends_untyped_rw.src0_length = 1;
+ gen9_insn->bits3.sends_untyped_rw.simd_mode = GEN_UNTYPED_SIMD8;
+ } else if (this->curr.execWidth == 16) {
+ gen9_insn->bits3.sends_untyped_rw.src0_length = 2;
+ gen9_insn->bits3.sends_untyped_rw.simd_mode = GEN_UNTYPED_SIMD16;
+ }
+ return gen9_insn->bits3.ud;
+ }
+ /*! Untyped surface write. Emits a split send (SENDS): addr is src0, data is src1.
+  *  When addr and data are the same register, falls back to the Gen8 SEND path. */
+ void Gen9Encoder::UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemNum)
+ {
+   if (addr.reg() == data.reg()) {
+     Gen8Encoder::UNTYPED_WRITE(addr, data, bti, elemNum);
+     return;
+   }
+   assert(elemNum >= 1 && elemNum <= 4); // was `||`, which made the range check always true
+   GenNativeInstruction *insn = this->next(GEN_OPCODE_SENDS);
+   Gen9NativeInstruction *gen9_insn = &insn->gen9_insn;
+   this->setHeader(insn);
+   insn->header.destreg_or_condmod = GEN_SFID_DATAPORT1_DATA;
+   gen9_insn->bits1.sends.dest_reg_file_0 = 1; //01 for GRF
+   gen9_insn->bits1.sends.src1_reg_file_0 = 1;
+   gen9_insn->bits1.sends.src1_reg_nr = data.nr;
+   gen9_insn->bits1.sends.dest_subreg_nr = 0;
+   gen9_insn->bits1.sends.dest_reg_nr = 0;
+   gen9_insn->bits1.sends.dest_address_mode = 0; //direct mode
+   gen9_insn->bits2.sends.src0_subreg_nr = addr.subnr;
+   gen9_insn->bits2.sends.src0_reg_nr = addr.nr;
+   gen9_insn->bits2.sends.src0_address_mode = 0;
+   if (this->curr.execWidth == 8) gen9_insn->bits2.sends.src1_length = elemNum; // data length: elemNum at width 8
+   else if (this->curr.execWidth == 16) gen9_insn->bits2.sends.src1_length = 2 * elemNum; // doubled at width 16
+   else assert(!"unsupported");
+   if (bti.file == GEN_IMMEDIATE_VALUE) {
+     gen9_insn->bits2.sends.sel_reg32_desc = 0; // immediate bti: descriptor is encoded in this instruction
+     setUntypedWriteSendsMessageDesc(insn, bti.value.ud, elemNum);
+   } else {
+     gen9_insn->bits2.sends.sel_reg32_desc = 1; // non-immediate bti: no descriptor is encoded here
+   }
+ }
} /* End of the name space. */
diff --git a/backend/src/backend/gen9_encoder.hpp b/backend/src/backend/gen9_encoder.hpp
index 319e871..7b9f0df 100644
--- a/backend/src/backend/gen9_encoder.hpp
+++ b/backend/src/backend/gen9_encoder.hpp
@@ -47,7 +47,8 @@ namespace gbe
uint32_t return_format,
bool isLD,
bool isUniform);
-
+ virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemNum);
+ virtual unsigned setUntypedWriteSendsMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
};
}
#endif /* __GBE_GEN9_ENCODER_HPP__ */
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index c38b7af..848933e 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2146,7 +2146,7 @@ namespace gbe
const GenRegister bti = ra->genReg(insn.src(elemNum+1));
if (bti.file == GEN_IMMEDIATE_VALUE) {
- p->UNTYPED_WRITE(src, bti, elemNum*2);
+ p->UNTYPED_WRITE(src, src, bti, elemNum*2);
} else {
const GenRegister tmp = ra->genReg(insn.dst(0));
const GenRegister btiTmp = ra->genReg(insn.dst(1));
@@ -2158,22 +2158,29 @@ namespace gbe
p->push();
p->curr.predicate = GEN_PREDICATE_NORMAL;
p->curr.useFlag(insn.state.flag, insn.state.subFlag);
- p->UNTYPED_WRITE(src, GenRegister::addr1(0), elemNum*2);
+ p->UNTYPED_WRITE(src, src, GenRegister::addr1(0), elemNum*2);
p->pop();
afterMessage(insn, bti, tmp, btiTmp, jip0);
}
}
void GenContext::emitUntypedWriteInstruction(const SelectionInstruction &insn) {
- const GenRegister src = ra->genReg(insn.src(0));
+ const GenRegister addr = ra->genReg(insn.src(0));
+ GenRegister data = ra->genReg(insn.src(1));
+ if (!insn.extra.splitSend)
+ data = addr;
const uint32_t elemNum = insn.extra.elem;
const GenRegister bti = ra->genReg(insn.src(elemNum+1));
if (bti.file == GEN_IMMEDIATE_VALUE) {
- p->UNTYPED_WRITE(src, bti, elemNum);
+ p->UNTYPED_WRITE(addr, data, bti, elemNum);
} else {
const GenRegister tmp = ra->genReg(insn.dst(0));
const GenRegister btiTmp = ra->genReg(insn.dst(1));
- unsigned desc = p->generateUntypedWriteMessageDesc(0, elemNum);
+ unsigned desc = 0;
+ if (insn.extra.splitSend)
+ desc = p->generateUntypedWriteSendsMessageDesc(0, elemNum);
+ else
+ desc = p->generateUntypedWriteMessageDesc(0, elemNum);
unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
@@ -2181,7 +2188,7 @@ namespace gbe
p->push();
p->curr.predicate = GEN_PREDICATE_NORMAL;
p->curr.useFlag(insn.state.flag, insn.state.subFlag);
- p->UNTYPED_WRITE(src, GenRegister::addr1(0), elemNum);
+ p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum);
p->pop();
afterMessage(insn, bti, tmp, btiTmp, jip0);
}
@@ -2881,14 +2888,14 @@ namespace gbe
// Write it out.
p->curr.execWidth = 8;
p->curr.noMask = 1;
- p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
+ p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
p->ADD(addr, addr, GenRegister::immud(32));
// time stamps
for (int i = 0; i < 3; i++) {
p->curr.execWidth = 8;
p->MOV(data, GenRegister::retype(profilingReg[i], GEN_TYPE_UD));
- p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
+ p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
p->ADD(addr, addr, GenRegister::immud(32));
}
} p->pop();
@@ -3294,7 +3301,7 @@ namespace gbe
p->curr.execWidth = 8;
p->MUL(msgAddr, threadId, GenRegister::immd(0x8));
p->ADD(msgAddr, msgAddr, msgSlmOff);
- p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 2);
+ p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2);
}
else
{
@@ -3302,7 +3309,7 @@ namespace gbe
p->MOV(msgData, threadData);
p->MUL(msgAddr, threadId, GenRegister::immd(0x4));
p->ADD(msgAddr, msgAddr, msgSlmOff);
- p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 1);
+ p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1);
}
/* init partialData register, it will hold the final result */
@@ -3460,11 +3467,11 @@ namespace gbe
void GenContext::emitPrintfLongInstruction(GenRegister& addr, GenRegister& data,
GenRegister& src, uint32_t bti) {
p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src.bottom_half());
- p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
+ p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src.top_half(this->simdWidth));
- p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
+ p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
}
@@ -3492,15 +3499,15 @@ namespace gbe
p->ATOMIC(addr, GEN_ATOMIC_OP_ADD, addr, GenRegister::immud(insn.extra.printfBTI), 2);
/* Write out the header. */
p->MOV(data, GenRegister::immud(0xAABBCCDD));
- p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1);
+ p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1);
p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
p->MOV(data, GenRegister::immud(insn.extra.printfSize + 12));
- p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1);
+ p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1);
p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
p->MOV(data, GenRegister::immud(insn.extra.printfNum));
- p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1);
+ p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1);
p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
}
@@ -3510,11 +3517,11 @@ namespace gbe
src = ra->genReg(insn.src(i));
if (src.type == GEN_TYPE_UD || src.type == GEN_TYPE_D || src.type == GEN_TYPE_F) {
p->MOV(GenRegister::retype(data, src.type), src);
- p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1);
+ p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1);
p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
} else if (src.type == GEN_TYPE_B || src.type == GEN_TYPE_UB ) {
p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src);
- p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1);
+ p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1);
p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
} else if (src.type == GEN_TYPE_L || src.type == GEN_TYPE_UL ) {
emitPrintfLongInstruction(addr, data, src, insn.extra.printfBTI);
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index a69adc7..dc6dc63 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -392,6 +392,11 @@ namespace gbe
return setUntypedWriteMessageDesc(&insn, bti, elemNum);
}
+ unsigned GenEncoder::generateUntypedWriteSendsMessageDesc(unsigned bti, unsigned elemNum) {
+ GenNativeInstruction insn;
+ memset(&insn, 0, sizeof(GenNativeInstruction));
+ return setUntypedWriteSendsMessageDesc(&insn, bti, elemNum);
+ }
unsigned GenEncoder::setUntypedWriteMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum) {
uint32_t msg_length = 0;
uint32_t response_length = 0;
@@ -411,6 +416,11 @@ namespace gbe
return insn->bits3.ud;
}
+ unsigned GenEncoder::setUntypedWriteSendsMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum)
+ {
+ assert(0);
+ return 0;
+ }
void GenEncoder::UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t elemNum) {
assert(0);
}
@@ -423,7 +433,7 @@ namespace gbe
assert(0);
}
- void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti, uint32_t elemNum) {
+ void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, GenRegister bti, uint32_t elemNum) {
GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
assert(elemNum >= 1 || elemNum <= 4);
this->setHeader(insn);
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 00d3eaa..e6f362b 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -177,7 +177,7 @@ namespace gbe
/*! Untyped read (upto 4 channels) */
virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum);
/*! Untyped write (upto 4 channels) */
- virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t elemNum);
+ virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemNum);
/*! Untyped read A64(upto 4 channels) */
virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t elemNum);
/*! Untyped write (upto 4 channels) */
@@ -260,12 +260,14 @@ namespace gbe
virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum, int type_long);
virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
+ virtual unsigned setUntypedWriteSendsMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
unsigned setByteGatherMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemSize);
unsigned setByteScatterMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemSize);
unsigned generateAtomicMessageDesc(unsigned function, unsigned bti, unsigned srcNum);
unsigned generateUntypedReadMessageDesc(unsigned bti, unsigned elemNum);
unsigned generateUntypedWriteMessageDesc(unsigned bti, unsigned elemNum);
+ unsigned generateUntypedWriteSendsMessageDesc(unsigned bti, unsigned elemNum);
unsigned generateByteGatherMessageDesc(unsigned bti, unsigned elemSize);
unsigned generateByteScatterMessageDesc(unsigned bti, unsigned elemSize);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index c14e0bc..deebafa 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -373,7 +373,9 @@ namespace gbe
/*! spill a register (insert spill/unspill instructions) */
INLINE bool spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool);
bool has32X32Mul() const { return bHas32X32Mul; }
+ bool hasSends() const { return bHasSends; }
void setHas32X32Mul(bool b) { bHas32X32Mul = b; }
+ void setHasSends(bool b) { bHasSends = b; }
bool hasLongType() const { return bHasLongType; }
bool hasDoubleType() const { return bHasDoubleType; }
bool hasHalfType() const { return bHasHalfType; }
@@ -822,6 +824,7 @@ namespace gbe
bool bHasDoubleType;
bool bHasHalfType;
bool bLongRegRestrict;
+ bool bHasSends;
uint32_t ldMsgOrder;
bool slowByteGather;
INLINE ir::LabelIndex newAuxLabel()
@@ -864,7 +867,7 @@ namespace gbe
maxInsnNum(ctx.getFunction().getLargestBlockSize()), dagPool(maxInsnNum),
stateNum(0), vectorNum(0), bwdCodeGeneration(false), storeThreadMap(false),
currAuxLabel(ctx.getFunction().labelNum()), bHas32X32Mul(false), bHasLongType(false),
- bHasDoubleType(false), bHasHalfType(false), bLongRegRestrict(false),
+ bHasDoubleType(false), bHasHalfType(false), bLongRegRestrict(false), bHasSends(false),
ldMsgOrder(LD_MSG_ORDER_IVB), slowByteGather(false)
{
const ir::Function &fn = ctx.getFunction();
@@ -1665,7 +1668,6 @@ namespace gbe
unsigned dstNum = temps.size();
unsigned srcNum = elemNum + 2 + temps.size();
SelectionInstruction *insn = this->appendInsn(SEL_OP_UNTYPED_WRITE, dstNum, srcNum);
- SelectionVector *vector = this->appendVector();
if (bti.file != GEN_IMMEDIATE_VALUE) {
insn->state.flag = 0;
@@ -1685,11 +1687,26 @@ namespace gbe
}
insn->extra.elem = elemNum;
+ if (hasSends()) {
+ insn->extra.splitSend = 1;
+ SelectionVector *vector = this->appendVector();
+ vector->regNum = elemNum;
+ vector->reg = &insn->src(1);
+ vector->offsetID = 1;
+ vector->isSrc = 1;
+ vector = this->appendVector();
+ vector->regNum = 1;
+ vector->reg = &insn->src(0);
+ vector->offsetID = 0;
+ vector->isSrc = 1;
+ } else {
// Sends require contiguous allocation for the sources
+ SelectionVector *vector = this->appendVector();
vector->regNum = elemNum+1;
vector->reg = &insn->src(0);
vector->offsetID = 0;
vector->isSrc = 1;
+ }
}
void Selection::Opaque::UNTYPED_WRITEA64(const GenRegister *src,
@@ -2722,6 +2739,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
this->opaque->setLdMsgOrder(LD_MSG_ORDER_SKL);
this->opaque->setSlowByteGather(false);
this->opaque->setHasHalfType(true);
+ this->opaque->setHasSends(true);
opt_features = SIOF_LOGICAL_SRCMOD;
}
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 14ac05f..7ce2b94 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -104,6 +104,7 @@ namespace gbe
uint16_t function:8;
/*! elemSize for byte scatters / gathers, elemNum for untyped msg, operand number for atomic */
uint16_t elem:8;
+ uint16_t splitSend:1;
};
struct {
/*! Number of sources in the tuple */
--
1.9.1
More information about the Beignet
mailing list