[Beignet] [PATCH 2/4] support sends (split send) for untyped write
Song, Ruiling
ruiling.song at intel.com
Wed Nov 23 14:15:10 UTC 2016
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Guo, Yejun
> Sent: Tuesday, November 22, 2016 2:43 PM
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun <yejun.guo at intel.com>
> Subject: [Beignet] [PATCH 2/4] support sends (split send) for untyped write
>
> sends is a new instruction, available starting from gen9, that splits the
> address and data registers of a write. Register pressure can be reduced
> since the two no longer need to be contiguous.
>
> more patches for sends will be sent out.
>
> we can choose send or sends based on hasSends() in the selection stage;
> it is only enabled by default for skylake for now.
>
> Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
> ---
> backend/src/backend/gen75_encoder.cpp | 2 +-
> backend/src/backend/gen75_encoder.hpp | 2 +-
> backend/src/backend/gen8_context.cpp | 21 +++++++----
> backend/src/backend/gen8_encoder.cpp | 2 +-
> backend/src/backend/gen8_encoder.hpp | 2 +-
> backend/src/backend/gen9_encoder.cpp | 58
> ++++++++++++++++++++++++++++++
> backend/src/backend/gen9_encoder.hpp | 3 +-
> backend/src/backend/gen_context.cpp | 41 ++++++++++++---------
> backend/src/backend/gen_encoder.cpp | 12 ++++++-
> backend/src/backend/gen_encoder.hpp | 4 ++-
> backend/src/backend/gen_insn_selection.cpp | 22 ++++++++++--
> backend/src/backend/gen_insn_selection.hpp | 1 +
> 12 files changed, 137 insertions(+), 33 deletions(-)
>
> diff --git a/backend/src/backend/gen75_encoder.cpp
> b/backend/src/backend/gen75_encoder.cpp
> index fc37991..9cafaa7 100644
> --- a/backend/src/backend/gen75_encoder.cpp
> +++ b/backend/src/backend/gen75_encoder.cpp
> @@ -199,7 +199,7 @@ namespace gbe
> return insn->bits3.ud;
> }
>
> - void Gen75Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti,
> uint32_t elemNum) {
> + void Gen75Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister data,
> GenRegister bti, uint32_t elemNum) {
> GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> assert(elemNum >= 1 || elemNum <= 4);
> this->setHeader(insn);
> diff --git a/backend/src/backend/gen75_encoder.hpp
> b/backend/src/backend/gen75_encoder.hpp
> index d06f393..517afff 100644
> --- a/backend/src/backend/gen75_encoder.hpp
> +++ b/backend/src/backend/gen75_encoder.hpp
> @@ -44,7 +44,7 @@ namespace gbe
> virtual void patchJMPI(uint32_t insnID, int32_t jip, int32_t uip);
> virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src,
> GenRegister bti, uint32_t srcNum);
> virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister
> bti, uint32_t elemNum);
> - virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t
> elemNum);
> + virtual void UNTYPED_WRITE(GenRegister src, GenRegister data,
> GenRegister bti, uint32_t elemNum);
> virtual void setHeader(GenNativeInstruction *insn);
> virtual void setDPUntypedRW(GenNativeInstruction *insn, uint32_t bti,
> uint32_t rgba,
> uint32_t msg_type, uint32_t msg_length, uint32_t response_length);
> diff --git a/backend/src/backend/gen8_context.cpp
> b/backend/src/backend/gen8_context.cpp
> index 71c54fb..95b1013 100644
> --- a/backend/src/backend/gen8_context.cpp
> +++ b/backend/src/backend/gen8_context.cpp
> @@ -968,6 +968,9 @@ namespace gbe
> GBE_ASSERT(elemNum == 1);
> const GenRegister addr = ra->genReg(insn.src(elemNum));
> const GenRegister bti = ra->genReg(insn.src(elemNum*2+1));
> + GenRegister data = ra->genReg(insn.src(elemNum+1));
> + if (!insn.extra.splitSend)
> + data = addr;
>
> /* Because BDW's store and load send instructions for 64 bits require the bti
> to be surfaceless,
> which we can not accept. We just fallback to 2 DW untypewrite here. */
> @@ -978,11 +981,15 @@ namespace gbe
> }
>
> if (bti.file == GEN_IMMEDIATE_VALUE) {
> - p->UNTYPED_WRITE(addr, bti, elemNum*2);
> + p->UNTYPED_WRITE(addr, data, bti, elemNum*2);
> } else {
> const GenRegister tmp = ra->genReg(insn.dst(elemNum));
> const GenRegister btiTmp = ra->genReg(insn.dst(elemNum + 1));
> - unsigned desc = p->generateUntypedWriteMessageDesc(0, elemNum*2);
> + unsigned desc = 0;
> + if (insn.extra.splitSend)
> + desc = p->generateUntypedWriteSendsMessageDesc(0, elemNum*2);
> + else
> + desc = p->generateUntypedWriteMessageDesc(0, elemNum*2);
>
> unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
>
> @@ -990,7 +997,7 @@ namespace gbe
> p->push();
> p->curr.predicate = GEN_PREDICATE_NORMAL;
> p->curr.useFlag(insn.state.flag, insn.state.subFlag);
> - p->UNTYPED_WRITE(addr, GenRegister::addr1(0), elemNum*2);
> + p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum*2);
> p->pop();
> afterMessage(insn, bti, tmp, btiTmp, jip0);
> }
> @@ -1351,7 +1358,7 @@ namespace gbe
> nextDst = GenRegister::Qn(tempDst, 1);
> p->MOV(nextDst, nextSrc);
> p->pop();
> - p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
> + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
> p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
>
> p->push();
> @@ -1367,7 +1374,7 @@ namespace gbe
> nextDst = GenRegister::Qn(tempDst, 1);
> p->MOV(nextDst, nextSrc);
> p->pop();
> - p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
> + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
> p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
> }
>
> @@ -1794,7 +1801,7 @@ namespace gbe
> p->curr.execWidth = 8;
> p->MUL(msgAddr, threadId, GenRegister::immd(0x8));
> p->ADD(msgAddr, msgAddr, msgSlmOff);
> - p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 2);
> + p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2);
> }
> else
> {
> @@ -1802,7 +1809,7 @@ namespace gbe
> p->MOV(msgData, threadData);
> p->MUL(msgAddr, threadId, GenRegister::immd(0x4));
> p->ADD(msgAddr, msgAddr, msgSlmOff);
> - p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 1);
> + p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1);
> }
>
> /* init partialData register, it will hold the final result */
> diff --git a/backend/src/backend/gen8_encoder.cpp
> b/backend/src/backend/gen8_encoder.cpp
> index 6638805..4239e84 100644
> --- a/backend/src/backend/gen8_encoder.cpp
> +++ b/backend/src/backend/gen8_encoder.cpp
> @@ -268,7 +268,7 @@ namespace gbe
> return insn->bits3.ud;
> }
>
> - void Gen8Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti,
> uint32_t elemNum) {
> + void Gen8Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister data,
> GenRegister bti, uint32_t elemNum) {
> GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> assert(elemNum >= 1 || elemNum <= 4);
> this->setHeader(insn);
> diff --git a/backend/src/backend/gen8_encoder.hpp
> b/backend/src/backend/gen8_encoder.hpp
> index b73beb3..f6a91a0 100644
> --- a/backend/src/backend/gen8_encoder.hpp
> +++ b/backend/src/backend/gen8_encoder.hpp
> @@ -47,7 +47,7 @@ namespace gbe
> virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src,
> GenRegister bti, uint32_t srcNum);
> virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister src,
> GenRegister bti, uint32_t srcNum);
> virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister
> bti, uint32_t elemNum);
> - virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t
> elemNum);
> + virtual void UNTYPED_WRITE(GenRegister src, GenRegister data,
> GenRegister bti, uint32_t elemNum);
> virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t
> elemNum);
> virtual void UNTYPED_WRITEA64(GenRegister src, uint32_t elemNum);
> virtual void BYTE_GATHERA64(GenRegister dst, GenRegister src, uint32_t
> elemSize);
> diff --git a/backend/src/backend/gen9_encoder.cpp
> b/backend/src/backend/gen9_encoder.cpp
> index 80df50d..351788c 100644
> --- a/backend/src/backend/gen9_encoder.cpp
> +++ b/backend/src/backend/gen9_encoder.cpp
> @@ -26,6 +26,14 @@
>
> *****************************************************************
> *****/
>
> #include "backend/gen9_encoder.hpp"
> +#include "backend/gen9_instruction.hpp"
> +static const uint32_t untypedRWMask[] = {
> +
> GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN|GEN_U
> NTYPED_RED,
> + GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN,
> + GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE,
> + GEN_UNTYPED_ALPHA,
> + 0
> +};
>
> namespace gbe
> {
> @@ -65,4 +73,54 @@ namespace gbe
> header_present,
> simd_mode, return_format);
> }
> + unsigned
> Gen9Encoder::setUntypedWriteSendsMessageDesc(GenNativeInstruction *insn,
> unsigned bti, unsigned elemNum)
The message descriptor encoding is the same for send and sends; what about calling the existing function?
66 void Gen8Encoder::setDPUntypedRW(GenNativeInstruction *insn,
67 uint32_t bti,
68 uint32_t rgba,
69 uint32_t msg_type,
70 uint32_t msg_length,
71 uint32_t response_length)
> + {
> + Gen9NativeInstruction *gen9_insn = &insn->gen9_insn;
> + gen9_insn->bits3.sends_untyped_rw.header_present = 0;
> + gen9_insn->bits3.sends_untyped_rw.response_length = 0;
> + gen9_insn->bits3.sends_untyped_rw.end_of_thread = 0;
> + gen9_insn->bits3.sends_untyped_rw.msg_type =
> GEN75_P1_UNTYPED_SURFACE_WRITE;
> + gen9_insn->bits3.sends_untyped_rw.bti = bti;
> + gen9_insn->bits3.sends_untyped_rw.rgba = untypedRWMask[elemNum];
> + if (this->curr.execWidth == 8) {
> + gen9_insn->bits3.sends_untyped_rw.src0_length = 1;
> + gen9_insn->bits3.sends_untyped_rw.simd_mode = GEN_UNTYPED_SIMD8;
> + } else if (this->curr.execWidth == 16) {
> + gen9_insn->bits3.sends_untyped_rw.src0_length = 2;
> + gen9_insn->bits3.sends_untyped_rw.simd_mode = GEN_UNTYPED_SIMD16;
> + }
> + return gen9_insn->bits3.ud;
> + }
> + void Gen9Encoder::UNTYPED_WRITE(GenRegister addr, GenRegister data,
> GenRegister bti, uint32_t elemNum)
> + {
> + if (addr.reg() == data.reg())
> + Gen8Encoder::UNTYPED_WRITE(addr, data, bti, elemNum);
> + else {
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SENDS);
> + Gen9NativeInstruction *gen9_insn = &insn->gen9_insn;
> + assert(elemNum >= 1 || elemNum <= 4);
> + this->setHeader(insn);
> + insn->header.destreg_or_condmod = GEN_SFID_DATAPORT1_DATA;
> + gen9_insn->bits1.sends.dest_reg_file_0 = 1; //01 for GRF
Generally, we should set the sends destination to the null register, so its register file should be ARF rather than GRF.
> + gen9_insn->bits1.sends.src1_reg_file_0 = 1;
> + gen9_insn->bits1.sends.src1_reg_nr = data.nr;
> + gen9_insn->bits1.sends.dest_subreg_nr = 0;
> + gen9_insn->bits1.sends.dest_reg_nr = 0;
> + gen9_insn->bits1.sends.dest_address_mode = 0; //direct mode
> + gen9_insn->bits2.sends.src0_subreg_nr = addr.subnr;
Setting src0_subreg_nr here is meaningless: only the src0_subreg_nr[4] bit is left, and I am not sure whether the hardware uses it correctly.
Generally the message payload register subnr should be 0. You can remove the line above and add an assert(addr.subnr == 0); instead.
I would also suggest defining the functions below to implement the sends encoding logic, as sends has a very different encoding.
setSendsDst(nullreg);
setSendsSrc0(src0);
setSendsSrc1(src1);
so that untyped_write(), byte_scatter() and typed_write() can call these functions instead of repeating the same logic in every place.
> + gen9_insn->bits2.sends.src0_reg_nr = addr.nr;
> + gen9_insn->bits2.sends.src0_address_mode = 0;
> + if (this->curr.execWidth == 8)
> + gen9_insn->bits2.sends.src1_length = elemNum;
> + else if (this->curr.execWidth == 16)
> + gen9_insn->bits2.sends.src1_length = 2 * elemNum;
> + else
> + assert(!"unsupported");
> + if (bti.file == GEN_IMMEDIATE_VALUE) {
> + gen9_insn->bits2.sends.sel_reg32_desc = 0;
> + setUntypedWriteSendsMessageDesc(insn, bti.value.ud, elemNum);
> + } else
> + gen9_insn->bits2.sends.sel_reg32_desc = 1;
> + }
> + }
> } /* End of the name space. */
> diff --git a/backend/src/backend/gen9_encoder.hpp
> b/backend/src/backend/gen9_encoder.hpp
> index 319e871..7b9f0df 100644
> --- a/backend/src/backend/gen9_encoder.hpp
> +++ b/backend/src/backend/gen9_encoder.hpp
> @@ -47,7 +47,8 @@ namespace gbe
> uint32_t return_format,
> bool isLD,
> bool isUniform);
> -
> + virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data,
> GenRegister bti, uint32_t elemNum);
> + virtual unsigned setUntypedWriteSendsMessageDesc(GenNativeInstruction
> *insn, unsigned bti, unsigned elemNum);
> };
> }
> #endif /* __GBE_GEN9_ENCODER_HPP__ */
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index c38b7af..848933e 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -2146,7 +2146,7 @@ namespace gbe
> const GenRegister bti = ra->genReg(insn.src(elemNum+1));
>
> if (bti.file == GEN_IMMEDIATE_VALUE) {
> - p->UNTYPED_WRITE(src, bti, elemNum*2);
> + p->UNTYPED_WRITE(src, src, bti, elemNum*2);
> } else {
> const GenRegister tmp = ra->genReg(insn.dst(0));
> const GenRegister btiTmp = ra->genReg(insn.dst(1));
> @@ -2158,22 +2158,29 @@ namespace gbe
> p->push();
> p->curr.predicate = GEN_PREDICATE_NORMAL;
> p->curr.useFlag(insn.state.flag, insn.state.subFlag);
> - p->UNTYPED_WRITE(src, GenRegister::addr1(0), elemNum*2);
> + p->UNTYPED_WRITE(src, src, GenRegister::addr1(0), elemNum*2);
> p->pop();
> afterMessage(insn, bti, tmp, btiTmp, jip0);
> }
> }
>
> void GenContext::emitUntypedWriteInstruction(const SelectionInstruction
> &insn) {
> - const GenRegister src = ra->genReg(insn.src(0));
> + const GenRegister addr = ra->genReg(insn.src(0));
> + GenRegister data = ra->genReg(insn.src(1));
> + if (!insn.extra.splitSend)
> + data = addr;
> const uint32_t elemNum = insn.extra.elem;
> const GenRegister bti = ra->genReg(insn.src(elemNum+1));
> if (bti.file == GEN_IMMEDIATE_VALUE) {
> - p->UNTYPED_WRITE(src, bti, elemNum);
> + p->UNTYPED_WRITE(addr, data, bti, elemNum);
> } else {
> const GenRegister tmp = ra->genReg(insn.dst(0));
> const GenRegister btiTmp = ra->genReg(insn.dst(1));
> - unsigned desc = p->generateUntypedWriteMessageDesc(0, elemNum);
> + unsigned desc = 0;
> + if (insn.extra.splitSend)
> + desc = p->generateUntypedWriteSendsMessageDesc(0, elemNum);
> + else
> + desc = p->generateUntypedWriteMessageDesc(0, elemNum);
>
> unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
>
> @@ -2181,7 +2188,7 @@ namespace gbe
> p->push();
> p->curr.predicate = GEN_PREDICATE_NORMAL;
> p->curr.useFlag(insn.state.flag, insn.state.subFlag);
> - p->UNTYPED_WRITE(src, GenRegister::addr1(0), elemNum);
> + p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum);
> p->pop();
> afterMessage(insn, bti, tmp, btiTmp, jip0);
> }
> @@ -2881,14 +2888,14 @@ namespace gbe
> // Write it out.
> p->curr.execWidth = 8;
> p->curr.noMask = 1;
> - p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
> + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
> p->ADD(addr, addr, GenRegister::immud(32));
>
> // time stamps
> for (int i = 0; i < 3; i++) {
> p->curr.execWidth = 8;
> p->MOV(data, GenRegister::retype(profilingReg[i], GEN_TYPE_UD));
> - p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
> + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
> p->ADD(addr, addr, GenRegister::immud(32));
> }
> } p->pop();
> @@ -3294,7 +3301,7 @@ namespace gbe
> p->curr.execWidth = 8;
> p->MUL(msgAddr, threadId, GenRegister::immd(0x8));
> p->ADD(msgAddr, msgAddr, msgSlmOff);
> - p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 2);
> + p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2);
> }
> else
> {
> @@ -3302,7 +3309,7 @@ namespace gbe
> p->MOV(msgData, threadData);
> p->MUL(msgAddr, threadId, GenRegister::immd(0x4));
> p->ADD(msgAddr, msgAddr, msgSlmOff);
> - p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 1);
> + p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1);
> }
>
> /* init partialData register, it will hold the final result */
> @@ -3460,11 +3467,11 @@ namespace gbe
> void GenContext::emitPrintfLongInstruction(GenRegister& addr, GenRegister&
> data,
> GenRegister& src, uint32_t bti) {
> p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src.bottom_half());
> - p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
> + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
> p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
>
> p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src.top_half(this-
> >simdWidth));
> - p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
> + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
> p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
> }
>
> @@ -3492,15 +3499,15 @@ namespace gbe
> p->ATOMIC(addr, GEN_ATOMIC_OP_ADD, addr,
> GenRegister::immud(insn.extra.printfBTI), 2);
> /* Write out the header. */
> p->MOV(data, GenRegister::immud(0xAABBCCDD));
> - p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1);
> + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI),
> 1);
>
> p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
> p->MOV(data, GenRegister::immud(insn.extra.printfSize + 12));
> - p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1);
> + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI),
> 1);
>
> p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
> p->MOV(data, GenRegister::immud(insn.extra.printfNum));
> - p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1);
> + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI),
> 1);
>
> p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
> }
> @@ -3510,11 +3517,11 @@ namespace gbe
> src = ra->genReg(insn.src(i));
> if (src.type == GEN_TYPE_UD || src.type == GEN_TYPE_D || src.type ==
> GEN_TYPE_F) {
> p->MOV(GenRegister::retype(data, src.type), src);
> - p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1);
> + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI),
> 1);
> p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
> } else if (src.type == GEN_TYPE_B || src.type == GEN_TYPE_UB ) {
> p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src);
> - p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1);
> + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI),
> 1);
> p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
> } else if (src.type == GEN_TYPE_L || src.type == GEN_TYPE_UL ) {
> emitPrintfLongInstruction(addr, data, src, insn.extra.printfBTI);
> diff --git a/backend/src/backend/gen_encoder.cpp
> b/backend/src/backend/gen_encoder.cpp
> index a69adc7..dc6dc63 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -392,6 +392,11 @@ namespace gbe
> return setUntypedWriteMessageDesc(&insn, bti, elemNum);
> }
>
> + unsigned GenEncoder::generateUntypedWriteSendsMessageDesc(unsigned
> bti, unsigned elemNum) {
> + GenNativeInstruction insn;
> + memset(&insn, 0, sizeof(GenNativeInstruction));
> + return setUntypedWriteSendsMessageDesc(&insn, bti, elemNum);
> + }
> unsigned GenEncoder::setUntypedWriteMessageDesc(GenNativeInstruction
> *insn, unsigned bti, unsigned elemNum) {
> uint32_t msg_length = 0;
> uint32_t response_length = 0;
> @@ -411,6 +416,11 @@ namespace gbe
> return insn->bits3.ud;
> }
>
> + unsigned
> GenEncoder::setUntypedWriteSendsMessageDesc(GenNativeInstruction *insn,
> unsigned bti, unsigned elemNum)
> + {
> + assert(0);
> + return 0;
> + }
> void GenEncoder::UNTYPED_READA64(GenRegister dst, GenRegister src,
> uint32_t elemNum) {
> assert(0);
> }
> @@ -423,7 +433,7 @@ namespace gbe
> assert(0);
> }
>
> - void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti,
> uint32_t elemNum) {
> + void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister data,
> GenRegister bti, uint32_t elemNum) {
> GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> assert(elemNum >= 1 || elemNum <= 4);
> this->setHeader(insn);
> diff --git a/backend/src/backend/gen_encoder.hpp
> b/backend/src/backend/gen_encoder.hpp
> index 00d3eaa..e6f362b 100644
> --- a/backend/src/backend/gen_encoder.hpp
> +++ b/backend/src/backend/gen_encoder.hpp
> @@ -177,7 +177,7 @@ namespace gbe
> /*! Untyped read (upto 4 channels) */
> virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister
> bti, uint32_t elemNum);
> /*! Untyped write (upto 4 channels) */
> - virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t
> elemNum);
> + virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data,
> GenRegister bti, uint32_t elemNum);
> /*! Untyped read A64(upto 4 channels) */
> virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t
> elemNum);
> /*! Untyped write (upto 4 channels) */
> @@ -260,12 +260,14 @@ namespace gbe
> virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn,
> unsigned function, unsigned bti, unsigned srcNum, int type_long);
> virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn,
> unsigned bti, unsigned elemNum);
> virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn,
> unsigned bti, unsigned elemNum);
> + virtual unsigned setUntypedWriteSendsMessageDesc(GenNativeInstruction
> *insn, unsigned bti, unsigned elemNum);
> unsigned setByteGatherMessageDesc(GenNativeInstruction *insn, unsigned
> bti, unsigned elemSize);
> unsigned setByteScatterMessageDesc(GenNativeInstruction *insn, unsigned
> bti, unsigned elemSize);
>
> unsigned generateAtomicMessageDesc(unsigned function, unsigned bti,
> unsigned srcNum);
> unsigned generateUntypedReadMessageDesc(unsigned bti, unsigned
> elemNum);
> unsigned generateUntypedWriteMessageDesc(unsigned bti, unsigned
> elemNum);
> + unsigned generateUntypedWriteSendsMessageDesc(unsigned bti, unsigned
> elemNum);
> unsigned generateByteGatherMessageDesc(unsigned bti, unsigned elemSize);
> unsigned generateByteScatterMessageDesc(unsigned bti, unsigned elemSize);
>
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index c14e0bc..deebafa 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -373,7 +373,9 @@ namespace gbe
> /*! spill a register (insert spill/unspill instructions) */
> INLINE bool spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool);
> bool has32X32Mul() const { return bHas32X32Mul; }
> + bool hasSends() const { return bHasSends; }
> void setHas32X32Mul(bool b) { bHas32X32Mul = b; }
> + void setHasSends(bool b) { bHasSends = b; }
> bool hasLongType() const { return bHasLongType; }
> bool hasDoubleType() const { return bHasDoubleType; }
> bool hasHalfType() const { return bHasHalfType; }
> @@ -822,6 +824,7 @@ namespace gbe
> bool bHasDoubleType;
> bool bHasHalfType;
> bool bLongRegRestrict;
> + bool bHasSends;
> uint32_t ldMsgOrder;
> bool slowByteGather;
> INLINE ir::LabelIndex newAuxLabel()
> @@ -864,7 +867,7 @@ namespace gbe
> maxInsnNum(ctx.getFunction().getLargestBlockSize()), dagPool(maxInsnNum),
> stateNum(0), vectorNum(0), bwdCodeGeneration(false),
> storeThreadMap(false),
> currAuxLabel(ctx.getFunction().labelNum()), bHas32X32Mul(false),
> bHasLongType(false),
> - bHasDoubleType(false), bHasHalfType(false), bLongRegRestrict(false),
> + bHasDoubleType(false), bHasHalfType(false), bLongRegRestrict(false),
> bHasSends(false),
> ldMsgOrder(LD_MSG_ORDER_IVB), slowByteGather(false)
> {
> const ir::Function &fn = ctx.getFunction();
> @@ -1665,7 +1668,6 @@ namespace gbe
> unsigned dstNum = temps.size();
> unsigned srcNum = elemNum + 2 + temps.size();
> SelectionInstruction *insn = this->appendInsn(SEL_OP_UNTYPED_WRITE,
> dstNum, srcNum);
> - SelectionVector *vector = this->appendVector();
>
> if (bti.file != GEN_IMMEDIATE_VALUE) {
> insn->state.flag = 0;
> @@ -1685,11 +1687,26 @@ namespace gbe
> }
> insn->extra.elem = elemNum;
>
> + if (hasSends()) {
> + insn->extra.splitSend = 1;
> + SelectionVector *vector = this->appendVector();
> + vector->regNum = elemNum;
> + vector->reg = &insn->src(1);
> + vector->offsetID = 1;
> + vector->isSrc = 1;
> + vector = this->appendVector();
> + vector->regNum = 1;
> + vector->reg = &insn->src(0);
> + vector->offsetID = 0;
> + vector->isSrc = 1;
> + } else {
> // Sends require contiguous allocation for the sources
> + SelectionVector *vector = this->appendVector();
> vector->regNum = elemNum+1;
> vector->reg = &insn->src(0);
> vector->offsetID = 0;
> vector->isSrc = 1;
> + }
> }
>
> void Selection::Opaque::UNTYPED_WRITEA64(const GenRegister *src,
> @@ -2722,6 +2739,7 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
> this->opaque->setLdMsgOrder(LD_MSG_ORDER_SKL);
> this->opaque->setSlowByteGather(false);
> this->opaque->setHasHalfType(true);
> + this->opaque->setHasSends(true);
> opt_features = SIOF_LOGICAL_SRCMOD;
> }
>
> diff --git a/backend/src/backend/gen_insn_selection.hpp
> b/backend/src/backend/gen_insn_selection.hpp
> index 14ac05f..7ce2b94 100644
> --- a/backend/src/backend/gen_insn_selection.hpp
> +++ b/backend/src/backend/gen_insn_selection.hpp
> @@ -104,6 +104,7 @@ namespace gbe
> uint16_t function:8;
> /*! elemSize for byte scatters / gathers, elemNum for untyped msg,
> operand number for atomic */
> uint16_t elem:8;
> + uint16_t splitSend:1;
> };
> struct {
> /*! Number of sources in the tuple */
> --
> 1.9.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list