[Beignet] [PATCH V3 3/3] support sends (split send) for untyped write

Guo, Yejun yejun.guo at intel.com
Tue Nov 29 03:48:42 UTC 2016


sends is a new instruction, available starting from gen9, that splits the
registers of address and data for write. The register pressure can be reduced
since address and data no longer need to be contiguous.

more patches for sends will be sent out.

We can choose send or sends based on hasSends() in the selection stage;
it is only enabled by default for skylake for now.

v2: add function setSendsOperands
v3: reuse function setDPUntypedRW
Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
---
 backend/src/backend/gen75_encoder.cpp      |  2 +-
 backend/src/backend/gen75_encoder.hpp      |  2 +-
 backend/src/backend/gen8_context.cpp       | 21 +++++---
 backend/src/backend/gen8_encoder.cpp       |  2 +-
 backend/src/backend/gen8_encoder.hpp       |  2 +-
 backend/src/backend/gen9_encoder.cpp       | 77 ++++++++++++++++++++++++++++++
 backend/src/backend/gen9_encoder.hpp       |  4 +-
 backend/src/backend/gen_context.cpp        | 41 +++++++++-------
 backend/src/backend/gen_encoder.cpp        | 14 +++++-
 backend/src/backend/gen_encoder.hpp        |  4 +-
 backend/src/backend/gen_insn_selection.cpp | 22 ++++++++-
 backend/src/backend/gen_insn_selection.hpp |  1 +
 12 files changed, 159 insertions(+), 33 deletions(-)

diff --git a/backend/src/backend/gen75_encoder.cpp b/backend/src/backend/gen75_encoder.cpp
index fc37991..9cafaa7 100644
--- a/backend/src/backend/gen75_encoder.cpp
+++ b/backend/src/backend/gen75_encoder.cpp
@@ -199,7 +199,7 @@ namespace gbe
     return insn->bits3.ud;
   }
 
-  void Gen75Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti, uint32_t elemNum) {
+  void Gen75Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, GenRegister bti, uint32_t elemNum) {
     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
     assert(elemNum >= 1 || elemNum <= 4);
     this->setHeader(insn);
diff --git a/backend/src/backend/gen75_encoder.hpp b/backend/src/backend/gen75_encoder.hpp
index d06f393..517afff 100644
--- a/backend/src/backend/gen75_encoder.hpp
+++ b/backend/src/backend/gen75_encoder.hpp
@@ -44,7 +44,7 @@ namespace gbe
     virtual void patchJMPI(uint32_t insnID, int32_t jip, int32_t uip);
     virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum);
     virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum);
-    virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t elemNum);
+    virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, GenRegister bti, uint32_t elemNum);
     virtual void setHeader(GenNativeInstruction *insn);
     virtual void setDPUntypedRW(GenNativeInstruction *insn, uint32_t bti, uint32_t rgba,
                    uint32_t msg_type, uint32_t msg_length, uint32_t response_length);
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 71c54fb..95b1013 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -968,6 +968,9 @@ namespace gbe
     GBE_ASSERT(elemNum == 1);
     const GenRegister addr = ra->genReg(insn.src(elemNum));
     const GenRegister bti = ra->genReg(insn.src(elemNum*2+1));
+    GenRegister data = ra->genReg(insn.src(elemNum+1));
+    if (!insn.extra.splitSend)
+      data = addr;
 
     /* Because BDW's store and load send instructions for 64 bits require the bti to be surfaceless,
        which we can not accept. We just fallback to 2 DW untypewrite here. */
@@ -978,11 +981,15 @@ namespace gbe
     }
 
     if (bti.file == GEN_IMMEDIATE_VALUE) {
-      p->UNTYPED_WRITE(addr, bti, elemNum*2);
+      p->UNTYPED_WRITE(addr, data, bti, elemNum*2);
     } else {
       const GenRegister tmp = ra->genReg(insn.dst(elemNum));
       const GenRegister btiTmp = ra->genReg(insn.dst(elemNum + 1));
-      unsigned desc = p->generateUntypedWriteMessageDesc(0, elemNum*2);
+      unsigned desc = 0;
+      if (insn.extra.splitSend)
+        desc = p->generateUntypedWriteSendsMessageDesc(0, elemNum*2);
+      else
+        desc = p->generateUntypedWriteMessageDesc(0, elemNum*2);
 
       unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
 
@@ -990,7 +997,7 @@ namespace gbe
       p->push();
         p->curr.predicate = GEN_PREDICATE_NORMAL;
         p->curr.useFlag(insn.state.flag, insn.state.subFlag);
-        p->UNTYPED_WRITE(addr, GenRegister::addr1(0), elemNum*2);
+        p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum*2);
       p->pop();
       afterMessage(insn, bti, tmp, btiTmp, jip0);
     }
@@ -1351,7 +1358,7 @@ namespace gbe
       nextDst = GenRegister::Qn(tempDst, 1);
       p->MOV(nextDst, nextSrc);
     p->pop();
-    p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
+    p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
     p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
 
     p->push();
@@ -1367,7 +1374,7 @@ namespace gbe
       nextDst = GenRegister::Qn(tempDst, 1);
       p->MOV(nextDst, nextSrc);
     p->pop();
-    p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
+    p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
     p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
   }
 
@@ -1794,7 +1801,7 @@ namespace gbe
       p->curr.execWidth = 8;
       p->MUL(msgAddr, threadId, GenRegister::immd(0x8));
       p->ADD(msgAddr, msgAddr, msgSlmOff);
-      p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 2);
+      p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2);
     }
     else
     {
@@ -1802,7 +1809,7 @@ namespace gbe
       p->MOV(msgData, threadData);
       p->MUL(msgAddr, threadId, GenRegister::immd(0x4));
       p->ADD(msgAddr, msgAddr, msgSlmOff);
-      p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 1);
+      p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1);
     }
 
     /* init partialData register, it will hold the final result */
diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp
index 6638805..4239e84 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -268,7 +268,7 @@ namespace gbe
     return insn->bits3.ud;
   }
 
-  void Gen8Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti, uint32_t elemNum) {
+  void Gen8Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, GenRegister bti, uint32_t elemNum) {
     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
     assert(elemNum >= 1 || elemNum <= 4);
     this->setHeader(insn);
diff --git a/backend/src/backend/gen8_encoder.hpp b/backend/src/backend/gen8_encoder.hpp
index b73beb3..f6a91a0 100644
--- a/backend/src/backend/gen8_encoder.hpp
+++ b/backend/src/backend/gen8_encoder.hpp
@@ -47,7 +47,7 @@ namespace gbe
     virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum);
     virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum);
     virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum);
-    virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t elemNum);
+    virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, GenRegister bti, uint32_t elemNum);
     virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t elemNum);
     virtual void UNTYPED_WRITEA64(GenRegister src, uint32_t elemNum);
     virtual void BYTE_GATHERA64(GenRegister dst, GenRegister src, uint32_t elemSize);
diff --git a/backend/src/backend/gen9_encoder.cpp b/backend/src/backend/gen9_encoder.cpp
index e66ae08..68ab7ae 100644
--- a/backend/src/backend/gen9_encoder.cpp
+++ b/backend/src/backend/gen9_encoder.cpp
@@ -26,6 +26,14 @@
  **********************************************************************/
 
 #include "backend/gen9_encoder.hpp"
+#include "backend/gen9_instruction.hpp"
+static const uint32_t untypedRWMask[] = { // looked up as untypedRWMask[elemNum] when building the sends write descriptor
+  GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN|GEN_UNTYPED_RED, // [0]: all four channel flags set
+  GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN, // [1]
+  GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE, // [2]
+  GEN_UNTYPED_ALPHA, // [3]
+  0 // [4]: no channel flag set; NOTE(review): presumably a set flag masks that channel off - confirm
+};
 
 namespace gbe
 {
@@ -66,4 +74,73 @@ namespace gbe
                        header_present,
                        simd_mode, return_format);
   }
+
+  void Gen9Encoder::setSendsOperands(Gen9NativeInstruction *gen9_insn, GenRegister dst, GenRegister src0, GenRegister src1)
+  { // Fill the dst / src0 / src1 operand fields of the sends encoding in gen9_insn.
+    assert(dst.subnr == 0 && src0.subnr == 0 && src1.subnr == 0);
+    // Only ARF and GRF destinations are encoded; any other register file trips the assert below.
+    if (dst.file == GEN_ARCHITECTURE_REGISTER_FILE)
+      gen9_insn->bits1.sends.dest_reg_file_0 = 0;
+    else if (dst.file == GEN_GENERAL_REGISTER_FILE)
+      gen9_insn->bits1.sends.dest_reg_file_0 = 1;
+    else
+      assert(!"should not reach here");
+    // src1's file field is hard-coded to 1, the value used above for a GRF destination.
+    gen9_insn->bits1.sends.src1_reg_file_0 = 1;
+    gen9_insn->bits1.sends.src1_reg_nr = src1.nr;
+    gen9_insn->bits1.sends.dest_subreg_nr = 0; // sub-register numbers were asserted to be 0 above
+    gen9_insn->bits1.sends.dest_reg_nr = dst.nr;
+    gen9_insn->bits1.sends.dest_address_mode = 0;  //direct mode
+    gen9_insn->bits2.sends.src0_subreg_nr = 0;
+    gen9_insn->bits2.sends.src0_reg_nr = src0.nr;
+    gen9_insn->bits2.sends.src0_address_mode = 0; // same value as dest_address_mode above
+  }
+
+  unsigned Gen9Encoder::setUntypedWriteSendsMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum)
+  { // Write the untyped-surface-write descriptor for a split send into insn and return insn->bits3.ud.
+    uint32_t msg_length = 0;
+    uint32_t response_length = 0; // stays 0: no response length is requested for this write
+    if (this->curr.execWidth == 8) {
+      msg_length = 1; // depends only on execWidth, not on elemNum
+    } else if (this->curr.execWidth == 16) {
+      msg_length = 2; // NOTE(review): presumably counts only the address payload, data going via src1 - confirm
+    }
+    else
+      NOT_IMPLEMENTED; // only execution widths 8 and 16 are handled
+    setDPUntypedRW(insn,
+                   bti,
+                   untypedRWMask[elemNum], // table has 5 entries, so elemNum must be <= 4 (not checked here)
+                   GEN75_P1_UNTYPED_SURFACE_WRITE,
+                   msg_length,
+                   response_length);
+    return insn->bits3.ud;
+  }
+
+  void Gen9Encoder::UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemNum)
+  { // Untyped write of elemNum (1..4) elements; emits SENDS when addr and data are different registers.
+    if (addr.reg() == data.reg())
+      Gen8Encoder::UNTYPED_WRITE(addr, data, bti, elemNum); // same register: defer to the Gen8 encoding
+    else {
+      GenNativeInstruction *insn = this->next(GEN_OPCODE_SENDS);
+      Gen9NativeInstruction *gen9_insn = &insn->gen9_insn;
+      assert(elemNum >= 1 && elemNum <= 4);
+      // Both bounds must hold: an `||` here would be true for every value and check nothing.
+      this->setHeader(insn);
+      insn->header.destreg_or_condmod = GEN_SFID_DATAPORT1_DATA;
+      // Null destination; addr goes in src0 and data in src1.
+      setSendsOperands(gen9_insn, GenRegister::null(), addr, data);
+      if (this->curr.execWidth == 8)
+        gen9_insn->bits2.sends.src1_length = elemNum;
+      else if (this->curr.execWidth == 16)
+        gen9_insn->bits2.sends.src1_length = 2 * elemNum;
+      else
+        assert(!"unsupported");
+      // Immediate bti: build the descriptor here. Otherwise only flag sel_reg32_desc and set no descriptor.
+      if (bti.file == GEN_IMMEDIATE_VALUE) {
+        gen9_insn->bits2.sends.sel_reg32_desc = 0;
+        setUntypedWriteSendsMessageDesc(insn, bti.value.ud, elemNum);
+      } else
+        gen9_insn->bits2.sends.sel_reg32_desc = 1;
+    }
+  }
 } /* End of the name space. */
diff --git a/backend/src/backend/gen9_encoder.hpp b/backend/src/backend/gen9_encoder.hpp
index 319e871..5b6328d 100644
--- a/backend/src/backend/gen9_encoder.hpp
+++ b/backend/src/backend/gen9_encoder.hpp
@@ -47,7 +47,9 @@ namespace gbe
                 uint32_t return_format,
                 bool isLD,
                 bool isUniform);
-
+    void setSendsOperands(Gen9NativeInstruction *gen9_insn, GenRegister dst, GenRegister src0, GenRegister src1);
+    virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemNum);
+    virtual unsigned setUntypedWriteSendsMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
   };
 }
 #endif /* __GBE_GEN9_ENCODER_HPP__ */
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index c38b7af..848933e 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2146,7 +2146,7 @@ namespace gbe
     const GenRegister bti = ra->genReg(insn.src(elemNum+1));
 
     if (bti.file == GEN_IMMEDIATE_VALUE) {
-      p->UNTYPED_WRITE(src, bti, elemNum*2);
+      p->UNTYPED_WRITE(src, src, bti, elemNum*2);
     } else {
       const GenRegister tmp = ra->genReg(insn.dst(0));
       const GenRegister btiTmp = ra->genReg(insn.dst(1));
@@ -2158,22 +2158,29 @@ namespace gbe
       p->push();
         p->curr.predicate = GEN_PREDICATE_NORMAL;
         p->curr.useFlag(insn.state.flag, insn.state.subFlag);
-        p->UNTYPED_WRITE(src, GenRegister::addr1(0), elemNum*2);
+        p->UNTYPED_WRITE(src, src, GenRegister::addr1(0), elemNum*2);
       p->pop();
       afterMessage(insn, bti, tmp, btiTmp, jip0);
     }
   }
 
   void GenContext::emitUntypedWriteInstruction(const SelectionInstruction &insn) {
-    const GenRegister src = ra->genReg(insn.src(0));
+    const GenRegister addr = ra->genReg(insn.src(0));
+    GenRegister data = ra->genReg(insn.src(1));
+    if (!insn.extra.splitSend)
+      data = addr;
     const uint32_t elemNum = insn.extra.elem;
     const GenRegister bti = ra->genReg(insn.src(elemNum+1));
     if (bti.file == GEN_IMMEDIATE_VALUE) {
-      p->UNTYPED_WRITE(src, bti, elemNum);
+      p->UNTYPED_WRITE(addr, data, bti, elemNum);
     } else {
       const GenRegister tmp = ra->genReg(insn.dst(0));
       const GenRegister btiTmp = ra->genReg(insn.dst(1));
-      unsigned desc = p->generateUntypedWriteMessageDesc(0, elemNum);
+      unsigned desc = 0;
+      if (insn.extra.splitSend)
+        desc = p->generateUntypedWriteSendsMessageDesc(0, elemNum);
+      else
+        desc = p->generateUntypedWriteMessageDesc(0, elemNum);
 
       unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
 
@@ -2181,7 +2188,7 @@ namespace gbe
       p->push();
         p->curr.predicate = GEN_PREDICATE_NORMAL;
         p->curr.useFlag(insn.state.flag, insn.state.subFlag);
-        p->UNTYPED_WRITE(src, GenRegister::addr1(0), elemNum);
+        p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum);
       p->pop();
       afterMessage(insn, bti, tmp, btiTmp, jip0);
     }
@@ -2881,14 +2888,14 @@ namespace gbe
       // Write it out.
       p->curr.execWidth = 8;
       p->curr.noMask = 1;
-      p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
+      p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
       p->ADD(addr, addr, GenRegister::immud(32));
 
       // time stamps
       for (int i = 0; i < 3; i++) {
         p->curr.execWidth = 8;
         p->MOV(data, GenRegister::retype(profilingReg[i], GEN_TYPE_UD));
-        p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
+        p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
         p->ADD(addr, addr, GenRegister::immud(32));
       }
     } p->pop();
@@ -3294,7 +3301,7 @@ namespace gbe
       p->curr.execWidth = 8;
       p->MUL(msgAddr, threadId, GenRegister::immd(0x8));
       p->ADD(msgAddr, msgAddr, msgSlmOff);
-      p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 2);
+      p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2);
     }
     else
     {
@@ -3302,7 +3309,7 @@ namespace gbe
       p->MOV(msgData, threadData);
       p->MUL(msgAddr, threadId, GenRegister::immd(0x4));
       p->ADD(msgAddr, msgAddr, msgSlmOff);
-      p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 1);
+      p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1);
     }
 
     /* init partialData register, it will hold the final result */
@@ -3460,11 +3467,11 @@ namespace gbe
   void GenContext::emitPrintfLongInstruction(GenRegister& addr, GenRegister& data,
                                              GenRegister& src, uint32_t bti) {
     p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src.bottom_half());
-    p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
+    p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
     p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
 
     p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src.top_half(this->simdWidth));
-    p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
+    p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
     p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
   }
 
@@ -3492,15 +3499,15 @@ namespace gbe
       p->ATOMIC(addr, GEN_ATOMIC_OP_ADD, addr, GenRegister::immud(insn.extra.printfBTI), 2);
       /* Write out the header. */
       p->MOV(data, GenRegister::immud(0xAABBCCDD));
-      p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1);
+      p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1);
 
       p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
       p->MOV(data, GenRegister::immud(insn.extra.printfSize + 12));
-      p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1);
+      p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1);
 
       p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
       p->MOV(data, GenRegister::immud(insn.extra.printfNum));
-      p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1);
+      p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1);
 
       p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
     }
@@ -3510,11 +3517,11 @@ namespace gbe
       src = ra->genReg(insn.src(i));
       if (src.type == GEN_TYPE_UD || src.type == GEN_TYPE_D || src.type == GEN_TYPE_F) {
         p->MOV(GenRegister::retype(data, src.type), src);
-        p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1);
+        p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1);
         p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
       } else if (src.type == GEN_TYPE_B || src.type == GEN_TYPE_UB ) {
         p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src);
-        p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1);
+        p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1);
         p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
       } else if (src.type == GEN_TYPE_L || src.type == GEN_TYPE_UL ) {
         emitPrintfLongInstruction(addr, data, src, insn.extra.printfBTI);
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 060d65f..b379419 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -410,6 +410,18 @@ namespace gbe
     return insn->bits3.ud;
   }
 
+  unsigned GenEncoder::generateUntypedWriteSendsMessageDesc(unsigned bti, unsigned elemNum) { // returns the descriptor without emitting an instruction
+    GenNativeInstruction insn; // local scratch instruction, never appended to the stream
+    memset(&insn, 0, sizeof(GenNativeInstruction)); // start from all-zero bits
+    return setUntypedWriteSendsMessageDesc(&insn, bti, elemNum); // virtual call; Gen9Encoder supplies the real encoding
+  }
+
+  unsigned GenEncoder::setUntypedWriteSendsMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum)
+  { // Base-class version: the declaration is virtual and Gen9Encoder overrides it.
+    assert(0); // reaching the base version is treated as an error
+    return 0; // returned only when asserts are compiled out
+  }
+
   void GenEncoder::UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t elemNum) {
     assert(0);
   }
@@ -422,7 +434,7 @@ namespace gbe
     assert(0);
   }
 
-  void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti, uint32_t elemNum) {
+  void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, GenRegister bti, uint32_t elemNum) {
     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
     assert(elemNum >= 1 || elemNum <= 4);
     this->setHeader(insn);
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 00d3eaa..e6f362b 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -177,7 +177,7 @@ namespace gbe
     /*! Untyped read (upto 4 channels) */
     virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum);
     /*! Untyped write (upto 4 channels) */
-    virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t elemNum);
+    virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemNum);
     /*! Untyped read A64(upto 4 channels) */
     virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t elemNum);
     /*! Untyped write (upto 4 channels) */
@@ -260,12 +260,14 @@ namespace gbe
     virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum, int type_long);
     virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
     virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
+    virtual unsigned setUntypedWriteSendsMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
     unsigned setByteGatherMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemSize);
     unsigned setByteScatterMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemSize);
 
     unsigned generateAtomicMessageDesc(unsigned function, unsigned bti, unsigned srcNum);
     unsigned generateUntypedReadMessageDesc(unsigned bti, unsigned elemNum);
     unsigned generateUntypedWriteMessageDesc(unsigned bti, unsigned elemNum);
+    unsigned generateUntypedWriteSendsMessageDesc(unsigned bti, unsigned elemNum);
     unsigned generateByteGatherMessageDesc(unsigned bti, unsigned elemSize);
     unsigned generateByteScatterMessageDesc(unsigned bti, unsigned elemSize);
 
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index c14e0bc..deebafa 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -373,7 +373,9 @@ namespace gbe
     /*! spill a register (insert spill/unspill instructions) */
     INLINE bool spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool);
     bool has32X32Mul() const { return bHas32X32Mul; }
+    bool hasSends() const { return bHasSends; }
     void setHas32X32Mul(bool b) { bHas32X32Mul = b; }
+    void setHasSends(bool b) { bHasSends = b; }
     bool hasLongType() const { return bHasLongType; }
     bool hasDoubleType() const { return bHasDoubleType; }
     bool hasHalfType() const { return bHasHalfType; }
@@ -822,6 +824,7 @@ namespace gbe
     bool bHasDoubleType;
     bool bHasHalfType;
     bool bLongRegRestrict;
+    bool bHasSends;
     uint32_t ldMsgOrder;
     bool slowByteGather;
     INLINE ir::LabelIndex newAuxLabel()
@@ -864,7 +867,7 @@ namespace gbe
     maxInsnNum(ctx.getFunction().getLargestBlockSize()), dagPool(maxInsnNum),
     stateNum(0), vectorNum(0), bwdCodeGeneration(false), storeThreadMap(false),
     currAuxLabel(ctx.getFunction().labelNum()), bHas32X32Mul(false), bHasLongType(false),
-    bHasDoubleType(false), bHasHalfType(false), bLongRegRestrict(false),
+    bHasDoubleType(false), bHasHalfType(false), bLongRegRestrict(false), bHasSends(false),
     ldMsgOrder(LD_MSG_ORDER_IVB), slowByteGather(false)
   {
     const ir::Function &fn = ctx.getFunction();
@@ -1665,7 +1668,6 @@ namespace gbe
     unsigned dstNum = temps.size();
     unsigned srcNum = elemNum + 2 + temps.size();
     SelectionInstruction *insn = this->appendInsn(SEL_OP_UNTYPED_WRITE, dstNum, srcNum);
-    SelectionVector *vector = this->appendVector();
 
     if (bti.file != GEN_IMMEDIATE_VALUE) {
       insn->state.flag = 0;
@@ -1685,11 +1687,26 @@ namespace gbe
     }
     insn->extra.elem = elemNum;
 
+    if (hasSends()) {
+      insn->extra.splitSend = 1;
+      SelectionVector *vector = this->appendVector();
+      vector->regNum = elemNum;
+      vector->reg = &insn->src(1);
+      vector->offsetID = 1;
+      vector->isSrc = 1;
+      vector = this->appendVector();
+      vector->regNum = 1;
+      vector->reg = &insn->src(0);
+      vector->offsetID = 0;
+      vector->isSrc = 1;
+    } else {
     // Sends require contiguous allocation for the sources
+      SelectionVector *vector = this->appendVector();
     vector->regNum = elemNum+1;
     vector->reg = &insn->src(0);
     vector->offsetID = 0;
     vector->isSrc = 1;
+    }
   }
 
   void Selection::Opaque::UNTYPED_WRITEA64(const GenRegister *src,
@@ -2722,6 +2739,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
     this->opaque->setLdMsgOrder(LD_MSG_ORDER_SKL);
     this->opaque->setSlowByteGather(false);
     this->opaque->setHasHalfType(true);
+    this->opaque->setHasSends(true);
     opt_features = SIOF_LOGICAL_SRCMOD;
   }
 
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 14ac05f..7ce2b94 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -104,6 +104,7 @@ namespace gbe
         uint16_t function:8;
         /*! elemSize for byte scatters / gathers, elemNum for untyped msg, operand number for atomic */
         uint16_t elem:8;
+        uint16_t splitSend:1;
       };
       struct {
         /*! Number of sources in the tuple */
-- 
1.9.1



More information about the Beignet mailing list