<div dir="ltr">This is great stuff. Will we also get the <span style="color:rgb(0,0,0);white-space:pre-wrap">intel_sub_group_block_* functions that take an image2d_t argument?</span><div><span style="color:rgb(0,0,0);white-space:pre-wrap"><br></span></div><div><span style="color:rgb(0,0,0);white-space:pre-wrap">Thanks,</span></div><div><span style="color:rgb(0,0,0);white-space:pre-wrap">Andrew</span></div></div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, May 19, 2016 at 2:55 PM, Xiuli Pan <span dir="ltr"><<a href="mailto:xiuli.pan@intel.com" target="_blank">xiuli.pan@intel.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">From: Pan Xiuli <<a href="mailto:xiuli.pan@intel.com">xiuli.pan@intel.com</a>><br>
<br>
Using OWORD_BLOCK_RW to read/write a block of data for a thread.<br>
<br>
Signed-off-by: Pan Xiuli <<a href="mailto:xiuli.pan@intel.com">xiuli.pan@intel.com</a>><br>
---<br>
 backend/src/backend/gen/gen_mesa_disasm.c          | 15 +++++<br>
 backend/src/backend/gen_context.cpp                | 63 ++++++++++++++++++<br>
 backend/src/backend/gen_context.hpp                |  2 +<br>
 backend/src/backend/gen_encoder.cpp                | 38 ++++++++++-<br>
 backend/src/backend/gen_encoder.hpp                |  4 ++<br>
 .../src/backend/gen_insn_gen7_schedule_info.hxx    |  2 +<br>
 backend/src/backend/gen_insn_selection.cpp         | 77 ++++++++++++++++++++--<br>
 backend/src/backend/gen_insn_selection.hpp         |  4 ++<br>
 backend/src/backend/gen_insn_selection.hxx         |  2 +<br>
 backend/src/ir/instruction.cpp                     | 26 ++++++--<br>
 backend/src/ir/instruction.hpp                     |  8 ++-<br>
 backend/src/ir/liveness.cpp                        |  5 ++<br>
 backend/src/libocl/CMakeLists.txt                  |  2 +-<br>
 backend/src/libocl/src/ocl_substore.ll             |  9 +++<br>
 backend/src/libocl/tmpl/ocl_simd.tmpl.cl           | 54 +++++++++++++++<br>
 backend/src/libocl/tmpl/ocl_simd.tmpl.h            | 11 ++++<br>
 backend/src/llvm/llvm_gen_backend.cpp              | 65 ++++++++++++++++++<br>
 backend/src/llvm/llvm_gen_ocl_function.hxx         |  5 +-<br>
 18 files changed, 377 insertions(+), 15 deletions(-)<br>
 create mode 100644 backend/src/libocl/src/ocl_substore.ll<br>
<br>
diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c<br>
index 067ddd8..9200c26 100644<br>
--- a/backend/src/backend/gen/gen_mesa_disasm.c<br>
+++ b/backend/src/backend/gen/gen_mesa_disasm.c<br>
@@ -432,6 +432,14 @@ static const char *data_port_data_cache_category[] = {<br>
   "scratch",<br>
 };<br>
<br>
+static const char *data_port_data_cache_block_size[] = {<br>
+  "1 OWORD LOW",<br>
+  "1 OWORD HIGH",<br>
+  "2 OWORD",<br>
+  "4 OWORD",<br>
+  "8 OWORD",<br>
+};<br>
+<br>
 static const char *data_port_scratch_block_size[] = {<br>
   "1 register",<br>
   "2 registers",<br>
@@ -576,6 +584,7 @@ static int gen_version;<br>
 #define MSG_GW_ACKREQ(inst)        GEN_BITS_FIELD(inst, bits3.gen7_msg_gw.ackreq)<br>
 #define GENERIC_MSG_LENGTH(inst)   GEN_BITS_FIELD(inst, bits3.generic_gen5.msg_length)<br>
 #define GENERIC_RESPONSE_LENGTH(inst) GEN_BITS_FIELD(inst, bits3.generic_gen5.response_length)<br>
+#define OWORD_RW_BLOCK_SIZE(inst)    GEN_BITS_FIELD(inst, bits3.gen7_oblock_rw.block_size)<br>
<br>
 static int is_special_acc(const void* inst)<br>
 {<br>
@@ -1483,6 +1492,12 @@ int gen_disasm (FILE *file, const void *inst, uint32_t deviceID, uint32_t compac<br>
                    data_port_data_cache_byte_scattered_simd_mode[BYTE_RW_SIMD_MODE(inst)],<br>
                    data_port_data_cache_category[UNTYPED_RW_CATEGORY(inst)],<br>
                    data_port_data_cache_msg_type[UNTYPED_RW_MSG_TYPE(inst)]);<br>
+            else if(UNTYPED_RW_MSG_TYPE(inst) == 0 || UNTYPED_RW_MSG_TYPE(inst) == 8)<br>
+              format(file, " (bti: %d, data size: %s, %s, %s)",<br>
+                   UNTYPED_RW_BTI(inst),<br>
+                   data_port_data_cache_block_size[OWORD_RW_BLOCK_SIZE(inst)],<br>
+                   data_port_data_cache_category[UNTYPED_RW_CATEGORY(inst)],<br>
+                   data_port_data_cache_msg_type[UNTYPED_RW_MSG_TYPE(inst)]);<br>
             else<br>
               format(file, " not implemented");<br>
           } else {<br>
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp<br>
index 4d0a3f3..cfb8be1 100644<br>
--- a/backend/src/backend/gen_context.cpp<br>
+++ b/backend/src/backend/gen_context.cpp<br>
@@ -3487,6 +3487,69 @@ namespace gbe<br>
     p->pop();<br>
   }<br>
<br>
+  void GenContext::emitOBReadInstruction(const SelectionInstruction &insn) {<br>
+    const GenRegister dst = ra->genReg(insn.dst(0));<br>
+    const GenRegister addr = ra->genReg(insn.src(0));<br>
+    const GenRegister first = GenRegister::ud1grf(addr.nr,addr.subnr/sizeof(float));<br>
+    GenRegister header;<br>
+    if (simdWidth == 8)<br>
+      header = GenRegister::retype(ra->genReg(insn.src(1)), GEN_TYPE_F);<br>
+    else<br>
+      header = GenRegister::retype(GenRegister::Qn(ra->genReg(insn.src(1)),1), GEN_TYPE_F);<br>
+<br>
+    p->push();<br>
+      // Copy r0 into the header first<br>
+      p->curr.execWidth = 8;<br>
+      p->curr.predicate = GEN_PREDICATE_NONE;<br>
+      p->curr.noMask = 1;<br>
+      p->MOV(header, GenRegister::f8grf(0,0));<br>
+<br>
+      // Update the header with the current address<br>
+      p->curr.execWidth = 1;<br>
+      const uint32_t nr = header.nr;<br>
+      const uint32_t subnr = header.subnr / sizeof(float);<br>
+      p->SHR(GenRegister::ud1grf(nr, subnr+2), first, GenRegister::immud(4));<br>
+      //p->MOV(GenRegister::ud1grf(nr, subnr+2), first);<br>
+<br>
+      // Put zero in the general state base address<br>
+      p->MOV(GenRegister::f1grf(nr, subnr+5), GenRegister::immf(0));<br>
+<br>
+    p->pop();<br>
+    // Now read the data<br>
+    p->OBREAD(dst, header, insn.getbti(), insn.extra.elem);<br>
+  }<br>
+<br>
+  void GenContext::emitOBWriteInstruction(const SelectionInstruction &insn) {<br>
+    const GenRegister addr = ra->genReg(insn.src(2));<br>
+    const GenRegister first = GenRegister::ud1grf(addr.nr,addr.subnr/sizeof(float));<br>
+    GenRegister header;<br>
+    if (simdWidth == 8)<br>
+      header = GenRegister::retype(ra->genReg(insn.src(0)), GEN_TYPE_F);<br>
+    else<br>
+      header = GenRegister::retype(GenRegister::Qn(ra->genReg(insn.src(0)),1), GEN_TYPE_F);<br>
+<br>
+    p->push();<br>
+      // Copy r0 into the header first<br>
+      p->curr.execWidth = 8;<br>
+      p->curr.predicate = GEN_PREDICATE_NONE;<br>
+      p->curr.noMask = 1;<br>
+      p->MOV(header, GenRegister::f8grf(0,0));<br>
+<br>
+      // Update the header with the current address<br>
+      p->curr.execWidth = 1;<br>
+      const uint32_t nr = header.nr;<br>
+      const uint32_t subnr = header.subnr / sizeof(float);<br>
+      p->SHR(GenRegister::ud1grf(nr, subnr+2), first, GenRegister::immud(4));<br>
+<br>
+      // Put zero in the general state base address<br>
+      p->MOV(GenRegister::f1grf(nr, subnr+5), GenRegister::immf(0));<br>
+<br>
+    p->pop();<br>
+    // Now write the data<br>
+    p->OBWRITE(header, insn.getbti(), insn.extra.elem);<br>
+  }<br>
+<br>
+<br>
   BVAR(OCL_OUTPUT_REG_ALLOC, false);<br>
   BVAR(OCL_OUTPUT_ASM, false);<br>
<br>
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp<br>
index 4c43ccb..56a5ec2 100644<br>
--- a/backend/src/backend/gen_context.hpp<br>
+++ b/backend/src/backend/gen_context.hpp<br>
@@ -187,6 +187,8 @@ namespace gbe<br>
     void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);<br>
     unsigned beforeMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister flagTemp, GenRegister btiTmp, unsigned desc);<br>
     void afterMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister flagTemp, GenRegister btiTmp, unsigned jip0);<br>
+    virtual void emitOBReadInstruction(const SelectionInstruction &insn);<br>
+    virtual void emitOBWriteInstruction(const SelectionInstruction &insn);<br>
<br>
     /*! Implements base class */<br>
     virtual Kernel *allocateKernel(void);<br>
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp<br>
index 31afa67..fc7b5cf 100644<br>
--- a/backend/src/backend/gen_encoder.cpp<br>
+++ b/backend/src/backend/gen_encoder.cpp<br>
@@ -258,7 +258,7 @@ namespace gbe<br>
     else<br>
       NOT_SUPPORTED;<br>
   }<br>
-#if 0<br>
+<br>
   static void setOBlockRW(GenEncoder *p,<br>
                           GenNativeInstruction *insn,<br>
                           uint32_t bti,<br>
@@ -272,10 +272,10 @@ namespace gbe<br>
     assert(size == 2 || size == 4);<br>
     insn->bits3.gen7_oblock_rw.msg_type = msg_type;<br>
     insn->bits3.gen7_oblock_rw.bti = bti;<br>
+    GBE_ASSERT(size == 2 || size == 4);<br>
     insn->bits3.gen7_oblock_rw.block_size = size == 2 ? 2 : 3;<br>
     insn->bits3.gen7_oblock_rw.header_present = 1;<br>
   }<br>
-#endif<br>
<br>
   static void setDWordScatterMessgae(GenEncoder *p,<br>
                                      GenNativeInstruction *insn,<br>
@@ -1244,6 +1244,40 @@ namespace gbe<br>
      setScratchMessage(this, insn, offset, block_size, channel_mode, GEN_SCRATCH_READ, 1, dst_num);<br>
   }<br>
<br>
+  void GenEncoder::OBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t size) {<br>
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);<br>
+    const uint32_t msg_length = 1;<br>
+    const uint32_t response_length = size / 2; // Size is in owords<br>
+    this->setHeader(insn);<br>
+    this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));<br>
+    this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0));<br>
+    this->setSrc1(insn, GenRegister::immud(0));<br>
+    setOBlockRW(this,<br>
+                insn,<br>
+                bti,<br>
+                size,<br>
+                GEN7_OBLOCK_READ,<br>
+                msg_length,<br>
+                response_length);<br>
+  }<br>
+<br>
+  void GenEncoder::OBWRITE(GenRegister header, uint32_t bti, uint32_t size) {<br>
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);<br>
+    const uint32_t msg_length = 1 + size / 2; // Size is in owords<br>
+    const uint32_t response_length = 0;<br>
+    this->setHeader(insn);<br>
+    this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0));<br>
+    this->setSrc1(insn, GenRegister::immud(0));<br>
+    this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));<br>
+    setOBlockRW(this,<br>
+                insn,<br>
+                bti,<br>
+                size,<br>
+                GEN7_OBLOCK_WRITE,<br>
+                msg_length,<br>
+                response_length);<br>
+  }<br>
+<br>
   void GenEncoder::EOT(uint32_t msg) {<br>
     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);<br>
     this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));<br>
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp<br>
index 0239293..a53c879 100644<br>
--- a/backend/src/backend/gen_encoder.hpp<br>
+++ b/backend/src/backend/gen_encoder.hpp<br>
@@ -267,6 +267,10 @@ namespace gbe<br>
     virtual bool canHandleLong(uint32_t opcode, GenRegister dst, GenRegister src0,<br>
                             GenRegister src1 = GenRegister::null());<br>
     virtual void handleDouble(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1 = GenRegister::null());<br>
+    /*! OBlock read */<br>
+    void OBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t elemSize);<br>
+    /*! OBlock write */<br>
+    void OBWRITE(GenRegister header, uint32_t bti, uint32_t elemSize);<br>
<br>
     GBE_CLASS(GenEncoder); //!< Use custom allocators<br>
     virtual void alu3(uint32_t opcode, GenRegister dst,<br>
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx<br>
index cb5c4f1..d297726 100644<br>
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx<br>
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx<br>
@@ -50,3 +50,5 @@ DECL_GEN7_SCHEDULE(StoreProfiling,  80,        1,        1)<br>
 DECL_GEN7_SCHEDULE(WorkGroupOp,     80,        1,        1)<br>
 DECL_GEN7_SCHEDULE(SubGroupOp,      80,        1,        1)<br>
 DECL_GEN7_SCHEDULE(Printf,          80,        1,        1)<br>
+DECL_GEN7_SCHEDULE(OBRead,          80,        1,        1)<br>
+DECL_GEN7_SCHEDULE(OBWrite,         80,        1,        1)<br>
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp<br>
index 596e70b..7c49242 100644<br>
--- a/backend/src/backend/gen_insn_selection.cpp<br>
+++ b/backend/src/backend/gen_insn_selection.cpp<br>
@@ -188,7 +188,8 @@ namespace gbe<br>
            this->opcode == SEL_OP_BYTE_GATHER  ||<br>
            this->opcode == SEL_OP_SAMPLE ||<br>
            this->opcode == SEL_OP_VME ||<br>
-           this->opcode == SEL_OP_DWORD_GATHER;<br>
+           this->opcode == SEL_OP_DWORD_GATHER ||<br>
+           this->opcode == SEL_OP_OBREAD;<br>
   }<br>
<br>
   bool SelectionInstruction::modAcc(void) const {<br>
@@ -210,7 +211,8 @@ namespace gbe<br>
            this->opcode == SEL_OP_WRITE64       ||<br>
            this->opcode == SEL_OP_ATOMIC        ||<br>
            this->opcode == SEL_OP_BYTE_SCATTER  ||<br>
-           this->opcode == SEL_OP_TYPED_WRITE;<br>
+           this->opcode == SEL_OP_TYPED_WRITE ||<br>
+           this->opcode == SEL_OP_OBWRITE;<br>
   }<br>
<br>
   bool SelectionInstruction::isBranch(void) const {<br>
@@ -697,6 +699,11 @@ namespace gbe<br>
     /*! Sub Group Operations */<br>
     void SUBGROUP_OP(uint32_t wg_op, Reg dst, GenRegister src,<br>
                       GenRegister tmpData1, GenRegister tmpData2);<br>
+    /*! Oblock read */<br>
+    void OBREAD(GenRegister dst, GenRegister addr, GenRegister header, uint32_t bti, uint32_t size);<br>
+    /*! Oblock write */<br>
+    void OBWRITE(GenRegister addr, GenRegister value, GenRegister header, uint32_t bti, uint32_t size);<br>
+<br>
     /* common functions for both binary instruction and sel_cmp and compare instruction.<br>
        It will handle the IMM or normal register assignment, and will try to avoid LOADI<br>
        as much as possible. */<br>
@@ -2014,6 +2021,40 @@ namespace gbe<br>
     insn->src(0) = src;<br>
     insn->src(1) = tmpData2;<br>
   }<br>
+  void Selection::Opaque::OBREAD(GenRegister dst,<br>
+                                 GenRegister addr,<br>
+                                 GenRegister header,<br>
+                                 uint32_t bti,<br>
+                                 uint32_t size) {<br>
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_OBREAD, 1, 2);<br>
+    insn->dst(0) = dst;<br>
+    insn->src(0) = addr;<br>
+    insn->src(1) = header;<br>
+    insn->setbti(bti);<br>
+    insn->extra.elem = size / sizeof(int[4]); // number of owords<br>
+  }<br>
+<br>
+  void Selection::Opaque::OBWRITE(GenRegister addr,<br>
+                                  GenRegister value,<br>
+                                  GenRegister header,<br>
+                                  uint32_t bti,<br>
+                                  uint32_t size) {<br>
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_OBWRITE, 0, 3);<br>
+    SelectionVector *vector = this->appendVector();<br>
+    insn->src(0) = header;<br>
+    insn->src(1) = value;<br>
+    insn->src(2) = addr;<br>
+    insn->state = this->curr;<br>
+    insn->setbti(bti);<br>
+    insn->extra.elem = size / sizeof(int[4]); // number of owords<br>
+<br>
+    // We need to put the header and the data together<br>
+    vector->regNum = 2;<br>
+    vector->reg = &insn->src(0);<br>
+    vector->offsetID = 0;<br>
+    vector->isSrc = 1;<br>
+  }<br>
+<br>
<br>
   // Boiler plate to initialize the selection library at c++ pre-main<br>
   static SelectionLibrary *selLib = NULL;<br>
@@ -4002,6 +4043,18 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp<br>
       }<br>
     }<br>
<br>
+    void emitOWordRead(Selection::Opaque &sel,<br>
+                       const ir::LoadInstruction &insn,<br>
+                       GenRegister address,<br>
+                       ir::BTI bti) const<br>
+    {<br>
+      using namespace ir;<br>
+      const GenRegister header = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);<br>
+      const GenRegister value = sel.selReg(insn.getValue(0), TYPE_U32);<br>
+      const uint32_t simdWidth = sel.ctx.getSimdWidth();<br>
+      sel.OBREAD(value, address, header, bti.imm, simdWidth * sizeof(int));<br>
+    }<br>
+<br>
     // check whether all binded table index point to constant memory<br>
     INLINE bool isAllConstant(const ir::BTI &bti) const {<br>
       if (bti.isConst && bti.imm == BTI_CONSTANT)<br>
@@ -4037,7 +4090,9 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp<br>
       const uint32_t elemSize = getByteScatterGatherSize(sel, type);<br>
       bool allConstant = isAllConstant(bti);<br>
<br>
-      if (allConstant) {<br>
+      if (insn.isBlock())<br>
+        this->emitOWordRead(sel, insn, address, bti);<br>
+      else if (allConstant) {<br>
         // XXX TODO read 64bit constant through constant cache<br>
         // Per HW Spec, constant cache messages can read at least DWORD data.<br>
         // So, byte/short data type, we have to read through data cache.<br>
@@ -4164,6 +4219,18 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp<br>
       }<br>
     }<br>
<br>
+    void emitOWordWrite(Selection::Opaque &sel,<br>
+                        const ir::StoreInstruction &insn,<br>
+                        GenRegister address,<br>
+                        ir::BTI bti) const<br>
+    {<br>
+      using namespace ir;<br>
+      const GenRegister header = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);<br>
+      const GenRegister value = sel.selReg(insn.getValue(0), TYPE_U32);<br>
+      const uint32_t simdWidth = sel.ctx.getSimdWidth();<br>
+      sel.OBWRITE(address, value, header, bti.imm, simdWidth * sizeof(int));<br>
+    }<br>
+<br>
     virtual bool emit(Selection::Opaque  &sel, SelectionDAG &dag) const<br>
     {<br>
       using namespace ir;<br>
@@ -4185,7 +4252,9 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp<br>
         assert(0 && "stateless not supported yet");<br>
       }<br>
<br>
-      if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)<br>
+      if (insn.isBlock())<br>
+        this->emitOWordWrite(sel, insn, address, bti);<br>
+      else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)<br>
         this->emitWrite64(sel, insn, address, bti);<br>
       else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)<br>
         this->emitUntypedWrite(sel, insn, address,  bti);<br>
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp<br>
index 8d2e1da..51af686 100644<br>
--- a/backend/src/backend/gen_insn_selection.hpp<br>
+++ b/backend/src/backend/gen_insn_selection.hpp<br>
@@ -175,6 +175,8 @@ namespace gbe<br>
     INLINE uint32_t getbti() const {<br>
       GBE_ASSERT(isRead() || isWrite());<br>
       switch (opcode) {<br>
+        case SEL_OP_OBREAD:<br>
+        case SEL_OP_OBWRITE:<br>
         case SEL_OP_DWORD_GATHER: return extra.function;<br>
         case SEL_OP_SAMPLE: return extra.rdbti;<br>
         case SEL_OP_VME: return extra.vme_bti;<br>
@@ -188,6 +190,8 @@ namespace gbe<br>
     INLINE void setbti(uint32_t bti) {<br>
       GBE_ASSERT(isRead() || isWrite());<br>
       switch (opcode) {<br>
+        case SEL_OP_OBREAD:<br>
+        case SEL_OP_OBWRITE:<br>
         case SEL_OP_DWORD_GATHER: extra.function = bti; return;<br>
         case SEL_OP_SAMPLE: extra.rdbti = bti; return;<br>
         case SEL_OP_VME: extra.vme_bti = bti; return;<br>
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx<br>
index 0e11f9f..4a7caff 100644<br>
--- a/backend/src/backend/gen_insn_selection.hxx<br>
+++ b/backend/src/backend/gen_insn_selection.hxx<br>
@@ -96,3 +96,5 @@ DECL_SELECTION_IR(STORE_PROFILING, StoreProfilingInstruction)<br>
 DECL_SELECTION_IR(WORKGROUP_OP, WorkGroupOpInstruction)<br>
 DECL_SELECTION_IR(SUBGROUP_OP, SubGroupOpInstruction)<br>
 DECL_SELECTION_IR(PRINTF, PrintfInstruction)<br>
+DECL_SELECTION_IR(OBREAD, OBReadInstruction)<br>
+DECL_SELECTION_IR(OBWRITE, OBWriteInstruction)<br>
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp<br>
index 47606b2..88491a7 100644<br>
--- a/backend/src/ir/instruction.cpp<br>
+++ b/backend/src/ir/instruction.cpp<br>
@@ -483,10 +483,12 @@ namespace ir {<br>
                         AddressSpace AS,<br>
                         uint32_t _valueNum,<br>
                         bool dwAligned,<br>
-                        AddressMode AM)<br>
+                        AddressMode AM,<br>
+                        bool ifBlock = false)<br>
                       : MemInstruction(AM, AS, dwAligned, type, offset),<br>
                         valueNum(_valueNum),<br>
-                        values(dstValues)<br>
+                        values(dstValues),<br>
+                        ifBlock(ifBlock)<br>
         {<br>
           this->opcode = OP_LOAD;<br>
         }<br>
@@ -519,9 +521,11 @@ namespace ir {<br>
         }<br>
         INLINE bool wellFormed(const Function &fn, std::string &why) const;<br>
         INLINE void out(std::ostream &out, const Function &fn) const;<br>
+        INLINE bool isBlock() const { return ifBlock; }<br>
<br>
         uint8_t         valueNum;<br>
         Tuple             values;<br>
+        bool             ifBlock;<br>
     };<br>
     class ALIGNED_INSTRUCTION StoreInstruction :<br>
       public MemInstruction,<br>
@@ -534,12 +538,14 @@ namespace ir {<br>
                          AddressSpace addrSpace,<br>
                          uint32_t valueNum,<br>
                          bool dwAligned,<br>
-                         AddressMode AM)<br>
+                         AddressMode AM,<br>
+                         bool ifBlock = false)<br>
           : MemInstruction(AM, addrSpace, dwAligned, type, offset)<br>
         {<br>
           this->opcode = OP_STORE;<br>
           this->values = values;<br>
           this->valueNum = valueNum;<br>
+          this->ifBlock = ifBlock;<br>
         }<br>
         INLINE unsigned getValueNum()      const { return valueNum; }<br>
         INLINE Register getValue(const Function &fn, unsigned id) const {<br>
@@ -565,9 +571,12 @@ namespace ir {<br>
         }<br>
         INLINE bool wellFormed(const Function &fn, std::string &why) const;<br>
         INLINE void out(std::ostream &out, const Function &fn) const;<br>
+        INLINE bool isBlock() const { return ifBlock; }<br>
+<br>
         Register      dst[0];<br>
         uint8_t     valueNum;<br>
         Tuple         values;<br>
+        bool         ifBlock;<br>
     };<br>
<br>
     class ALIGNED_INSTRUCTION SampleInstruction : // TODO<br>
@@ -1655,6 +1664,8 @@ namespace ir {<br>
     }<br>
<br>
     INLINE void LoadInstruction::out(std::ostream &out, const Function &fn) const {<br>
+      if(ifBlock)<br>
+        out<< "BLOCK";<br>
       this->outOpcode(out);<br>
       out << "." << type << "." << AS << (dwAligned ? "." : ".un") << "aligned";<br>
       out << " {";<br>
@@ -1672,6 +1683,8 @@ namespace ir {<br>
     }<br>
<br>
     INLINE void StoreInstruction::out(std::ostream &out, const Function &fn) const {<br>
+      if(ifBlock)<br>
+        out<< "BLOCK";<br>
       this->outOpcode(out);<br>
       out << "." << type << "." << AS << (dwAligned ? "." : ".un") << "aligned";<br>
       out << " %" << this->getSrc(fn, 0) << " {";<br>
@@ -2221,7 +2234,9 @@ DECL_MEM_FN(MemInstruction, bool,     isAligned(void), isAligned())<br>
 DECL_MEM_FN(MemInstruction, unsigned, getAddressIndex(void), getAddressIndex())<br>
 DECL_MEM_FN(AtomicInstruction, AtomicOps, getAtomicOpcode(void), getAtomicOpcode())<br>
 DECL_MEM_FN(StoreInstruction, uint32_t, getValueNum(void), getValueNum())<br>
+DECL_MEM_FN(StoreInstruction, bool, isBlock(void), isBlock())<br>
 DECL_MEM_FN(LoadInstruction, uint32_t, getValueNum(void), getValueNum())<br>
+DECL_MEM_FN(LoadInstruction, bool, isBlock(void), isBlock())<br>
 DECL_MEM_FN(LoadImmInstruction, Type, getType(void), getType())<br>
 DECL_MEM_FN(LabelInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())<br>
 DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated())<br>
@@ -2475,9 +2490,10 @@ DECL_MEM_FN(MemInstruction, void,     setBtiReg(Register reg), setBtiReg(reg))<br>
                    uint32_t valueNum, \<br>
                    bool dwAligned, \<br>
                    AddressMode AM, \<br>
-                   unsigned SurfaceIndex) \<br>
+                   unsigned SurfaceIndex, \<br>
+                   bool isBlock) \<br>
   { \<br>
-    internal::CLASS insn = internal::CLASS(type,tuple,offset,space,valueNum,dwAligned,AM); \<br>
+    internal::CLASS insn = internal::CLASS(type,tuple,offset,space,valueNum,dwAligned,AM, isBlock); \<br>
     insn.setSurfaceIndex(SurfaceIndex);\<br>
     return insn.convert(); \<br>
   } \<br>
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp<br>
index 799a7bf..4a5811b 100644<br>
--- a/backend/src/ir/instruction.hpp<br>
+++ b/backend/src/ir/instruction.hpp<br>
@@ -356,6 +356,8 @@ namespace ir {<br>
     }<br>
     /*! Return true if the given instruction is an instance of this class */<br>
     static bool isClassOf(const Instruction &insn);<br>
+    /*! Return true if the given instruction is block write */<br>
+    bool isBlock() const;<br>
   };<br>
<br>
   /*! Load instruction. The source is simply the address where to get the data.<br>
@@ -372,6 +374,8 @@ namespace ir {<br>
     }<br>
     /*! Return true if the given instruction is an instance of this class */<br>
     static bool isClassOf(const Instruction &insn);<br>
+    /*! Return true if the given instruction is block read */<br>
+    bool isBlock() const;<br>
   };<br>
<br>
   /*! Load immediate instruction loads an typed immediate value into the given<br>
@@ -827,10 +831,10 @@ namespace ir {<br>
   /*! ret */<br>
   Instruction RET(void);<br>
   /*! load.type.space {dst1,...,dst_valueNum} offset value, {bti} */<br>
-  Instruction LOAD(Type type, Tuple dst, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned, AddressMode, unsigned SurfaceIndex);<br>
+  Instruction LOAD(Type type, Tuple dst, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned, AddressMode, unsigned SurfaceIndex, bool isBlock = false);<br>
   Instruction LOAD(Type type, Tuple dst, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned, AddressMode, Register bti);<br>
   /*! store.type.space offset {src1,...,src_valueNum} value {bti}*/<br>
-  Instruction STORE(Type type, Tuple src, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned, AddressMode, unsigned SurfaceIndex);<br>
+  Instruction STORE(Type type, Tuple src, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned, AddressMode, unsigned SurfaceIndex, bool isBlock = false);<br>
   Instruction STORE(Type type, Tuple src, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned, AddressMode, Register bti);<br>
   /*! loadi.type dst value */<br>
   Instruction LOADI(Type type, Register dst, ImmediateIndex value);<br>
diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp<br>
index d48f067..3162d13 100644<br>
--- a/backend/src/ir/liveness.cpp<br>
+++ b/backend/src/ir/liveness.cpp<br>
@@ -117,11 +117,16 @@ namespace ir {<br>
         if (insn.getOpcode() == ir::OP_SIMD_ID)<br>
           uniform = false;<br>
<br>
+        // do not change dst uniform for block read<br>
+        if (insn.getOpcode() == ir::OP_LOAD && ir::cast&lt;ir::LoadInstruction&gt;(insn).isBlock())<br>
+          uniform = false;<br>
+<br>
         for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {<br>
           const Register reg = insn.getSrc(srcID);<br>
           if (!fn.isUniformRegister(reg))<br>
             uniform = false;<br>
         }<br>
+<br>
         // A destination is a killed value<br>
         for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {<br>
           const Register reg = insn.getDst(dstID);<br>
diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt<br>
index 1d1ec68..83e767c 100644<br>
--- a/backend/src/libocl/CMakeLists.txt<br>
+++ b/backend/src/libocl/CMakeLists.txt<br>
@@ -182,7 +182,7 @@ MACRO(ADD_LL_TO_BC_TARGET M)<br>
        )<br>
 ENDMACRO(ADD_LL_TO_BC_TARGET)<br>
<br>
-SET (OCL_LL_MODULES ocl_barrier ocl_clz)<br>
+SET (OCL_LL_MODULES ocl_barrier ocl_clz ocl_substore)<br>
 FOREACH(f ${OCL_LL_MODULES})<br>
     COPY_THE_LL(${f})<br>
     ADD_LL_TO_BC_TARGET(${f})<br>
diff --git a/backend/src/libocl/src/ocl_substore.ll b/backend/src/libocl/src/ocl_substore.ll<br>
new file mode 100644<br>
index 0000000..665cdfa<br>
--- /dev/null<br>
+++ b/backend/src/libocl/src/ocl_substore.ll<br>
@@ -0,0 +1,9 @@<br>
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"<br>
+target triple = "spir"<br>
+<br>
+declare void @__gen_ocl_sub_group_block_write_mem(i32 addrspace(1)* nocapture, i32) nounwind alwaysinline noduplicate<br>
+<br>
+define void @_Z27intel_sub_group_block_writePKU3AS1jj(i32 addrspace(1)* %p, i32 %data) nounwind alwaysinline noduplicate {<br>
+  call void @__gen_ocl_sub_group_block_write_mem(i32 addrspace(1)* %p, i32 %data)<br>
+  ret void<br>
+}<br>
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl<br>
index a25dcef..66490cc 100644<br>
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl<br>
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl<br>
@@ -133,3 +133,57 @@ RANGE_OP(scan_exclusive, max, float, true)<br>
 RANGE_OP(scan_exclusive, max, double, true)<br>
<br>
 #undef RANGE_OP<br>
+PURE CONST uint __gen_ocl_sub_group_block_read_mem(const global uint* p);<br>
+OVERLOADABLE uint intel_sub_group_block_read(const global uint* p)<br>
+{<br>
+  return __gen_ocl_sub_group_block_read_mem(p);<br>
+}<br>
+OVERLOADABLE uint2 intel_sub_group_block_read2(const global uint* p)<br>
+{<br>
+  return (uint2)(intel_sub_group_block_read(p),<br>
+                 intel_sub_group_block_read(p + get_simd_size()));<br>
+}<br>
+OVERLOADABLE uint4 intel_sub_group_block_read4(const global uint* p)<br>
+{<br>
+  return (uint4)(intel_sub_group_block_read(p),<br>
+                 intel_sub_group_block_read(p + get_simd_size()),<br>
+                 intel_sub_group_block_read(p + get_simd_size() * 2),<br>
+                 intel_sub_group_block_read(p + get_simd_size() * 3));<br>
+<br>
+}<br>
+OVERLOADABLE uint8 intel_sub_group_block_read8(const global uint* p)<br>
+{<br>
+  return (uint8)(intel_sub_group_block_read(p),<br>
+                 intel_sub_group_block_read(p + get_simd_size()),<br>
+                 intel_sub_group_block_read(p + get_simd_size() * 2),<br>
+                 intel_sub_group_block_read(p + get_simd_size() * 3),<br>
+                 intel_sub_group_block_read(p + get_simd_size() * 4),<br>
+                 intel_sub_group_block_read(p + get_simd_size() * 5),<br>
+                 intel_sub_group_block_read(p + get_simd_size() * 6),<br>
+                 intel_sub_group_block_read(p + get_simd_size() * 7));<br>
+}<br>
+<br>
+OVERLOADABLE void intel_sub_group_block_write2(const global uint* p, uint2 data)<br>
+{<br>
+  intel_sub_group_block_write(p, data.s0);<br>
+  intel_sub_group_block_write(p + get_simd_size(), data.s1);<br>
+}<br>
+OVERLOADABLE void intel_sub_group_block_write4(const global uint* p,uint4 data)<br>
+{<br>
+  intel_sub_group_block_write(p, data.s0);<br>
+  intel_sub_group_block_write(p + get_simd_size(), data.s1);<br>
+  intel_sub_group_block_write(p + get_simd_size() * 2, data.s2);<br>
+  intel_sub_group_block_write(p + get_simd_size() * 3, data.s3);<br>
+<br>
+}<br>
+OVERLOADABLE void intel_sub_group_block_write8(const global uint* p,uint8 data)<br>
+{<br>
+  intel_sub_group_block_write(p, data.s0);<br>
+  intel_sub_group_block_write(p + get_simd_size(), data.s1);<br>
+  intel_sub_group_block_write(p + get_simd_size() * 2, data.s2);<br>
+  intel_sub_group_block_write(p + get_simd_size() * 3, data.s3);<br>
+  intel_sub_group_block_write(p + get_simd_size() * 4, data.s4);<br>
+  intel_sub_group_block_write(p + get_simd_size() * 5, data.s5);<br>
+  intel_sub_group_block_write(p + get_simd_size() * 6, data.s6);<br>
+  intel_sub_group_block_write(p + get_simd_size() * 7, data.s7);<br>
+}<br>
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h<br>
index 355ee30..d0676be 100644<br>
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h<br>
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h<br>
@@ -132,3 +132,14 @@ OVERLOADABLE double sub_group_scan_exclusive_max(double x);<br>
 OVERLOADABLE float intel_sub_group_shuffle(float x, uint c);<br>
 OVERLOADABLE int intel_sub_group_shuffle(int x, uint c);<br>
 OVERLOADABLE uint intel_sub_group_shuffle(uint x, uint c);<br>
+<br>
+/* block read/write */<br>
+OVERLOADABLE uint intel_sub_group_block_read(const global uint* p);<br>
+OVERLOADABLE uint2 intel_sub_group_block_read2(const global uint* p);<br>
+OVERLOADABLE uint4 intel_sub_group_block_read4(const global uint* p);<br>
+OVERLOADABLE uint8 intel_sub_group_block_read8(const global uint* p);<br>
+<br>
+OVERLOADABLE void intel_sub_group_block_write(const __global uint* p, uint data);<br>
+OVERLOADABLE void intel_sub_group_block_write2(const __global uint* p, uint2 data);<br>
+OVERLOADABLE void intel_sub_group_block_write4(const __global uint* p, uint4 data);<br>
+OVERLOADABLE void intel_sub_group_block_write8(const __global uint* p, uint8 data);<br>
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp<br>
index 3ddbfcc..e77290f 100644<br>
--- a/backend/src/llvm/llvm_gen_backend.cpp<br>
+++ b/backend/src/llvm/llvm_gen_backend.cpp<br>
@@ -697,6 +697,8 @@ namespace gbe<br>
     void emitWorkGroupInst(CallInst &I, CallSite &CS, ir::WorkGroupOps opcode);<br>
     // Emit subgroup instructions<br>
     void emitSubGroupInst(CallInst &I, CallSite &CS, ir::WorkGroupOps opcode);<br>
+    // Emit subgroup block read/write memory instructions<br>
+    void emitBlockReadWriteMemInst(CallInst &I, CallSite &CS, bool isWrite);<br>
<br>
     uint8_t appendSampler(CallSite::arg_iterator AI);<br>
     uint8_t getImageID(CallInst &I);<br>
@@ -3730,6 +3732,9 @@ namespace gbe<br>
       case GEN_OCL_LRP:<br>
         this->newRegister(&I);<br>
         break;<br>
+      case GEN_OCL_SUB_GROUP_BLOCK_READ_MEM:<br>
+        this->newRegister(&I, NULL, false);<br>
+        break;<br>
       case GEN_OCL_PRINTF:<br>
         this->newRegister(&I);  // fall through<br>
       case GEN_OCL_PUTS:<br>
@@ -3744,6 +3749,7 @@ namespace gbe<br>
       case GEN_OCL_CALC_TIMESTAMP:<br>
       case GEN_OCL_STORE_PROFILING:<br>
       case GEN_OCL_DEBUGWAIT:<br>
+      case GEN_OCL_SUB_GROUP_BLOCK_WRITE_MEM:<br>
         break;<br>
       case GEN_OCL_NOT_FOUND:<br>
       default:<br>
@@ -3938,6 +3944,61 @@ namespace gbe<br>
     GBE_ASSERT(AI == AE);<br>
   }<br>
<br>
+  void GenWriter::emitBlockReadWriteMemInst(CallInst &I, CallSite &CS, bool isWrite) {<br>
+    CallSite::arg_iterator AI = CS.arg_begin();<br>
+    CallSite::arg_iterator AE = CS.arg_end();<br>
+    GBE_ASSERT(AI != AE);<br>
+<br>
+    Value *llvmPtr = *(AI++);<br>
+    Value *llvmValues;<br>
+    ir::AddressSpace addrSpace = addressSpaceLLVMToGen(llvmPtr->getType()->getPointerAddressSpace());<br>
+    GBE_ASSERT(addrSpace == ir::MEM_GLOBAL);<br>
+    ir::Register pointer = this->getRegister(llvmPtr);<br>
+<br>
+    ir::Register ptr;<br>
+    ir::Register btiReg;<br>
+    unsigned SurfaceIndex = 0xff;<br>
+<br>
+    ir::AddressMode AM;<br>
+    if (legacyMode) {<br>
+      Value *bti = getBtiRegister(llvmPtr);<br>
+      Value *ptrBase = getPointerBase(llvmPtr);<br>
+      ir::Register baseReg = this->getRegister(ptrBase);<br>
+      if (isa<ConstantInt>(bti)) {<br>
+        AM = ir::AM_StaticBti;<br>
+        SurfaceIndex = cast<ConstantInt>(bti)->getZExtValue();<br>
+        addrSpace = btiToGen(SurfaceIndex);<br>
+      } else {<br>
+        AM = ir::AM_DynamicBti;<br>
+        addrSpace = ir::MEM_MIXED;<br>
+        btiReg = this->getRegister(bti);<br>
+      }<br>
+      const ir::RegisterFamily pointerFamily = ctx.getPointerFamily();<br>
+      ptr = ctx.reg(pointerFamily);<br>
+      ctx.SUB(ir::TYPE_U32, ptr, pointer, baseReg);<br>
+    } else {<br>
+      AM = ir::AM_Stateless;<br>
+      ptr = pointer;<br>
+    }<br>
+<br>
+    ir::Type type = ir::TYPE_U32;<br>
+    GBE_ASSERT(AM != ir::AM_DynamicBti);<br>
+<br>
+    if(isWrite){<br>
+      llvmValues = *(AI++);<br>
+      const ir::Register values = getRegister(llvmValues);<br>
+      const ir::Tuple tuple = ctx.arrayTuple(&values, 1);<br>
+      ctx.STORE(type, tuple, ptr, addrSpace, 1, true, AM, SurfaceIndex, true);<br>
+    } else {<br>
+      llvmValues = &I;<br>
+      const ir::Register values = getRegister(llvmValues);<br>
+      const ir::Tuple tuple = ctx.arrayTuple(&values, 1);<br>
+      ctx.LOAD(type, tuple, ptr, addrSpace, 1, true, AM, SurfaceIndex, true);<br>
+    }<br>
+<br>
+    GBE_ASSERT(AI == AE);<br>
+  }<br>
+<br>
   /* append a new sampler. should be called before any reference to<br>
    * a sampler_t value. */<br>
   uint8_t GenWriter::appendSampler(CallSite::arg_iterator AI) {<br>
@@ -4762,6 +4823,10 @@ namespace gbe<br>
             ctx.LRP(ir::TYPE_FLOAT, dst, src0, src1, src2);<br>
             break;<br>
           }<br>
+          case GEN_OCL_SUB_GROUP_BLOCK_READ_MEM:<br>
+            this->emitBlockReadWriteMemInst(I, CS, false); break;<br>
+          case GEN_OCL_SUB_GROUP_BLOCK_WRITE_MEM:<br>
+            this->emitBlockReadWriteMemInst(I, CS, true); break;<br>
           default: break;<br>
         }<br>
       }<br>
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx<br>
index 213ead0..003be91 100644<br>
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx<br>
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx<br>
@@ -202,7 +202,7 @@ DECL_LLVM_GEN_FUNCTION(WORK_GROUP_SCAN_INCLUSIVE_MIN, __gen_ocl_work_group_scan_<br>
 DECL_LLVM_GEN_FUNCTION(WORK_GROUP_ALL, __gen_ocl_work_group_all)<br>
 DECL_LLVM_GEN_FUNCTION(WORK_GROUP_ANY, __gen_ocl_work_group_any)<br>
<br>
-// work group function<br>
+// sub group function<br>
 DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BROADCAST, __gen_ocl_sub_group_broadcast)<br>
<br>
 DECL_LLVM_GEN_FUNCTION(SUB_GROUP_REDUCE_ADD, __gen_ocl_sub_group_reduce_add)<br>
@@ -217,5 +217,8 @@ DECL_LLVM_GEN_FUNCTION(SUB_GROUP_SCAN_INCLUSIVE_ADD, __gen_ocl_sub_group_scan_in<br>
 DECL_LLVM_GEN_FUNCTION(SUB_GROUP_SCAN_INCLUSIVE_MAX, __gen_ocl_sub_group_scan_inclusive_max)<br>
 DECL_LLVM_GEN_FUNCTION(SUB_GROUP_SCAN_INCLUSIVE_MIN, __gen_ocl_sub_group_scan_inclusive_min)<br>
<br>
+DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_MEM, __gen_ocl_sub_group_block_read_mem)<br>
+DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_MEM, __gen_ocl_sub_group_block_write_mem)<br>
+<br>
 // common function<br>
 DECL_LLVM_GEN_FUNCTION(LRP, __gen_ocl_lrp)<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.7.4<br>
<br>
_______________________________________________<br>
Beignet mailing list<br>
<a href="mailto:Beignet@lists.freedesktop.org">Beignet@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/beignet" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/beignet</a><br>
</font></span></blockquote></div><br></div>