<div dir="ltr">This is great stuff. Will we also get the <span style="color:rgb(0,0,0);white-space:pre-wrap">intel_sub_group_block_* functions that take an image2d_t argument?</span><div><span style="color:rgb(0,0,0);white-space:pre-wrap"><br></span></div><div><span style="color:rgb(0,0,0);white-space:pre-wrap">Thanks,</span></div><div><span style="color:rgb(0,0,0);white-space:pre-wrap">Andrew</span></div></div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, May 19, 2016 at 2:55 PM, Xiuli Pan <span dir="ltr"><<a href="mailto:xiuli.pan@intel.com" target="_blank">xiuli.pan@intel.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">From: Pan Xiuli <<a href="mailto:xiuli.pan@intel.com">xiuli.pan@intel.com</a>><br>
<br>
Using OWORD_BLOCK_RW to read/write a block of data for a thread.<br>
<br>
Signed-off-by: Pan Xiuli <<a href="mailto:xiuli.pan@intel.com">xiuli.pan@intel.com</a>><br>
---<br>
backend/src/backend/gen/gen_mesa_disasm.c | 15 +++++<br>
backend/src/backend/gen_context.cpp | 63 ++++++++++++++++++<br>
backend/src/backend/gen_context.hpp | 2 +<br>
backend/src/backend/gen_encoder.cpp | 38 ++++++++++-<br>
backend/src/backend/gen_encoder.hpp | 4 ++<br>
.../src/backend/gen_insn_gen7_schedule_info.hxx | 2 +<br>
backend/src/backend/gen_insn_selection.cpp | 77 ++++++++++++++++++++--<br>
backend/src/backend/gen_insn_selection.hpp | 4 ++<br>
backend/src/backend/gen_insn_selection.hxx | 2 +<br>
backend/src/ir/instruction.cpp | 26 ++++++--<br>
backend/src/ir/instruction.hpp | 8 ++-<br>
backend/src/ir/liveness.cpp | 5 ++<br>
backend/src/libocl/CMakeLists.txt | 2 +-<br>
backend/src/libocl/src/ocl_substore.ll | 9 +++<br>
backend/src/libocl/tmpl/<a href="http://ocl_simd.tmpl.cl" rel="noreferrer" target="_blank">ocl_simd.tmpl.cl</a> | 54 +++++++++++++++<br>
backend/src/libocl/tmpl/ocl_simd.tmpl.h | 11 ++++<br>
backend/src/llvm/llvm_gen_backend.cpp | 65 ++++++++++++++++++<br>
backend/src/llvm/llvm_gen_ocl_function.hxx | 5 +-<br>
18 files changed, 377 insertions(+), 15 deletions(-)<br>
create mode 100644 backend/src/libocl/src/ocl_substore.ll<br>
<br>
diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c<br>
index 067ddd8..9200c26 100644<br>
--- a/backend/src/backend/gen/gen_mesa_disasm.c<br>
+++ b/backend/src/backend/gen/gen_mesa_disasm.c<br>
@@ -432,6 +432,14 @@ static const char *data_port_data_cache_category[] = {<br>
"scratch",<br>
};<br>
<br>
+static const char *data_port_data_cache_block_size[] = {<br>
+ "1 OWORD LOW",<br>
+ "1 OWORD HIGH",<br>
+ "2 OWORD",<br>
+ "4 OWORD",<br>
+ "8 OWORD",<br>
+};<br>
+<br>
static const char *data_port_scratch_block_size[] = {<br>
"1 register",<br>
"2 registers",<br>
@@ -576,6 +584,7 @@ static int gen_version;<br>
#define MSG_GW_ACKREQ(inst) GEN_BITS_FIELD(inst, bits3.gen7_msg_gw.ackreq)<br>
#define GENERIC_MSG_LENGTH(inst) GEN_BITS_FIELD(inst, bits3.generic_gen5.msg_length)<br>
#define GENERIC_RESPONSE_LENGTH(inst) GEN_BITS_FIELD(inst, bits3.generic_gen5.response_length)<br>
+#define OWORD_RW_BLOCK_SIZE(inst) GEN_BITS_FIELD(inst, bits3.gen7_oblock_rw.block_size)<br>
<br>
static int is_special_acc(const void* inst)<br>
{<br>
@@ -1483,6 +1492,12 @@ int gen_disasm (FILE *file, const void *inst, uint32_t deviceID, uint32_t compac<br>
data_port_data_cache_byte_scattered_simd_mode[BYTE_RW_SIMD_MODE(inst)],<br>
data_port_data_cache_category[UNTYPED_RW_CATEGORY(inst)],<br>
data_port_data_cache_msg_type[UNTYPED_RW_MSG_TYPE(inst)]);<br>
+ else if(UNTYPED_RW_MSG_TYPE(inst) == 0 || UNTYPED_RW_MSG_TYPE(inst) == 8)<br>
+ format(file, " (bti: %d, data size: %s, %s, %s)",<br>
+ UNTYPED_RW_BTI(inst),<br>
+ data_port_data_cache_block_size[OWORD_RW_BLOCK_SIZE(inst)],<br>
+ data_port_data_cache_category[UNTYPED_RW_CATEGORY(inst)],<br>
+ data_port_data_cache_msg_type[UNTYPED_RW_MSG_TYPE(inst)]);<br>
else<br>
format(file, " not implemented");<br>
} else {<br>
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp<br>
index 4d0a3f3..cfb8be1 100644<br>
--- a/backend/src/backend/gen_context.cpp<br>
+++ b/backend/src/backend/gen_context.cpp<br>
@@ -3487,6 +3487,69 @@ namespace gbe<br>
p->pop();<br>
}<br>
<br>
+ void GenContext::emitOBReadInstruction(const SelectionInstruction &insn) {<br>
+ const GenRegister dst = ra->genReg(insn.dst(0));<br>
+ const GenRegister addr = ra->genReg(insn.src(0));<br>
+ const GenRegister first = GenRegister::ud1grf(<a href="http://addr.nr" rel="noreferrer" target="_blank">addr.nr</a>,addr.subnr/sizeof(float));<br>
+ GenRegister header;<br>
+ if (simdWidth == 8)<br>
+ header = GenRegister::retype(ra->genReg(insn.src(1)), GEN_TYPE_F);<br>
+ else<br>
+ header = GenRegister::retype(GenRegister::Qn(ra->genReg(insn.src(1)),1), GEN_TYPE_F);<br>
+<br>
+ p->push();<br>
+ // Copy r0 into the header first<br>
+ p->curr.execWidth = 8;<br>
+ p->curr.predicate = GEN_PREDICATE_NONE;<br>
+ p->curr.noMask = 1;<br>
+ p->MOV(header, GenRegister::f8grf(0,0));<br>
+<br>
+ // Update the header with the current address<br>
+ p->curr.execWidth = 1;<br>
+ const uint32_t nr = <a href="http://header.nr" rel="noreferrer" target="_blank">header.nr</a>;<br>
+ const uint32_t subnr = header.subnr / sizeof(float);<br>
+ p->SHR(GenRegister::ud1grf(nr, subnr+2), first, GenRegister::immud(4));<br>
+ //p->MOV(GenRegister::ud1grf(nr, subnr+2), first);<br>
+<br>
+ // Put zero in the general state base address<br>
+ p->MOV(GenRegister::f1grf(nr, subnr+5), GenRegister::immf(0));<br>
+<br>
+ p->pop();<br>
+ // Now read the data<br>
+ p->OBREAD(dst, header, insn.getbti(), insn.extra.elem);<br>
+ }<br>
+<br>
+ void GenContext::emitOBWriteInstruction(const SelectionInstruction &insn) {<br>
+ const GenRegister addr = ra->genReg(insn.src(2));<br>
+ const GenRegister first = GenRegister::ud1grf(<a href="http://addr.nr" rel="noreferrer" target="_blank">addr.nr</a>,addr.subnr/sizeof(float));<br>
+ GenRegister header;<br>
+ if (simdWidth == 8)<br>
+ header = GenRegister::retype(ra->genReg(insn.src(0)), GEN_TYPE_F);<br>
+ else<br>
+ header = GenRegister::retype(GenRegister::Qn(ra->genReg(insn.src(0)),1), GEN_TYPE_F);<br>
+<br>
+ p->push();<br>
+ // Copy r0 into the header first<br>
+ p->curr.execWidth = 8;<br>
+ p->curr.predicate = GEN_PREDICATE_NONE;<br>
+ p->curr.noMask = 1;<br>
+ p->MOV(header, GenRegister::f8grf(0,0));<br>
+<br>
+ // Update the header with the current address<br>
+ p->curr.execWidth = 1;<br>
+ const uint32_t nr = <a href="http://header.nr" rel="noreferrer" target="_blank">header.nr</a>;<br>
+ const uint32_t subnr = header.subnr / sizeof(float);<br>
+ p->SHR(GenRegister::ud1grf(nr, subnr+2), first, GenRegister::immud(4));<br>
+<br>
+ // Put zero in the general state base address<br>
+ p->MOV(GenRegister::f1grf(nr, subnr+5), GenRegister::immf(0));<br>
+<br>
+ p->pop();<br>
+ // Now write the data<br>
+ p->OBWRITE(header, insn.getbti(), insn.extra.elem);<br>
+ }<br>
+<br>
+<br>
BVAR(OCL_OUTPUT_REG_ALLOC, false);<br>
BVAR(OCL_OUTPUT_ASM, false);<br>
<br>
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp<br>
index 4c43ccb..56a5ec2 100644<br>
--- a/backend/src/backend/gen_context.hpp<br>
+++ b/backend/src/backend/gen_context.hpp<br>
@@ -187,6 +187,8 @@ namespace gbe<br>
void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);<br>
unsigned beforeMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister flagTemp, GenRegister btiTmp, unsigned desc);<br>
void afterMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister flagTemp, GenRegister btiTmp, unsigned jip0);<br>
+ virtual void emitOBReadInstruction(const SelectionInstruction &insn);<br>
+ virtual void emitOBWriteInstruction(const SelectionInstruction &insn);<br>
<br>
/*! Implements base class */<br>
virtual Kernel *allocateKernel(void);<br>
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp<br>
index 31afa67..fc7b5cf 100644<br>
--- a/backend/src/backend/gen_encoder.cpp<br>
+++ b/backend/src/backend/gen_encoder.cpp<br>
@@ -258,7 +258,7 @@ namespace gbe<br>
else<br>
NOT_SUPPORTED;<br>
}<br>
-#if 0<br>
+<br>
static void setOBlockRW(GenEncoder *p,<br>
GenNativeInstruction *insn,<br>
uint32_t bti,<br>
@@ -272,10 +272,10 @@ namespace gbe<br>
assert(size == 2 || size == 4);<br>
insn->bits3.gen7_oblock_rw.msg_type = msg_type;<br>
insn->bits3.gen7_oblock_rw.bti = bti;<br>
+ GBE_ASSERT(size == 2 || size == 4);<br>
insn->bits3.gen7_oblock_rw.block_size = size == 2 ? 2 : 3;<br>
insn->bits3.gen7_oblock_rw.header_present = 1;<br>
}<br>
-#endif<br>
<br>
static void setDWordScatterMessgae(GenEncoder *p,<br>
GenNativeInstruction *insn,<br>
@@ -1244,6 +1244,40 @@ namespace gbe<br>
setScratchMessage(this, insn, offset, block_size, channel_mode, GEN_SCRATCH_READ, 1, dst_num);<br>
}<br>
<br>
+ void GenEncoder::OBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t size) {<br>
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);<br>
+ const uint32_t msg_length = 1;<br>
+ const uint32_t response_length = size / 2; // Size is in owords<br>
+ this->setHeader(insn);<br>
+ this->setDst(insn, GenRegister::uw16grf(<a href="http://dst.nr" rel="noreferrer" target="_blank">dst.nr</a>, 0));<br>
+ this->setSrc0(insn, GenRegister::ud8grf(<a href="http://header.nr" rel="noreferrer" target="_blank">header.nr</a>, 0));<br>
+ this->setSrc1(insn, GenRegister::immud(0));<br>
+ setOBlockRW(this,<br>
+ insn,<br>
+ bti,<br>
+ size,<br>
+ GEN7_OBLOCK_READ,<br>
+ msg_length,<br>
+ response_length);<br>
+ }<br>
+<br>
+ void GenEncoder::OBWRITE(GenRegister header, uint32_t bti, uint32_t size) {<br>
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);<br>
+ const uint32_t msg_length = 1 + size / 2; // Size is in owords<br>
+ const uint32_t response_length = 0;<br>
+ this->setHeader(insn);<br>
+ this->setSrc0(insn, GenRegister::ud8grf(<a href="http://header.nr" rel="noreferrer" target="_blank">header.nr</a>, 0));<br>
+ this->setSrc1(insn, GenRegister::immud(0));<br>
+ this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));<br>
+ setOBlockRW(this,<br>
+ insn,<br>
+ bti,<br>
+ size,<br>
+ GEN7_OBLOCK_WRITE,<br>
+ msg_length,<br>
+ response_length);<br>
+ }<br>
+<br>
void GenEncoder::EOT(uint32_t msg) {<br>
GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);<br>
this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));<br>
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp<br>
index 0239293..a53c879 100644<br>
--- a/backend/src/backend/gen_encoder.hpp<br>
+++ b/backend/src/backend/gen_encoder.hpp<br>
@@ -267,6 +267,10 @@ namespace gbe<br>
virtual bool canHandleLong(uint32_t opcode, GenRegister dst, GenRegister src0,<br>
GenRegister src1 = GenRegister::null());<br>
virtual void handleDouble(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1 = GenRegister::null());<br>
+ /*! OBlock read */<br>
+ void OBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t elemSize);<br>
+ /*! OBlock write */<br>
+ void OBWRITE(GenRegister header, uint32_t bti, uint32_t elemSize);<br>
<br>
GBE_CLASS(GenEncoder); //!< Use custom allocators<br>
virtual void alu3(uint32_t opcode, GenRegister dst,<br>
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx<br>
index cb5c4f1..d297726 100644<br>
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx<br>
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx<br>
@@ -50,3 +50,5 @@ DECL_GEN7_SCHEDULE(StoreProfiling, 80, 1, 1)<br>
DECL_GEN7_SCHEDULE(WorkGroupOp, 80, 1, 1)<br>
DECL_GEN7_SCHEDULE(SubGroupOp, 80, 1, 1)<br>
DECL_GEN7_SCHEDULE(Printf, 80, 1, 1)<br>
+DECL_GEN7_SCHEDULE(OBRead, 80, 1, 1)<br>
+DECL_GEN7_SCHEDULE(OBWrite, 80, 1, 1)<br>
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp<br>
index 596e70b..7c49242 100644<br>
--- a/backend/src/backend/gen_insn_selection.cpp<br>
+++ b/backend/src/backend/gen_insn_selection.cpp<br>
@@ -188,7 +188,8 @@ namespace gbe<br>
this->opcode == SEL_OP_BYTE_GATHER ||<br>
this->opcode == SEL_OP_SAMPLE ||<br>
this->opcode == SEL_OP_VME ||<br>
- this->opcode == SEL_OP_DWORD_GATHER;<br>
+ this->opcode == SEL_OP_DWORD_GATHER ||<br>
+ this->opcode == SEL_OP_OBREAD;<br>
}<br>
<br>
bool SelectionInstruction::modAcc(void) const {<br>
@@ -210,7 +211,8 @@ namespace gbe<br>
this->opcode == SEL_OP_WRITE64 ||<br>
this->opcode == SEL_OP_ATOMIC ||<br>
this->opcode == SEL_OP_BYTE_SCATTER ||<br>
- this->opcode == SEL_OP_TYPED_WRITE;<br>
+ this->opcode == SEL_OP_TYPED_WRITE ||<br>
+ this->opcode == SEL_OP_OBWRITE;<br>
}<br>
<br>
bool SelectionInstruction::isBranch(void) const {<br>
@@ -697,6 +699,11 @@ namespace gbe<br>
/*! Sub Group Operations */<br>
void SUBGROUP_OP(uint32_t wg_op, Reg dst, GenRegister src,<br>
GenRegister tmpData1, GenRegister tmpData2);<br>
+ /*! Oblock read */<br>
+ void OBREAD(GenRegister dst, GenRegister addr, GenRegister header, uint32_t bti, uint32_t size);<br>
+ /*! Oblock write */<br>
+ void OBWRITE(GenRegister addr, GenRegister value, GenRegister header, uint32_t bti, uint32_t size);<br>
+<br>
/* common functions for both binary instruction and sel_cmp and compare instruction.<br>
It will handle the IMM or normal register assignment, and will try to avoid LOADI<br>
as much as possible. */<br>
@@ -2014,6 +2021,40 @@ namespace gbe<br>
insn->src(0) = src;<br>
insn->src(1) = tmpData2;<br>
}<br>
+ void Selection::Opaque::OBREAD(GenRegister dst,<br>
+ GenRegister addr,<br>
+ GenRegister header,<br>
+ uint32_t bti,<br>
+ uint32_t size) {<br>
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_OBREAD, 1, 2);<br>
+ insn->dst(0) = dst;<br>
+ insn->src(0) = addr;<br>
+ insn->src(1) = header;<br>
+ insn->setbti(bti);<br>
+ insn->extra.elem = size / sizeof(int[4]); // number of owords<br>
+ }<br>
+<br>
+ void Selection::Opaque::OBWRITE(GenRegister addr,<br>
+ GenRegister value,<br>
+ GenRegister header,<br>
+ uint32_t bti,<br>
+ uint32_t size) {<br>
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_OBWRITE, 0, 3);<br>
+ SelectionVector *vector = this->appendVector();<br>
+ insn->src(0) = header;<br>
+ insn->src(1) = value;<br>
+ insn->src(2) = addr;<br>
+ insn->state = this->curr;<br>
+ insn->setbti(bti);<br>
+ insn->extra.elem = size / sizeof(int[4]); // number of owords<br>
+<br>
+ // We need to put the header and the data together<br>
+ vector->regNum = 2;<br>
+ vector->reg = &insn->src(0);<br>
+ vector->offsetID = 0;<br>
+ vector->isSrc = 1;<br>
+ }<br>
+<br>
<br>
// Boiler plate to initialize the selection library at c++ pre-main<br>
static SelectionLibrary *selLib = NULL;<br>
@@ -4002,6 +4043,18 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp<br>
}<br>
}<br>
<br>
+ void emitOWordRead(Selection::Opaque &sel,<br>
+ const ir::LoadInstruction &insn,<br>
+ GenRegister address,<br>
+ ir::BTI bti) const<br>
+ {<br>
+ using namespace ir;<br>
+ const GenRegister header = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);<br>
+ const GenRegister value = sel.selReg(insn.getValue(0), TYPE_U32);<br>
+ const uint32_t simdWidth = sel.ctx.getSimdWidth();<br>
+ sel.OBREAD(value, address, header, bti.imm, simdWidth * sizeof(int));<br>
+ }<br>
+<br>
// check whether all binded table index point to constant memory<br>
INLINE bool isAllConstant(const ir::BTI &bti) const {<br>
if (bti.isConst && bti.imm == BTI_CONSTANT)<br>
@@ -4037,7 +4090,9 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp<br>
const uint32_t elemSize = getByteScatterGatherSize(sel, type);<br>
bool allConstant = isAllConstant(bti);<br>
<br>
- if (allConstant) {<br>
+ if (insn.isBlock())<br>
+ this->emitOWordRead(sel, insn, address, bti);<br>
+ else if (allConstant) {<br>
// XXX TODO read 64bit constant through constant cache<br>
// Per HW Spec, constant cache messages can read at least DWORD data.<br>
// So, byte/short data type, we have to read through data cache.<br>
@@ -4164,6 +4219,18 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp<br>
}<br>
}<br>
<br>
+ void emitOWordWrite(Selection::Opaque &sel,<br>
+ const ir::StoreInstruction &insn,<br>
+ GenRegister address,<br>
+ ir::BTI bti) const<br>
+ {<br>
+ using namespace ir;<br>
+ const GenRegister header = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);<br>
+ const GenRegister value = sel.selReg(insn.getValue(0), TYPE_U32);<br>
+ const uint32_t simdWidth = sel.ctx.getSimdWidth();<br>
+ sel.OBWRITE(address, value, header, bti.imm, simdWidth * sizeof(int));<br>
+ }<br>
+<br>
virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const<br>
{<br>
using namespace ir;<br>
@@ -4185,7 +4252,9 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp<br>
assert(0 && "stateless not supported yet");<br>
}<br>
<br>
- if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)<br>
+ if (insn.isBlock())<br>
+ this->emitOWordWrite(sel, insn, address, bti);<br>
+ else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)<br>
this->emitWrite64(sel, insn, address, bti);<br>
else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)<br>
this->emitUntypedWrite(sel, insn, address, bti);<br>
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp<br>
index 8d2e1da..51af686 100644<br>
--- a/backend/src/backend/gen_insn_selection.hpp<br>
+++ b/backend/src/backend/gen_insn_selection.hpp<br>
@@ -175,6 +175,8 @@ namespace gbe<br>
INLINE uint32_t getbti() const {<br>
GBE_ASSERT(isRead() || isWrite());<br>
switch (opcode) {<br>
+ case SEL_OP_OBREAD:<br>
+ case SEL_OP_OBWRITE:<br>
case SEL_OP_DWORD_GATHER: return extra.function;<br>
case SEL_OP_SAMPLE: return extra.rdbti;<br>
case SEL_OP_VME: return extra.vme_bti;<br>
@@ -188,6 +190,8 @@ namespace gbe<br>
INLINE void setbti(uint32_t bti) {<br>
GBE_ASSERT(isRead() || isWrite());<br>
switch (opcode) {<br>
+ case SEL_OP_OBREAD:<br>
+ case SEL_OP_OBWRITE:<br>
case SEL_OP_DWORD_GATHER: extra.function = bti; return;<br>
case SEL_OP_SAMPLE: extra.rdbti = bti; return;<br>
case SEL_OP_VME: extra.vme_bti = bti; return;<br>
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx<br>
index 0e11f9f..4a7caff 100644<br>
--- a/backend/src/backend/gen_insn_selection.hxx<br>
+++ b/backend/src/backend/gen_insn_selection.hxx<br>
@@ -96,3 +96,5 @@ DECL_SELECTION_IR(STORE_PROFILING, StoreProfilingInstruction)<br>
DECL_SELECTION_IR(WORKGROUP_OP, WorkGroupOpInstruction)<br>
DECL_SELECTION_IR(SUBGROUP_OP, SubGroupOpInstruction)<br>
DECL_SELECTION_IR(PRINTF, PrintfInstruction)<br>
+DECL_SELECTION_IR(OBREAD, OBReadInstruction)<br>
+DECL_SELECTION_IR(OBWRITE, OBWriteInstruction)<br>
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp<br>
index 47606b2..88491a7 100644<br>
--- a/backend/src/ir/instruction.cpp<br>
+++ b/backend/src/ir/instruction.cpp<br>
@@ -483,10 +483,12 @@ namespace ir {<br>
AddressSpace AS,<br>
uint32_t _valueNum,<br>
bool dwAligned,<br>
- AddressMode AM)<br>
+ AddressMode AM,<br>
+ bool ifBlock = false)<br>
: MemInstruction(AM, AS, dwAligned, type, offset),<br>
valueNum(_valueNum),<br>
- values(dstValues)<br>
+ values(dstValues),<br>
+ ifBlock(ifBlock)<br>
{<br>
this->opcode = OP_LOAD;<br>
}<br>
@@ -519,9 +521,11 @@ namespace ir {<br>
}<br>
INLINE bool wellFormed(const Function &fn, std::string &why) const;<br>
INLINE void out(std::ostream &out, const Function &fn) const;<br>
+ INLINE bool isBlock() const { return ifBlock; }<br>
<br>
uint8_t valueNum;<br>
Tuple values;<br>
+ bool ifBlock;<br>
};<br>
class ALIGNED_INSTRUCTION StoreInstruction :<br>
public MemInstruction,<br>
@@ -534,12 +538,14 @@ namespace ir {<br>
AddressSpace addrSpace,<br>
uint32_t valueNum,<br>
bool dwAligned,<br>
- AddressMode AM)<br>
+ AddressMode AM,<br>
+ bool ifBlock = false)<br>
: MemInstruction(AM, addrSpace, dwAligned, type, offset)<br>
{<br>
this->opcode = OP_STORE;<br>
this->values = values;<br>
this->valueNum = valueNum;<br>
+ this->ifBlock = ifBlock;<br>
}<br>
INLINE unsigned getValueNum() const { return valueNum; }<br>
INLINE Register getValue(const Function &fn, unsigned id) const {<br>
@@ -565,9 +571,12 @@ namespace ir {<br>
}<br>
INLINE bool wellFormed(const Function &fn, std::string &why) const;<br>
INLINE void out(std::ostream &out, const Function &fn) const;<br>
+ INLINE bool isBlock() const { return ifBlock; }<br>
+<br>
Register dst[0];<br>
uint8_t valueNum;<br>
Tuple values;<br>
+ bool ifBlock;<br>
};<br>
<br>
class ALIGNED_INSTRUCTION SampleInstruction : // TODO<br>
@@ -1655,6 +1664,8 @@ namespace ir {<br>
}<br>
<br>
INLINE void LoadInstruction::out(std::ostream &out, const Function &fn) const {<br>
+ if(ifBlock)<br>
+ out<< "BLOCK";<br>
this->outOpcode(out);<br>
out << "." << type << "." << AS << (dwAligned ? "." : ".un") << "aligned";<br>
out << " {";<br>
@@ -1672,6 +1683,8 @@ namespace ir {<br>
}<br>
<br>
INLINE void StoreInstruction::out(std::ostream &out, const Function &fn) const {<br>
+ if(ifBlock)<br>
+ out<< "BLOCK";<br>
this->outOpcode(out);<br>
out << "." << type << "." << AS << (dwAligned ? "." : ".un") << "aligned";<br>
out << " %" << this->getSrc(fn, 0) << " {";<br>
@@ -2221,7 +2234,9 @@ DECL_MEM_FN(MemInstruction, bool, isAligned(void), isAligned())<br>
DECL_MEM_FN(MemInstruction, unsigned, getAddressIndex(void), getAddressIndex())<br>
DECL_MEM_FN(AtomicInstruction, AtomicOps, getAtomicOpcode(void), getAtomicOpcode())<br>
DECL_MEM_FN(StoreInstruction, uint32_t, getValueNum(void), getValueNum())<br>
+DECL_MEM_FN(StoreInstruction, bool, isBlock(void), isBlock())<br>
DECL_MEM_FN(LoadInstruction, uint32_t, getValueNum(void), getValueNum())<br>
+DECL_MEM_FN(LoadInstruction, bool, isBlock(void), isBlock())<br>
DECL_MEM_FN(LoadImmInstruction, Type, getType(void), getType())<br>
DECL_MEM_FN(LabelInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())<br>
DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated())<br>
@@ -2475,9 +2490,10 @@ DECL_MEM_FN(MemInstruction, void, setBtiReg(Register reg), setBtiReg(reg))<br>
uint32_t valueNum, \<br>
bool dwAligned, \<br>
AddressMode AM, \<br>
- unsigned SurfaceIndex) \<br>
+ unsigned SurfaceIndex, \<br>
+ bool isBlock) \<br>
{ \<br>
- internal::CLASS insn = internal::CLASS(type,tuple,offset,space,valueNum,dwAligned,AM); \<br>
+ internal::CLASS insn = internal::CLASS(type,tuple,offset,space,valueNum,dwAligned,AM, isBlock); \<br>
insn.setSurfaceIndex(SurfaceIndex);\<br>
return insn.convert(); \<br>
} \<br>
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp<br>
index 799a7bf..4a5811b 100644<br>
--- a/backend/src/ir/instruction.hpp<br>
+++ b/backend/src/ir/instruction.hpp<br>
@@ -356,6 +356,8 @@ namespace ir {<br>
}<br>
/*! Return true if the given instruction is an instance of this class */<br>
static bool isClassOf(const Instruction &insn);<br>
+ /*! Return true if the given instruction is a block write */<br>
+ bool isBlock() const;<br>
};<br>
<br>
/*! Load instruction. The source is simply the address where to get the data.<br>
@@ -372,6 +374,8 @@ namespace ir {<br>
}<br>
/*! Return true if the given instruction is an instance of this class */<br>
static bool isClassOf(const Instruction &insn);<br>
+ /*! Return true if the given instruction is a block read */<br>
+ bool isBlock() const;<br>
};<br>
<br>
/*! Load immediate instruction loads an typed immediate value into the given<br>
@@ -827,10 +831,10 @@ namespace ir {<br>
/*! ret */<br>
Instruction RET(void);<br>
/*! load.type.space {dst1,...,dst_valueNum} offset value, {bti} */<br>
- Instruction LOAD(Type type, Tuple dst, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned, AddressMode, unsigned SurfaceIndex);<br>
+ Instruction LOAD(Type type, Tuple dst, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned, AddressMode, unsigned SurfaceIndex, bool isBlock = false);<br>
Instruction LOAD(Type type, Tuple dst, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned, AddressMode, Register bti);<br>
/*! store.type.space offset {src1,...,src_valueNum} value {bti}*/<br>
- Instruction STORE(Type type, Tuple src, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned, AddressMode, unsigned SurfaceIndex);<br>
+ Instruction STORE(Type type, Tuple src, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned, AddressMode, unsigned SurfaceIndex, bool isBlock = false);<br>
Instruction STORE(Type type, Tuple src, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned, AddressMode, Register bti);<br>
/*! loadi.type dst value */<br>
Instruction LOADI(Type type, Register dst, ImmediateIndex value);<br>
diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp<br>
index d48f067..3162d13 100644<br>
--- a/backend/src/ir/liveness.cpp<br>
+++ b/backend/src/ir/liveness.cpp<br>
@@ -117,11 +117,16 @@ namespace ir {<br>
if (insn.getOpcode() == ir::OP_SIMD_ID)<br>
uniform = false;<br>
<br>
+ // never mark the destination of a block read as uniform<br>
+ if (insn.getOpcode() == ir::OP_LOAD && ir::cast<ir::LoadInstruction>(insn).isBlock())<br>
+ uniform = false;<br>
+<br>
for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {<br>
const Register reg = insn.getSrc(srcID);<br>
if (!fn.isUniformRegister(reg))<br>
uniform = false;<br>
}<br>
+<br>
// A destination is a killed value<br>
for (uint32_t dstID = 0; dstID < dstNum; ++dstID) {<br>
const Register reg = insn.getDst(dstID);<br>
diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt<br>
index 1d1ec68..83e767c 100644<br>
--- a/backend/src/libocl/CMakeLists.txt<br>
+++ b/backend/src/libocl/CMakeLists.txt<br>
@@ -182,7 +182,7 @@ MACRO(ADD_LL_TO_BC_TARGET M)<br>
)<br>
ENDMACRO(ADD_LL_TO_BC_TARGET)<br>
<br>
-SET (OCL_LL_MODULES ocl_barrier ocl_clz)<br>
+SET (OCL_LL_MODULES ocl_barrier ocl_clz ocl_substore)<br>
FOREACH(f ${OCL_LL_MODULES})<br>
COPY_THE_LL(${f})<br>
ADD_LL_TO_BC_TARGET(${f})<br>
diff --git a/backend/src/libocl/src/ocl_substore.ll b/backend/src/libocl/src/ocl_substore.ll<br>
new file mode 100644<br>
index 0000000..665cdfa<br>
--- /dev/null<br>
+++ b/backend/src/libocl/src/ocl_substore.ll<br>
@@ -0,0 +1,9 @@<br>
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"<br>
+target triple = "spir"<br>
+<br>
+declare void @__gen_ocl_sub_group_block_write_mem(i32 addrspace(1)* nocapture, i32) nounwind alwaysinline noduplicate<br>
+<br>
+define void @_Z27intel_sub_group_block_writePKU3AS1jj(i32 addrspace(1)* %p, i32 %data) nounwind alwaysinline noduplicate {<br>
+ call void @__gen_ocl_sub_group_block_write_mem(i32 addrspace(1)* %p, i32 %data)<br>
+ ret void<br>
+}<br>
diff --git a/backend/src/libocl/tmpl/<a href="http://ocl_simd.tmpl.cl" rel="noreferrer" target="_blank">ocl_simd.tmpl.cl</a> b/backend/src/libocl/tmpl/<a href="http://ocl_simd.tmpl.cl" rel="noreferrer" target="_blank">ocl_simd.tmpl.cl</a><br>
index a25dcef..66490cc 100644<br>
--- a/backend/src/libocl/tmpl/<a href="http://ocl_simd.tmpl.cl" rel="noreferrer" target="_blank">ocl_simd.tmpl.cl</a><br>
+++ b/backend/src/libocl/tmpl/<a href="http://ocl_simd.tmpl.cl" rel="noreferrer" target="_blank">ocl_simd.tmpl.cl</a><br>
@@ -133,3 +133,57 @@ RANGE_OP(scan_exclusive, max, float, true)<br>
RANGE_OP(scan_exclusive, max, double, true)<br>
<br>
#undef RANGE_OP<br>
+PURE CONST uint __gen_ocl_sub_group_block_read_mem(const global uint* p);<br>
+OVERLOADABLE uint intel_sub_group_block_read(const global uint* p)<br>
+{<br>
+ return __gen_ocl_sub_group_block_read_mem(p);<br>
+}<br>
+OVERLOADABLE uint2 intel_sub_group_block_read2(const global uint* p)<br>
+{<br>
+ return (uint2)(intel_sub_group_block_read(p),<br>
+ intel_sub_group_block_read(p + get_simd_size()));<br>
+}<br>
+OVERLOADABLE uint4 intel_sub_group_block_read4(const global uint* p)<br>
+{<br>
+ return (uint4)(intel_sub_group_block_read(p),<br>
+ intel_sub_group_block_read(p + get_simd_size()),<br>
+ intel_sub_group_block_read(p + get_simd_size() * 2),<br>
+ intel_sub_group_block_read(p + get_simd_size() * 3));<br>
+<br>
+}<br>
+OVERLOADABLE uint8 intel_sub_group_block_read8(const global uint* p)<br>
+{<br>
+ return (uint8)(intel_sub_group_block_read(p),<br>
+ intel_sub_group_block_read(p + get_simd_size()),<br>
+ intel_sub_group_block_read(p + get_simd_size() * 2),<br>
+ intel_sub_group_block_read(p + get_simd_size() * 3),<br>
+ intel_sub_group_block_read(p + get_simd_size() * 4),<br>
+ intel_sub_group_block_read(p + get_simd_size() * 5),<br>
+ intel_sub_group_block_read(p + get_simd_size() * 6),<br>
+ intel_sub_group_block_read(p + get_simd_size() * 7));<br>
+}<br>
+<br>
+OVERLOADABLE void intel_sub_group_block_write2(const global uint* p, uint2 data)<br>
+{<br>
+ intel_sub_group_block_write(p, data.s0);<br>
+ intel_sub_group_block_write(p + get_simd_size(), data.s1);<br>
+}<br>
+OVERLOADABLE void intel_sub_group_block_write4(const global uint* p,uint4 data)<br>
+{<br>
+ intel_sub_group_block_write(p, data.s0);<br>
+ intel_sub_group_block_write(p + get_simd_size(), data.s1);<br>
+ intel_sub_group_block_write(p + get_simd_size() * 2, data.s2);<br>
+ intel_sub_group_block_write(p + get_simd_size() * 3, data.s3);<br>
+<br>
+}<br>
+OVERLOADABLE void intel_sub_group_block_write8(const global uint* p,uint8 data)<br>
+{<br>
+ intel_sub_group_block_write(p, data.s0);<br>
+ intel_sub_group_block_write(p + get_simd_size(), data.s1);<br>
+ intel_sub_group_block_write(p + get_simd_size() * 2, data.s2);<br>
+ intel_sub_group_block_write(p + get_simd_size() * 3, data.s3);<br>
+ intel_sub_group_block_write(p + get_simd_size() * 4, data.s4);<br>
+ intel_sub_group_block_write(p + get_simd_size() * 5, data.s5);<br>
+ intel_sub_group_block_write(p + get_simd_size() * 6, data.s6);<br>
+ intel_sub_group_block_write(p + get_simd_size() * 7, data.s7);<br>
+}<br>
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h<br>
index 355ee30..d0676be 100644<br>
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h<br>
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h<br>
@@ -132,3 +132,14 @@ OVERLOADABLE double sub_group_scan_exclusive_max(double x);<br>
OVERLOADABLE float intel_sub_group_shuffle(float x, uint c);<br>
OVERLOADABLE int intel_sub_group_shuffle(int x, uint c);<br>
OVERLOADABLE uint intel_sub_group_shuffle(uint x, uint c);<br>
+<br>
+/* block read/write */<br>
+OVERLOADABLE uint intel_sub_group_block_read(const global uint* p);<br>
+OVERLOADABLE uint2 intel_sub_group_block_read2(const global uint* p);<br>
+OVERLOADABLE uint4 intel_sub_group_block_read4(const global uint* p);<br>
+OVERLOADABLE uint8 intel_sub_group_block_read8(const global uint* p);<br>
+<br>
+OVERLOADABLE void intel_sub_group_block_write(const __global uint* p, uint data);<br>
+OVERLOADABLE void intel_sub_group_block_write2(const __global uint* p, uint2 data);<br>
+OVERLOADABLE void intel_sub_group_block_write4(const __global uint* p, uint4 data);<br>
+OVERLOADABLE void intel_sub_group_block_write8(const __global uint* p, uint8 data);<br>
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp<br>
index 3ddbfcc..e77290f 100644<br>
--- a/backend/src/llvm/llvm_gen_backend.cpp<br>
+++ b/backend/src/llvm/llvm_gen_backend.cpp<br>
@@ -697,6 +697,8 @@ namespace gbe<br>
void emitWorkGroupInst(CallInst &I, CallSite &CS, ir::WorkGroupOps opcode);<br>
// Emit subgroup instructions<br>
void emitSubGroupInst(CallInst &I, CallSite &CS, ir::WorkGroupOps opcode);<br>
+ // Emit subgroup block read/write memory instructions<br>
+ void emitBlockReadWriteMemInst(CallInst &I, CallSite &CS, bool isWrite);<br>
<br>
uint8_t appendSampler(CallSite::arg_iterator AI);<br>
uint8_t getImageID(CallInst &I);<br>
@@ -3730,6 +3732,9 @@ namespace gbe<br>
case GEN_OCL_LRP:<br>
this->newRegister(&I);<br>
break;<br>
+ case GEN_OCL_SUB_GROUP_BLOCK_READ_MEM:<br>
+ this->newRegister(&I, NULL, false);<br>
+ break;<br>
case GEN_OCL_PRINTF:<br>
this->newRegister(&I); // fall through<br>
case GEN_OCL_PUTS:<br>
@@ -3744,6 +3749,7 @@ namespace gbe<br>
case GEN_OCL_CALC_TIMESTAMP:<br>
case GEN_OCL_STORE_PROFILING:<br>
case GEN_OCL_DEBUGWAIT:<br>
+ case GEN_OCL_SUB_GROUP_BLOCK_WRITE_MEM:<br>
break;<br>
case GEN_OCL_NOT_FOUND:<br>
default:<br>
@@ -3938,6 +3944,61 @@ namespace gbe<br>
GBE_ASSERT(AI == AE);<br>
}<br>
<br>
+ void GenWriter::emitBlockReadWriteMemInst(CallInst &I, CallSite &CS, bool isWrite) {<br>
+ CallSite::arg_iterator AI = CS.arg_begin();<br>
+ CallSite::arg_iterator AE = CS.arg_end();<br>
+ GBE_ASSERT(AI != AE);<br>
+<br>
+ Value *llvmPtr = *(AI++);<br>
+ Value *llvmValues;<br>
+ ir::AddressSpace addrSpace = addressSpaceLLVMToGen(llvmPtr->getType()->getPointerAddressSpace());<br>
+ GBE_ASSERT(addrSpace == ir::MEM_GLOBAL);<br>
+ ir::Register pointer = this->getRegister(llvmPtr);<br>
+<br>
+ ir::Register ptr;<br>
+ ir::Register btiReg;<br>
+ unsigned SurfaceIndex = 0xff;<br>
+<br>
+ ir::AddressMode AM;<br>
+ if (legacyMode) {<br>
+ Value *bti = getBtiRegister(llvmPtr);<br>
+ Value *ptrBase = getPointerBase(llvmPtr);<br>
+ ir::Register baseReg = this->getRegister(ptrBase);<br>
+ if (isa<ConstantInt>(bti)) {<br>
+ AM = ir::AM_StaticBti;<br>
+ SurfaceIndex = cast<ConstantInt>(bti)->getZExtValue();<br>
+ addrSpace = btiToGen(SurfaceIndex);<br>
+ } else {<br>
+ AM = ir::AM_DynamicBti;<br>
+ addrSpace = ir::MEM_MIXED;<br>
+ btiReg = this->getRegister(bti);<br>
+ }<br>
+ const ir::RegisterFamily pointerFamily = ctx.getPointerFamily();<br>
+ ptr = ctx.reg(pointerFamily);<br>
+ ctx.SUB(ir::TYPE_U32, ptr, pointer, baseReg);<br>
+ } else {<br>
+ AM = ir::AM_Stateless;<br>
+ ptr = pointer;<br>
+ }<br>
+<br>
+ ir::Type type = ir::TYPE_U32;<br>
+ GBE_ASSERT(AM != ir::AM_DynamicBti);<br>
+<br>
+ if(isWrite){<br>
+ llvmValues = *(AI++);<br>
+ const ir::Register values = getRegister(llvmValues);<br>
+ const ir::Tuple tuple = ctx.arrayTuple(&values, 1);<br>
+ ctx.STORE(type, tuple, ptr, addrSpace, 1, true, AM, SurfaceIndex, true);<br>
+ } else {<br>
+ llvmValues = &I;<br>
+ const ir::Register values = getRegister(llvmValues);<br>
+ const ir::Tuple tuple = ctx.arrayTuple(&values, 1);<br>
+ ctx.LOAD(type, tuple, ptr, addrSpace, 1, true, AM, SurfaceIndex, true);<br>
+ }<br>
+<br>
+ GBE_ASSERT(AI == AE);<br>
+ }<br>
+<br>
/* append a new sampler. should be called before any reference to<br>
* a sampler_t value. */<br>
uint8_t GenWriter::appendSampler(CallSite::arg_iterator AI) {<br>
@@ -4762,6 +4823,10 @@ namespace gbe<br>
ctx.LRP(ir::TYPE_FLOAT, dst, src0, src1, src2);<br>
break;<br>
}<br>
+ case GEN_OCL_SUB_GROUP_BLOCK_READ_MEM:<br>
+ this->emitBlockReadWriteMemInst(I, CS, false); break;<br>
+ case GEN_OCL_SUB_GROUP_BLOCK_WRITE_MEM:<br>
+ this->emitBlockReadWriteMemInst(I, CS, true); break;<br>
default: break;<br>
}<br>
}<br>
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx<br>
index 213ead0..003be91 100644<br>
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx<br>
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx<br>
@@ -202,7 +202,7 @@ DECL_LLVM_GEN_FUNCTION(WORK_GROUP_SCAN_INCLUSIVE_MIN, __gen_ocl_work_group_scan_<br>
DECL_LLVM_GEN_FUNCTION(WORK_GROUP_ALL, __gen_ocl_work_group_all)<br>
DECL_LLVM_GEN_FUNCTION(WORK_GROUP_ANY, __gen_ocl_work_group_any)<br>
<br>
-// work group function<br>
+// sub group function<br>
DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BROADCAST, __gen_ocl_sub_group_broadcast)<br>
<br>
DECL_LLVM_GEN_FUNCTION(SUB_GROUP_REDUCE_ADD, __gen_ocl_sub_group_reduce_add)<br>
@@ -217,5 +217,8 @@ DECL_LLVM_GEN_FUNCTION(SUB_GROUP_SCAN_INCLUSIVE_ADD, __gen_ocl_sub_group_scan_in<br>
DECL_LLVM_GEN_FUNCTION(SUB_GROUP_SCAN_INCLUSIVE_MAX, __gen_ocl_sub_group_scan_inclusive_max)<br>
DECL_LLVM_GEN_FUNCTION(SUB_GROUP_SCAN_INCLUSIVE_MIN, __gen_ocl_sub_group_scan_inclusive_min)<br>
<br>
+DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_MEM, __gen_ocl_sub_group_block_read_mem)<br>
+DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_MEM, __gen_ocl_sub_group_block_write_mem)<br>
+<br>
// common function<br>
DECL_LLVM_GEN_FUNCTION(LRP, __gen_ocl_lrp)<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.7.4<br>
<br>
_______________________________________________<br>
Beignet mailing list<br>
<a href="mailto:Beignet@lists.freedesktop.org">Beignet@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/beignet" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/beignet</a><br>
</font></span></blockquote></div><br></div>