[Beignet] [PATCH V2 11/12] Backend: Add intel_sub_group_block_read/write from image
Xiuli Pan
xiuli.pan at intel.com
Thu Jun 2 00:24:48 UTC 2016
From: Pan Xiuli <xiuli.pan at intel.com>
Use media block read/write messages to access image data in blocks. In SIMD16
mode the registers need some relocation for later use.
GEN7 uses a different data port.
V2: Refine block read simd16 with tmp reg to avoid MOVs
Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
backend/src/backend/gen/gen_mesa_disasm.c | 27 +++-
backend/src/backend/gen7_encoder.cpp | 48 +++++++
backend/src/backend/gen7_encoder.hpp | 4 +
backend/src/backend/gen7_instruction.hpp | 16 +++
backend/src/backend/gen8_instruction.hpp | 16 +++
backend/src/backend/gen_context.cpp | 155 +++++++++++++++++++++
backend/src/backend/gen_context.hpp | 2 +
backend/src/backend/gen_defs.hpp | 16 +++
backend/src/backend/gen_encoder.cpp | 47 +++++++
backend/src/backend/gen_encoder.hpp | 4 +
.../src/backend/gen_insn_gen7_schedule_info.hxx | 2 +
backend/src/backend/gen_insn_selection.cpp | 115 ++++++++++++++-
backend/src/backend/gen_insn_selection.hpp | 4 +
backend/src/backend/gen_insn_selection.hxx | 2 +
backend/src/ir/instruction.cpp | 112 ++++++++++++++-
backend/src/ir/instruction.hpp | 22 +++
backend/src/ir/instruction.hxx | 2 +
backend/src/ir/liveness.cpp | 3 +-
backend/src/libocl/src/ocl_substore.ll | 33 +++++
backend/src/libocl/tmpl/ocl_simd.tmpl.cl | 21 +++
backend/src/libocl/tmpl/ocl_simd.tmpl.h | 10 ++
backend/src/llvm/llvm_gen_backend.cpp | 62 ++++++++-
backend/src/llvm/llvm_gen_ocl_function.hxx | 8 ++
backend/src/llvm/llvm_scalarize.cpp | 14 ++
24 files changed, 732 insertions(+), 13 deletions(-)
diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
index 9200c26..9955dfc 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -1476,6 +1476,15 @@ int gen_disasm (FILE *file, const void *inst, uint32_t deviceID, uint32_t compac
SAMPLER_MSG_TYPE(inst),
SAMPLER_SIMD_MODE(inst));
break;
+ case GEN_SFID_DATAPORT_RENDER:
+ if(UNTYPED_RW_MSG_TYPE(inst) == 4 || UNTYPED_RW_MSG_TYPE(inst) == 10)
+ format(file, " (bti: %d, %s, %s)",
+ UNTYPED_RW_BTI(inst),
+ data_port_data_cache_category[UNTYPED_RW_CATEGORY(inst)],
+ data_port1_data_cache_msg_type[UNTYPED_RW_MSG_TYPE(inst)]);
+ else
+ format(file, " not implemented");
+ break;
case GEN_SFID_DATAPORT_DATA:
if(UNTYPED_RW_CATEGORY(inst) == 0) {
if(UNTYPED_RW_MSG_TYPE(inst) == 5 || UNTYPED_RW_MSG_TYPE(inst) == 13)
@@ -1510,12 +1519,18 @@ int gen_disasm (FILE *file, const void *inst, uint32_t deviceID, uint32_t compac
}
break;
case GEN_SFID_DATAPORT1_DATA:
- format(file, " (bti: %d, rgba: %d, %s, %s, %s)",
- UNTYPED_RW_BTI(inst),
- UNTYPED_RW_RGBA(inst),
- data_port_data_cache_simd_mode[UNTYPED_RW_SIMD_MODE(inst)],
- data_port_data_cache_category[UNTYPED_RW_CATEGORY(inst)],
- data_port1_data_cache_msg_type[UNTYPED_RW_MSG_TYPE(inst)]);
+ if(UNTYPED_RW_MSG_TYPE(inst) == 4 || UNTYPED_RW_MSG_TYPE(inst) == 10)
+ format(file, " (bti: %d, %s, %s)",
+ UNTYPED_RW_BTI(inst),
+ data_port_data_cache_category[UNTYPED_RW_CATEGORY(inst)],
+ data_port1_data_cache_msg_type[UNTYPED_RW_MSG_TYPE(inst)]);
+ else
+ format(file, " (bti: %d, rgba: %d, %s, %s, %s)",
+ UNTYPED_RW_BTI(inst),
+ UNTYPED_RW_RGBA(inst),
+ data_port_data_cache_simd_mode[UNTYPED_RW_SIMD_MODE(inst)],
+ data_port_data_cache_category[UNTYPED_RW_CATEGORY(inst)],
+ data_port1_data_cache_msg_type[UNTYPED_RW_MSG_TYPE(inst)]);
break;
case GEN_SFID_DATAPORT_CONSTANT:
format(file, " (bti: %d, %s)",
diff --git a/backend/src/backend/gen7_encoder.cpp b/backend/src/backend/gen7_encoder.cpp
index fc358be..abb8b77 100644
--- a/backend/src/backend/gen7_encoder.cpp
+++ b/backend/src/backend/gen7_encoder.cpp
@@ -239,5 +239,53 @@ namespace gbe
}
}
+ static void setMBlockRWGEN7(GenEncoder *p, // Fill in the media block read/write message descriptor of insn
+ GenNativeInstruction *insn,
+ uint32_t bti,
+ uint32_t msg_type,
+ uint32_t msg_length,
+ uint32_t response_length)
+ {
+ const GenMessageTarget sfid = GEN_SFID_DATAPORT_RENDER; // this variant sends to the render data port
+ p->setMessageDescriptor(insn, sfid, msg_length, response_length);
+ insn->bits3.gen7_mblock_rw.msg_type = msg_type;
+ insn->bits3.gen7_mblock_rw.bti = bti;
+ insn->bits3.gen7_mblock_rw.header_present = 1; // the message always carries a header
+ }
+
+
+ void Gen7Encoder::MBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t size) { // Emit a SEND doing a media block read into dst
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+ const uint32_t msg_length = 1; // only one register is sent; src0 below is the header
+ const uint32_t response_length = size; // number of registers returned
+ this->setHeader(insn);
+ this->setDst(insn, GenRegister::ud8grf(dst.nr, 0));
+ this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0));
+ this->setSrc1(insn, GenRegister::immud(0));
+ setMBlockRWGEN7(this,
+ insn,
+ bti,
+ GEN75_P1_MEDIA_BREAD,
+ msg_length,
+ response_length);
+ }
+
+ void Gen7Encoder::MBWRITE(GenRegister header, uint32_t bti, uint32_t size) { // Emit a SEND doing a media block write
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+ const uint32_t msg_length = 1 + size; // one register for the header plus `size` more
+ const uint32_t response_length = 0; // nothing is returned; the destination below is the null register
+ this->setHeader(insn);
+ this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
+ this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0));
+ this->setSrc1(insn, GenRegister::immud(0));
+ setMBlockRWGEN7(this,
+ insn,
+ bti,
+ GEN75_P1_MEDIA_TYPED_BWRITE,
+ msg_length,
+ response_length);
+ }
+
+
#undef NO_SWIZZLE
}
diff --git a/backend/src/backend/gen7_encoder.hpp b/backend/src/backend/gen7_encoder.hpp
index 1276c67..edb711d 100644
--- a/backend/src/backend/gen7_encoder.hpp
+++ b/backend/src/backend/gen7_encoder.hpp
@@ -42,6 +42,10 @@ namespace gbe
virtual void setSrc1(GenNativeInstruction *insn, GenRegister reg);
virtual void alu3(uint32_t opcode, GenRegister dst,
GenRegister src0, GenRegister src1, GenRegister src2);
+ /*! MBlock read */
+ virtual void MBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t elemSize);
+ /*! MBlock write */
+ virtual void MBWRITE(GenRegister header, uint32_t bti, uint32_t elemSize);
};
}
#endif /* __GBE_GEN7_ENCODER_HPP__ */
diff --git a/backend/src/backend/gen7_instruction.hpp b/backend/src/backend/gen7_instruction.hpp
index 258dd24..7d7eada 100644
--- a/backend/src/backend/gen7_instruction.hpp
+++ b/backend/src/backend/gen7_instruction.hpp
@@ -531,6 +531,22 @@ union Gen7NativeInstruction
uint32_t uip:16;
} gen7_branch;
+ /*! Data port Media block read / write message descriptor; the bitfields below add up to 32 bits */
+ struct {
+ uint32_t bti:8;
+ uint32_t ver_line_stride_offset:1;
+ uint32_t ver_line_stride:1;
+ uint32_t ver_line_stride_override:1;
+ uint32_t ignored:3;
+ uint32_t msg_type:4;
+ uint32_t category:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } gen7_mblock_rw;
+
int d;
uint32_t ud;
float f;
diff --git a/backend/src/backend/gen8_instruction.hpp b/backend/src/backend/gen8_instruction.hpp
index ada9ffc..549948a 100644
--- a/backend/src/backend/gen8_instruction.hpp
+++ b/backend/src/backend/gen8_instruction.hpp
@@ -608,6 +608,22 @@ union Gen8NativeInstruction
uint32_t jip:32;
} gen8_branch;
+ /*! Data port Media block read / write message descriptor; the bitfields below add up to 32 bits */
+ struct {
+ uint32_t bti:8;
+ uint32_t ver_line_stride_offset:1;
+ uint32_t ver_line_stride:1;
+ uint32_t ver_line_stride_override:1;
+ uint32_t ignored:3;
+ uint32_t msg_type:4;
+ uint32_t category:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } gen7_mblock_rw;
+
int d;
uint32_t ud;
float f;
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 90b8b45..98a94ba 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -3538,6 +3538,161 @@ namespace gbe
p->OBWRITE(header, insn.getbti(), insn.extra.elem);
}
+ void GenContext::emitMBReadInstruction(const SelectionInstruction &insn) { // Lower a media block read selection instruction to native code
+ const GenRegister dst = ra->genReg(insn.dst(0));
+ const GenRegister coordx = GenRegister::toUniform(ra->genReg(insn.src(0)),GEN_TYPE_D);
+ const GenRegister coordy = GenRegister::toUniform(ra->genReg(insn.src(1)),GEN_TYPE_D);
+ GenRegister header, offsetx, offsety, blocksizereg;
+ if (simdWidth == 8)
+ header = GenRegister::retype(ra->genReg(insn.dst(0)), GEN_TYPE_UD); // SIMD8: the header is built in the first destination register
+ else
+ header = GenRegister::retype(GenRegister::Qn(ra->genReg(insn.src(2)),1), GEN_TYPE_UD); // otherwise: Qn(.., 1) of the dedicated header source
+
+ offsetx = GenRegister::offset(header, 0, 0*4);
+ offsety = GenRegister::offset(header, 0, 1*4);
+ blocksizereg = GenRegister::offset(header, 0, 2*4);
+ size_t vec_size = insn.extra.elem;
+ uint32_t blocksize = 0x1F | (vec_size-1) << 16; // 0x1F in the low bits, vec_size-1 from bit 16 up; presumably width-1 / height-1 - confirm against the message header layout
+
+ if (simdWidth == 8)
+ {
+ p->push();
+ // Copy r0 into the header first
+ p->curr.execWidth = 8;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ p->MOV(header, GenRegister::ud8grf(0,0));
+
+ // Update the header with the coord
+ p->curr.execWidth = 1;
+ p->MOV(offsetx, coordx);
+ p->MOV(offsety, coordy);
+ // Update block width and height
+ p->MOV(blocksizereg, GenRegister::immud(blocksize));
+ // Now read the data
+ p->curr.execWidth = 8;
+ p->MBREAD(dst, header, insn.getbti(), vec_size);
+ p->pop();
+
+ }
+ else
+ {
+ const GenRegister tmp = ra->genReg(insn.dst(vec_size)); // first temporary; the selection stage stores the temporaries after the vec_size results
+ p->push();
+ // Copy r0 into the header first
+ p->curr.execWidth = 8;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ p->MOV(header, GenRegister::ud8grf(0,0));
+
+ // First half
+ // Update the header with the coord
+ p->curr.execWidth = 1;
+ p->MOV(offsetx, coordx);
+ p->MOV(offsety, coordy);
+ // Update block width and height
+ p->MOV(blocksizereg, GenRegister::immud(blocksize));
+ // Now read the data
+ p->curr.execWidth = 8;
+ p->MBREAD(tmp, header, insn.getbti(), vec_size);
+
+ // Second half
+ // Advance the x coordinate in the header by 32 for the second read
+ p->curr.execWidth = 1;
+ p->ADD(offsetx, offsetx, GenRegister::immud(32));
+
+ const GenRegister tmp2 = GenRegister::offset(tmp, vec_size); // second read lands vec_size registers after the first
+ // Now read the data
+ p->curr.execWidth = 8;
+ p->MBREAD(tmp2, header, insn.getbti(), vec_size);
+
+ // Interleave the two reads: dst register 2*i comes from the first read, 2*i+1 from the second.
+ for (uint32_t i = 0; i < vec_size; i++) { // unsigned index: avoids a signed/unsigned comparison with vec_size
+ p->MOV(GenRegister::offset(dst, i * 2), GenRegister::offset(tmp, i));
+ p->MOV(GenRegister::offset(dst, i * 2 + 1),
+ GenRegister::offset(tmp2, i));
+ }
+ p->pop();
+ }
+ }
+
+ void GenContext::emitMBWriteInstruction(const SelectionInstruction &insn) { // Lower a media block write selection instruction to native code
+ const GenRegister coordx = GenRegister::toUniform(ra->genReg(insn.src(0)), GEN_TYPE_D);
+ const GenRegister coordy = GenRegister::toUniform(ra->genReg(insn.src(1)), GEN_TYPE_D);
+ GenRegister header, offsetx, offsety, blocksizereg;
+ size_t vec_size = insn.extra.elem;
+ uint32_t blocksize = 0x1F | (vec_size-1) << 16; // 0x1F in the low bits, vec_size-1 from bit 16 up; presumably width-1 / height-1 - confirm against the message header layout
+ if (simdWidth == 8)
+ header = GenRegister::retype(ra->genReg(insn.dst(0)), GEN_TYPE_UD);
+ else
+ header = GenRegister::retype(GenRegister::Qn(ra->genReg(insn.dst(0)),1), GEN_TYPE_UD);
+
+ offsetx = GenRegister::offset(header, 0, 0*4);
+ offsety = GenRegister::offset(header, 0, 1*4);
+ blocksizereg = GenRegister::offset(header, 0, 2*4);
+
+ if (simdWidth == 8)
+ {
+ p->push();
+ // Copy r0 into the header first
+ p->curr.execWidth = 8;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ p->MOV(header, GenRegister::ud8grf(0,0));
+
+ // Update the header with the coord
+ p->curr.execWidth = 1;
+ p->MOV(offsetx, coordx);
+ p->MOV(offsety, coordy);
+ // Update block width and height
+ p->MOV(blocksizereg, GenRegister::immud(blocksize));
+ p->curr.execWidth = 8;
+ // Copy the values to write into the temporaries dst(1..vec_size)
+ for(uint32_t i = 0; i < vec_size; i++)
+ p->MOV(ra->genReg(insn.dst(1 + i)), ra->genReg(insn.src(2 + i)));
+ // Now write the data
+ p->MBWRITE(header, insn.getbti(), vec_size);
+ p->pop();
+
+ }
+ else
+ {
+ p->push();
+ // Copy r0 into the header first
+ p->curr.execWidth = 8;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ p->MOV(header, GenRegister::ud8grf(0,0));
+
+ // First half
+ // Update the header with the coord
+ p->curr.execWidth = 1;
+ p->MOV(offsetx, coordx);
+ p->MOV(offsety, coordy);
+ // Update block width and height
+ p->MOV(blocksizereg, GenRegister::immud(blocksize));
+ // Back to width 8 for the payload copy and the write
+ p->curr.execWidth = 8;
+ // Copy the values to write into the registers right after the header
+ for(uint32_t i = 0; i < vec_size; i++)
+ p->MOV(GenRegister::offset(header, 1 + i), ra->genReg(insn.src(2 + i)));
+ p->MBWRITE(header, insn.getbti(), vec_size);
+
+ // Second half
+ // Advance the x coordinate in the header by 32 for the second write
+ p->curr.execWidth = 1;
+ p->ADD(offsetx, offsetx, GenRegister::immud(32));
+
+ p->curr.execWidth = 8;
+ // Same copy, this time from Qn(.., 1) of every source value
+ for(uint32_t i = 0; i < vec_size; i++)
+ p->MOV(GenRegister::offset(header, 1 + i), GenRegister::Qn(ra->genReg(insn.src(2 + i)), 1));
+ // Now write the data
+ p->MBWRITE(header, insn.getbti(), vec_size);
+
+ p->pop();
+ }
+ }
BVAR(OCL_OUTPUT_REG_ALLOC, false);
BVAR(OCL_OUTPUT_ASM, false);
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index a634338..fb3d4fe 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -189,6 +189,8 @@ namespace gbe
void afterMessage(const SelectionInstruction &insn, GenRegister bti, GenRegister flagTemp, GenRegister btiTmp, unsigned jip0);
void emitOBReadInstruction(const SelectionInstruction &insn);
void emitOBWriteInstruction(const SelectionInstruction &insn);
+ void emitMBReadInstruction(const SelectionInstruction &insn);
+ void emitMBWriteInstruction(const SelectionInstruction &insn);
/*! Implements base class */
virtual Kernel *allocateKernel(void);
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index 09cb2ba..66ae5b5 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -784,6 +784,22 @@ union GenNativeInstruction
uint32_t jip:32;
} gen8_branch;
+ /*! Data port Media block read / write message descriptor; the bitfields below add up to 32 bits */
+ struct {
+ uint32_t bti:8;
+ uint32_t ver_line_stride_offset:1;
+ uint32_t ver_line_stride:1;
+ uint32_t ver_line_stride_override:1;
+ uint32_t ignored:3;
+ uint32_t msg_type:4;
+ uint32_t category:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } gen7_mblock_rw;
+
int d;
uint32_t ud;
float f;
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index e745b9c..eb9fbeb 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -276,6 +276,21 @@ namespace gbe
insn->bits3.gen7_oblock_rw.header_present = 1;
}
+ static void setMBlockRW(GenEncoder *p, // Fill in the media block read/write message descriptor of insn
+ GenNativeInstruction *insn,
+ uint32_t bti,
+ uint32_t msg_type,
+ uint32_t msg_length,
+ uint32_t response_length)
+ {
+ const GenMessageTarget sfid = GEN_SFID_DATAPORT1_DATA; // this variant sends to data port 1
+ p->setMessageDescriptor(insn, sfid, msg_length, response_length);
+ insn->bits3.gen7_mblock_rw.msg_type = msg_type;
+ insn->bits3.gen7_mblock_rw.bti = bti;
+ insn->bits3.gen7_mblock_rw.header_present = 1; // the message always carries a header
+ }
+
+
static void setDWordScatterMessgae(GenEncoder *p,
GenNativeInstruction *insn,
uint32_t bti,
@@ -1277,6 +1292,38 @@ namespace gbe
response_length);
}
+ void GenEncoder::MBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t size) { // Emit a SEND doing a media block read into dst
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+ const uint32_t msg_length = 1; // only one register is sent; src0 below is the header
+ const uint32_t response_length = size; // number of registers returned
+ this->setHeader(insn);
+ this->setDst(insn, GenRegister::ud8grf(dst.nr, 0));
+ this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0));
+ this->setSrc1(insn, GenRegister::immud(0));
+ setMBlockRW(this,
+ insn,
+ bti,
+ GEN75_P1_MEDIA_BREAD,
+ msg_length,
+ response_length);
+ }
+
+ void GenEncoder::MBWRITE(GenRegister header, uint32_t bti, uint32_t size) { // Emit a SEND doing a media block write
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+ const uint32_t msg_length = 1 + size; // one register for the header plus `size` more
+ const uint32_t response_length = 0; // nothing is returned; the destination below is the null register
+ this->setHeader(insn);
+ this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
+ this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0));
+ this->setSrc1(insn, GenRegister::immud(0));
+ setMBlockRW(this,
+ insn,
+ bti,
+ GEN75_P1_MEDIA_TYPED_BWRITE,
+ msg_length,
+ response_length);
+ }
+
void GenEncoder::EOT(uint32_t msg) {
GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index a53c879..4979305 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -271,6 +271,10 @@ namespace gbe
void OBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t elemSize);
/*! OBlock write */
void OBWRITE(GenRegister header, uint32_t bti, uint32_t elemSize);
+ /*! MBlock read */
+ virtual void MBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t elemSize);
+ /*! MBlock write */
+ virtual void MBWRITE(GenRegister header, uint32_t bti, uint32_t elemSize);
GBE_CLASS(GenEncoder); //!< Use custom allocators
virtual void alu3(uint32_t opcode, GenRegister dst,
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index d297726..c396626 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -52,3 +52,5 @@ DECL_GEN7_SCHEDULE(SubGroupOp, 80, 1, 1)
DECL_GEN7_SCHEDULE(Printf, 80, 1, 1)
DECL_GEN7_SCHEDULE(OBRead, 80, 1, 1)
DECL_GEN7_SCHEDULE(OBWrite, 80, 1, 1)
+DECL_GEN7_SCHEDULE(MBRead, 80, 1, 1)
+DECL_GEN7_SCHEDULE(MBWrite, 80, 1, 1)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index e974e97..39688ad 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -189,7 +189,8 @@ namespace gbe
this->opcode == SEL_OP_SAMPLE ||
this->opcode == SEL_OP_VME ||
this->opcode == SEL_OP_DWORD_GATHER ||
- this->opcode == SEL_OP_OBREAD;
+ this->opcode == SEL_OP_OBREAD ||
+ this->opcode == SEL_OP_MBREAD;
}
bool SelectionInstruction::modAcc(void) const {
@@ -212,7 +213,8 @@ namespace gbe
this->opcode == SEL_OP_ATOMIC ||
this->opcode == SEL_OP_BYTE_SCATTER ||
this->opcode == SEL_OP_TYPED_WRITE ||
- this->opcode == SEL_OP_OBWRITE;
+ this->opcode == SEL_OP_OBWRITE ||
+ this->opcode == SEL_OP_MBWRITE;
}
bool SelectionInstruction::isBranch(void) const {
@@ -703,6 +705,10 @@ namespace gbe
void OBREAD(GenRegister dst, GenRegister addr, GenRegister header, uint32_t bti, uint32_t size);
/*! Oblock write */
void OBWRITE(GenRegister addr, GenRegister value, GenRegister header, uint32_t bti, uint32_t size);
+ /*! Media block read */
+ void MBREAD(GenRegister* dsts, GenRegister coordx, GenRegister coordy, GenRegister header, GenRegister* tmp, uint32_t bti, uint32_t vec_size);
+ /*! Media block write */
+ void MBWRITE(GenRegister coordx, GenRegister coordy, GenRegister* values, GenRegister header, GenRegister* tmp, uint32_t bti, uint32_t vec_size);
/* common functions for both binary instruction and sel_cmp and compare instruction.
It will handle the IMM or normal register assignment, and will try to avoid LOADI
@@ -2055,6 +2061,63 @@ namespace gbe
vector->isSrc = 1;
}
+ void Selection::Opaque::MBREAD(GenRegister* dsts, // Append a SEL_OP_MBREAD selection instruction
+ GenRegister coordx,
+ GenRegister coordy,
+ GenRegister header,
+ GenRegister* tmp,
+ uint32_t bti,
+ uint32_t vec_size) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_MBREAD, vec_size * 2, 3); // 2*vec_size destinations (results, then temporaries) and 3 sources
+ SelectionVector *vector = this->appendVector();
+ SelectionVector *vectortmp = this->appendVector();
+ for (uint32_t i = 0; i < vec_size; ++i) {
+ insn->dst(i) = dsts[i];
+ insn->dst(i + vec_size) = tmp[i];
+ }
+ insn->src(0) = coordx;
+ insn->src(1) = coordy;
+ insn->src(2) = header;
+ insn->setbti(bti);
+ insn->extra.elem = vec_size; // vector size
+
+ vector->regNum = vec_size;
+ vector->reg = &insn->dst(0);
+ vector->offsetID = 0;
+ vector->isSrc = 0;
+ vectortmp->regNum = vec_size;
+ vectortmp->reg = &insn->dst(vec_size);
+ vectortmp->offsetID = 0; // NOTE(review): this vector starts at dst(vec_size) yet offsetID is 0 - confirm it should not be vec_size
+ vectortmp->isSrc = 0;
+
+ }
+
+ void Selection::Opaque::MBWRITE(GenRegister coordx, // Append a SEL_OP_MBWRITE selection instruction
+ GenRegister coordy,
+ GenRegister* values,
+ GenRegister header,
+ GenRegister* tmp,
+ uint32_t bti,
+ uint32_t vec_size) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_MBWRITE, 1 + vec_size, 2 + vec_size); // dsts: header + temporaries; srcs: two coords + values
+ SelectionVector *vector = this->appendVector();
+ insn->src(0) = coordx;
+ insn->src(1) = coordy;
+ for (uint32_t i = 0; i < vec_size; ++i)
+ insn->src(2 + i) = values[i];
+ insn->dst(0) = header;
+ for (uint32_t i = 0; i < vec_size; ++i)
+ insn->dst(1 + i) = tmp[i];
+ insn->state = this->curr;
+ insn->setbti(bti);
+ insn->extra.elem = vec_size; // vector size
+
+ // The header and the temporaries that receive the data form one vector of 1 + vec_size registers
+ vector->regNum = 1 + vec_size;
+ vector->reg = &insn->dst(0);
+ vector->offsetID = 0;
+ vector->isSrc = 0;
+ }
// Boiler plate to initialize the selection library at c++ pre-main
static SelectionLibrary *selLib = NULL;
@@ -6583,6 +6646,52 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
}
};
+ /*! Media Block Read pattern: turns an ir::MediaBlockReadInstruction into one sel.MBREAD call */
+ DECL_PATTERN(MediaBlockReadInstruction)
+ {
+ bool emitOne(Selection::Opaque &sel, const ir::MediaBlockReadInstruction &insn, bool &markChildren) const
+ {
+ using namespace ir;
+ uint32_t vec_size = insn.getVectorSize();
+ GenRegister values[vec_size]; // NOTE(review): runtime-sized arrays are a compiler extension in C++ - confirm the build relies on it
+ GenRegister tmp[vec_size];
+ for (uint32_t i = 0; i < vec_size; ++i) {
+ values[i] = sel.selReg(insn.getDst(i), TYPE_U32);
+ tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); // one fresh dword register per result, used as temporary
+ }
+ const GenRegister coordx = sel.selReg(insn.getSrc(0), TYPE_U32);
+ const GenRegister coordy = sel.selReg(insn.getSrc(1), TYPE_U32);
+ const GenRegister header = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); // fresh register for the message header
+ sel.MBREAD(values, coordx, coordy, header, tmp, insn.getImageIndex(), insn.getVectorSize());
+ return true;
+ }
+ DECL_CTOR(MediaBlockReadInstruction, 1, 1);
+ };
+
+ /*! Media Block Write pattern: turns an ir::MediaBlockWriteInstruction into one sel.MBWRITE call */
+ DECL_PATTERN(MediaBlockWriteInstruction)
+ {
+ bool emitOne(Selection::Opaque &sel, const ir::MediaBlockWriteInstruction &insn, bool &markChildren) const
+ {
+ using namespace ir;
+ uint32_t vec_size = insn.getVectorSize();
+ const GenRegister coordx = sel.selReg(insn.getSrc(0), TYPE_U32);
+ const GenRegister coordy = sel.selReg(insn.getSrc(1), TYPE_U32);
+ GenRegister values[vec_size]; // NOTE(review): runtime-sized arrays are a compiler extension in C++ - confirm the build relies on it
+ GenRegister tmp[vec_size];
+ for(uint32_t i = 0; i < vec_size; i++)
+ {
+ values[i] = sel.selReg(insn.getSrc(2 + i), TYPE_U32); // the values to write follow the two coordinates
+ tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); // one fresh dword register per value, used as temporary
+ }
+ const GenRegister header = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); // fresh register for the message header
+ sel.MBWRITE(coordx, coordy, values, header, tmp, insn.getImageIndex(), vec_size);
+ return true;
+ }
+ DECL_CTOR(MediaBlockWriteInstruction, 1, 1);
+ };
+
+
/*! Sort patterns */
INLINE bool cmp(const SelectionPattern *p0, const SelectionPattern *p1) {
if (p0->insnNum != p1->insnNum)
@@ -6624,6 +6733,8 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
this->insert<NullaryInstructionPattern>();
this->insert<WaitInstructionPattern>();
this->insert<PrintfInstructionPattern>();
+ this->insert<MediaBlockReadInstructionPattern>();
+ this->insert<MediaBlockWriteInstructionPattern>();
// Sort all the patterns with the number of instructions they output
for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 51af686..b481de8 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -177,6 +177,8 @@ namespace gbe
switch (opcode) {
case SEL_OP_OBREAD:
case SEL_OP_OBWRITE:
+ case SEL_OP_MBREAD:
+ case SEL_OP_MBWRITE:
case SEL_OP_DWORD_GATHER: return extra.function;
case SEL_OP_SAMPLE: return extra.rdbti;
case SEL_OP_VME: return extra.vme_bti;
@@ -192,6 +194,8 @@ namespace gbe
switch (opcode) {
case SEL_OP_OBREAD:
case SEL_OP_OBWRITE:
+ case SEL_OP_MBREAD:
+ case SEL_OP_MBWRITE:
case SEL_OP_DWORD_GATHER: extra.function = bti; return;
case SEL_OP_SAMPLE: extra.rdbti = bti; return;
case SEL_OP_VME: extra.vme_bti = bti; return;
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 4a7caff..ccaf526 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -98,3 +98,5 @@ DECL_SELECTION_IR(SUBGROUP_OP, SubGroupOpInstruction)
DECL_SELECTION_IR(PRINTF, PrintfInstruction)
DECL_SELECTION_IR(OBREAD, OBReadInstruction)
DECL_SELECTION_IR(OBWRITE, OBWriteInstruction)
+DECL_SELECTION_IR(MBREAD, MBReadInstruction)
+DECL_SELECTION_IR(MBWRITE, MBWriteInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 88491a7..ed64580 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1064,6 +1064,78 @@ namespace ir {
Register dst[1];
};
+ class ALIGNED_INSTRUCTION MediaBlockReadInstruction : // internal IR instruction for a media block read (opcode OP_MBREAD)
+ public BasePolicy,
+ public TupleSrcPolicy<MediaBlockReadInstruction>,
+ public TupleDstPolicy<MediaBlockReadInstruction>
+ {
+ public:
+ INLINE MediaBlockReadInstruction(uint8_t imageIdx, Tuple dst, uint8_t vec_size, Tuple srcTuple, uint8_t srcNum) {
+ this->opcode = OP_MBREAD;
+ this->dst = dst;
+ this->dstNum = vec_size; // the destination count doubles as the vector size
+ this->src = srcTuple;
+ this->srcNum = srcNum;
+ this->imageIdx = imageIdx;
+ }
+ INLINE bool wellFormed(const Function &fn, std::string &why) const;
+ INLINE void out(std::ostream &out, const Function &fn) const { // print opcode, vector size, destinations, surface id and both coordinates
+ this->outOpcode(out);
+ out << (int)this->getVectorSize();
+ out << " {";
+ for (uint32_t i = 0; i < dstNum; ++i)
+ out << "%" << this->getDst(fn, i) << (i != (dstNum-1u) ? " " : "");
+ out << "}";
+ out << " 2D surface id " << (int)this->getImageIndex()
+ << " byte coord x %" << this->getSrc(fn, 0)
+ << " row coord y %" << this->getSrc(fn, 1);
+ }
+ INLINE uint8_t getImageIndex(void) const { return this->imageIdx; }
+ INLINE uint8_t getVectorSize(void) const { return this->dstNum; }
+
+ Tuple src;
+ Tuple dst;
+ uint8_t imageIdx;
+ uint8_t srcNum;
+ uint8_t dstNum;
+ };
+
+ class ALIGNED_INSTRUCTION MediaBlockWriteInstruction : // internal IR instruction for a media block write (opcode OP_MBWRITE)
+ public BasePolicy,
+ public TupleSrcPolicy<MediaBlockWriteInstruction>,
+ public NDstPolicy<MediaBlockWriteInstruction, 0>
+ {
+ public:
+
+ INLINE MediaBlockWriteInstruction(uint8_t imageIdx, Tuple srcTuple, uint8_t srcNum, uint8_t vec_size) {
+ this->opcode = OP_MBWRITE;
+ this->src = srcTuple;
+ this->srcNum = srcNum;
+ this->imageIdx = imageIdx;
+ this->vec_size = vec_size;
+ }
+ INLINE bool wellFormed(const Function &fn, std::string &why) const;
+ INLINE void out(std::ostream &out, const Function &fn) const { // print opcode, vector size, surface id, both coordinates and the values
+ this->outOpcode(out);
+ out << (int)this->getVectorSize()
+ << " 2D surface id " << (int)this->getImageIndex()
+ << " byte coord x %" << this->getSrc(fn, 0)
+ << " row coord y %" << this->getSrc(fn, 1);
+ out << " {";
+ for (uint32_t i = 0; i < vec_size; ++i)
+ out << "%" << this->getSrc(fn, i + 2) << (i != (vec_size-1u) ? " " : ""); // values start at source index 2
+ out << "}";
+ }
+ INLINE uint8_t getImageIndex(void) const { return this->imageIdx; }
+ INLINE uint8_t getVectorSize(void) const { return this->vec_size; }
+
+ Tuple src;
+ Register dst[0]; // presumably a placeholder required by NDstPolicy<.., 0> - confirm
+ uint8_t imageIdx;
+ uint8_t srcNum;
+ uint8_t vec_size;
+ };
+
#undef ALIGNED_INSTRUCTION
/////////////////////////////////////////////////////////////////////////
@@ -1591,6 +1663,22 @@ namespace ir {
return true;
}
+ INLINE bool MediaBlockReadInstruction::wellFormed(const Function &fn, std::string &whyNot) const { // only the source count is validated
+ if (this->srcNum != 2) { // exactly two sources are accepted
+ whyNot = "Wrong number of source.";
+ return false;
+ }
+ return true;
+ }
+
+ INLINE bool MediaBlockWriteInstruction::wellFormed(const Function &fn, std::string &whyNot) const { // only the source count is validated
+ if (this->srcNum != 2 + this->vec_size) { // two sources plus one per vector element
+ whyNot = "Wrong number of source.";
+ return false;
+ }
+ return true;
+ }
+
#undef CHECK_TYPE
/////////////////////////////////////////////////////////////////////////
@@ -2058,6 +2146,14 @@ START_INTROSPECTION(PrintfInstruction)
#include "ir/instruction.hxx"
END_INTROSPECTION(PrintfInstruction)
+START_INTROSPECTION(MediaBlockReadInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(MediaBlockReadInstruction)
+
+START_INTROSPECTION(MediaBlockWriteInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(MediaBlockWriteInstruction)
+
#undef END_INTROSPECTION
#undef START_INTROSPECTION
#undef DECL_INSN
@@ -2205,7 +2301,8 @@ END_FUNCTION(Instruction, Register)
opcode == OP_CALC_TIMESTAMP ||
opcode == OP_STORE_PROFILING ||
opcode == OP_WAIT ||
- opcode == OP_PRINTF;
+ opcode == OP_PRINTF ||
+ opcode == OP_MBWRITE;
}
#define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \
@@ -2275,6 +2372,10 @@ DECL_MEM_FN(SubGroupInstruction, WorkGroupOps, getWorkGroupOpcode(void), getWork
DECL_MEM_FN(PrintfInstruction, uint32_t, getNum(void), getNum())
DECL_MEM_FN(PrintfInstruction, uint32_t, getBti(void), getBti())
DECL_MEM_FN(PrintfInstruction, Type, getType(const Function& fn, uint32_t ID), getType(fn, ID))
+DECL_MEM_FN(MediaBlockReadInstruction, uint8_t, getImageIndex(void), getImageIndex())
+DECL_MEM_FN(MediaBlockReadInstruction, uint8_t, getVectorSize(void), getVectorSize())
+DECL_MEM_FN(MediaBlockWriteInstruction, uint8_t, getImageIndex(void), getImageIndex())
+DECL_MEM_FN(MediaBlockWriteInstruction, uint8_t, getVectorSize(void), getVectorSize())
#undef DECL_MEM_FN
@@ -2582,6 +2683,15 @@ DECL_MEM_FN(MemInstruction, void, setBtiReg(Register reg), setBtiReg(reg))
return internal::PrintfInstruction(dst, srcTuple, typeTuple, srcNum, bti, num).convert();
}
+ Instruction MBREAD(uint8_t imageIndex, Tuple dst, uint8_t vec_size, Tuple coord, uint8_t srcNum) { // build a media block read IR instruction
+ return internal::MediaBlockReadInstruction(imageIndex, dst, vec_size, coord, srcNum).convert();
+ }
+
+ Instruction MBWRITE(uint8_t imageIndex, Tuple srcTuple, uint8_t srcNum, uint8_t vec_size) { // build a media block write IR instruction
+ return internal::MediaBlockWriteInstruction(imageIndex, srcTuple, srcNum, vec_size).convert();
+ }
+
+
std::ostream &operator<< (std::ostream &out, const Instruction &insn) {
const Function &fn = insn.getFunction();
const BasicBlock *bb = insn.getParent();
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 4e7d5b7..b2b0b49 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -635,6 +635,24 @@ namespace ir {
static bool isClassOf(const Instruction &insn);
};
+ /*! Media Block Read: exposes the image index and the vector size of the instruction. */
+ class MediaBlockReadInstruction : public Instruction {
+ public:
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ uint8_t getImageIndex() const;
+ uint8_t getVectorSize() const;
+ };
+
+ /*! Media Block Write: exposes the image index and the vector size of the instruction. */
+ class MediaBlockWriteInstruction : public Instruction {
+ public:
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ uint8_t getImageIndex() const;
+ uint8_t getVectorSize() const;
+ };
+
/*! Specialize the instruction. Also performs typechecking first based on the
* opcode. Crashes if it fails
*/
@@ -867,6 +885,10 @@ namespace ir {
Instruction SUBGROUP(WorkGroupOps opcode, Register dst, Tuple srcTuple, uint8_t srcNum, Type type);
/*! printf */
Instruction PRINTF(Register dst, Tuple srcTuple, Tuple typeTuple, uint8_t srcNum, uint8_t bti, uint16_t num);
+ /*! media block read */
+ Instruction MBREAD(uint8_t imageIndex, Tuple dst, uint8_t vec_size, Tuple coord, uint8_t srcNum);
+ /*! media block write */
+ Instruction MBWRITE(uint8_t imageIndex, Tuple srcTuple, uint8_t srcNum, uint8_t vec_size);
} /* namespace ir */
} /* namespace gbe */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 57e13eb..7d755ae 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -114,3 +114,5 @@ DECL_INSN(WAIT, WaitInstruction)
DECL_INSN(WORKGROUP, WorkGroupInstruction)
DECL_INSN(SUBGROUP, SubGroupInstruction)
DECL_INSN(PRINTF, PrintfInstruction)
+DECL_INSN(MBREAD, MediaBlockReadInstruction)
+DECL_INSN(MBWRITE, MediaBlockWriteInstruction)
diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp
index 3162d13..43d4c87 100644
--- a/backend/src/ir/liveness.cpp
+++ b/backend/src/ir/liveness.cpp
@@ -118,7 +118,8 @@ namespace ir {
uniform = false;
// do not change dst uniform for block read
- if (insn.getOpcode() == ir::OP_LOAD && ir::cast<ir::LoadInstruction>(insn).isBlock())
+ if ((insn.getOpcode() == ir::OP_LOAD && ir::cast<ir::LoadInstruction>(insn).isBlock()) ||
+ insn.getOpcode() == ir::OP_MBREAD)
uniform = false;
for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
diff --git a/backend/src/libocl/src/ocl_substore.ll b/backend/src/libocl/src/ocl_substore.ll
index 665cdfa..f6c2c70 100644
--- a/backend/src/libocl/src/ocl_substore.ll
+++ b/backend/src/libocl/src/ocl_substore.ll
@@ -1,9 +1,42 @@
target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir"
+%opencl.image2d_t = type opaque
declare void @__gen_ocl_sub_group_block_write_mem(i32 addrspace(1)* nocapture, i32) nounwind alwaysinline noduplicate
+declare void @__gen_ocl_sub_group_block_write_image(%opencl.image2d_t addrspace(1)*, i32, i32, i32) nounwind alwaysinline noduplicate
+declare void @__gen_ocl_sub_group_block_write_image2(%opencl.image2d_t addrspace(1)*, i32, i32, <2 x i32>) nounwind alwaysinline noduplicate
+declare void @__gen_ocl_sub_group_block_write_image4(%opencl.image2d_t addrspace(1)*, i32, i32, <4 x i32>) nounwind alwaysinline noduplicate
+declare void @__gen_ocl_sub_group_block_write_image8(%opencl.image2d_t addrspace(1)*, i32, i32, <8 x i32>) nounwind alwaysinline noduplicate
define void @_Z27intel_sub_group_block_writePKU3AS1jj(i32 addrspace(1)* %p, i32 %data) nounwind alwaysinline noduplicate {
call void @__gen_ocl_sub_group_block_write_mem(i32 addrspace(1)* %p, i32 %data)
ret void
}
+
+define void @_Z27intel_sub_group_block_write11ocl_image2dDv2_ij(%opencl.image2d_t addrspace(1)* %image, <2 x i32> %byte_coord, i32 %data) nounwind alwaysinline noduplicate { ; mangled name of intel_sub_group_block_write(image2d_t, int2, uint)
+ %1 = extractelement <2 x i32> %byte_coord, i32 0 ; first coordinate component
+ %2 = extractelement <2 x i32> %byte_coord, i32 1 ; second coordinate component
+ call void @__gen_ocl_sub_group_block_write_image(%opencl.image2d_t addrspace(1)* %image, i32 %1, i32 %2, i32 %data) ; the builtin takes the coordinate as two scalars
+ ret void
+}
+
+define void @_Z28intel_sub_group_block_write211ocl_image2dDv2_iDv2_j(%opencl.image2d_t addrspace(1)* %image, <2 x i32> %byte_coord, <2 x i32> %data) nounwind alwaysinline noduplicate { ; mangled name of intel_sub_group_block_write2(image2d_t, int2, uint2)
+ %1 = extractelement <2 x i32> %byte_coord, i32 0 ; first coordinate component
+ %2 = extractelement <2 x i32> %byte_coord, i32 1 ; second coordinate component
+ call void @__gen_ocl_sub_group_block_write_image2(%opencl.image2d_t addrspace(1)* %image, i32 %1, i32 %2, <2 x i32> %data) ; data stays a vector here
+ ret void
+}
+
+define void @_Z28intel_sub_group_block_write411ocl_image2dDv2_iDv4_j(%opencl.image2d_t addrspace(1)* %image, <2 x i32> %byte_coord, <4 x i32> %data) nounwind alwaysinline noduplicate { ; mangled name of intel_sub_group_block_write4(image2d_t, int2, uint4)
+ %1 = extractelement <2 x i32> %byte_coord, i32 0 ; first coordinate component
+ %2 = extractelement <2 x i32> %byte_coord, i32 1 ; second coordinate component
+ call void @__gen_ocl_sub_group_block_write_image4(%opencl.image2d_t addrspace(1)* %image, i32 %1, i32 %2, <4 x i32> %data) ; data stays a vector here
+ ret void
+}
+
+define void @_Z28intel_sub_group_block_write811ocl_image2dDv2_iDv8_j(%opencl.image2d_t addrspace(1)* %image, <2 x i32> %byte_coord, <8 x i32> %data) nounwind alwaysinline noduplicate { ; mangled name of intel_sub_group_block_write8(image2d_t, int2, uint8)
+ %1 = extractelement <2 x i32> %byte_coord, i32 0 ; first coordinate component
+ %2 = extractelement <2 x i32> %byte_coord, i32 1 ; second coordinate component
+ call void @__gen_ocl_sub_group_block_write_image8(%opencl.image2d_t addrspace(1)* %image, i32 %1, i32 %2, <8 x i32> %data) ; data stays a vector here
+ ret void
+}
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
index 66490cc..753a045 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
@@ -187,3 +187,24 @@ OVERLOADABLE void intel_sub_group_block_write8(const global uint* p,uint8 data)
intel_sub_group_block_write(p + get_simd_size() * 6, data.s6);
intel_sub_group_block_write(p + get_simd_size() * 7, data.s7);
}
+
+PURE CONST uint __gen_ocl_sub_group_block_read_image(image2d_t p, int x, int y); // backend builtins: declared only, no body in this file
+PURE CONST uint2 __gen_ocl_sub_group_block_read_image2(image2d_t p, int x, int y); // NOTE(review): if CONST expands to __attribute__((const)), it promises no memory reads,
+PURE CONST uint4 __gen_ocl_sub_group_block_read_image4(image2d_t p, int x, int y); // yet these read image data; reads could be merged across a block write -- confirm
+PURE CONST uint8 __gen_ocl_sub_group_block_read_image8(image2d_t p, int x, int y);
+OVERLOADABLE uint intel_sub_group_block_read(image2d_t p, int2 cord) // image overload; cord is the parameter declared as byte_coord in ocl_simd.tmpl.h
+{
+ return __gen_ocl_sub_group_block_read_image(p, cord.x, cord.y); // split the int2 coordinate into the two scalars the builtin takes
+}
+OVERLOADABLE uint2 intel_sub_group_block_read2(image2d_t p, int2 cord) // image overload returning uint2
+{
+ return __gen_ocl_sub_group_block_read_image2(p, cord.x, cord.y); // split the int2 coordinate into the two scalars the builtin takes
+}
+OVERLOADABLE uint4 intel_sub_group_block_read4(image2d_t p, int2 cord) // image overload returning uint4
+{
+ return __gen_ocl_sub_group_block_read_image4(p, cord.x, cord.y); // split the int2 coordinate into the two scalars the builtin takes
+}
+OVERLOADABLE uint8 intel_sub_group_block_read8(image2d_t p, int2 cord) // image overload returning uint8
+{
+ return __gen_ocl_sub_group_block_read_image8(p, cord.x, cord.y); // split the int2 coordinate into the two scalars the builtin takes
+}
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
index d0676be..799f772 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
@@ -143,3 +143,13 @@ OVERLOADABLE void intel_sub_group_block_write(const __global uint* p, uint data)
OVERLOADABLE void intel_sub_group_block_write2(const __global uint* p, uint2 data);
OVERLOADABLE void intel_sub_group_block_write4(const __global uint* p, uint4 data);
OVERLOADABLE void intel_sub_group_block_write8(const __global uint* p, uint8 data);
+
+OVERLOADABLE uint intel_sub_group_block_read(image2d_t image, int2 byte_coord); // image read overloads: bodies are in ocl_simd.tmpl.cl
+OVERLOADABLE uint2 intel_sub_group_block_read2(image2d_t image, int2 byte_coord);
+OVERLOADABLE uint4 intel_sub_group_block_read4(image2d_t image, int2 byte_coord);
+OVERLOADABLE uint8 intel_sub_group_block_read8(image2d_t image, int2 byte_coord);
+
+OVERLOADABLE void intel_sub_group_block_write(image2d_t image, int2 byte_coord, uint data); // image write overloads: bodies are the mangled functions in ocl_substore.ll
+OVERLOADABLE void intel_sub_group_block_write2(image2d_t image, int2 byte_coord, uint2 data);
+OVERLOADABLE void intel_sub_group_block_write4(image2d_t image, int2 byte_coord, uint4 data);
+OVERLOADABLE void intel_sub_group_block_write8(image2d_t image, int2 byte_coord, uint8 data);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index ffa838c..38c0f2b 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -699,6 +699,7 @@ namespace gbe
void emitSubGroupInst(CallInst &I, CallSite &CS, ir::WorkGroupOps opcode);
// Emit subgroup instructions
void emitBlockReadWriteMemInst(CallInst &I, CallSite &CS, bool isWrite);
+ void emitBlockReadWriteImageInst(CallInst &I, CallSite &CS, bool isWrite, uint8_t vec_size);
uint8_t appendSampler(CallSite::arg_iterator AI);
uint8_t getImageID(CallInst &I);
@@ -3744,10 +3745,12 @@ namespace gbe
case GEN_OCL_SUB_GROUP_SCAN_INCLUSIVE_MAX:
case GEN_OCL_SUB_GROUP_SCAN_INCLUSIVE_MIN:
case GEN_OCL_LRP:
- this->newRegister(&I);
- break;
case GEN_OCL_SUB_GROUP_BLOCK_READ_MEM:
- this->newRegister(&I, NULL, false);
+ case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE:
+ case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE2:
+ case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE4:
+ case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE8:
+ this->newRegister(&I);
break;
case GEN_OCL_PRINTF:
this->newRegister(&I); // fall through
@@ -3764,6 +3767,10 @@ namespace gbe
case GEN_OCL_STORE_PROFILING:
case GEN_OCL_DEBUGWAIT:
case GEN_OCL_SUB_GROUP_BLOCK_WRITE_MEM:
+ case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE:
+ case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE2:
+ case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE4:
+ case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE8:
break;
case GEN_OCL_NOT_FOUND:
default:
@@ -4013,6 +4020,39 @@ namespace gbe
GBE_ASSERT(AI == AE);
}
+ void GenWriter::emitBlockReadWriteImageInst(CallInst &I, CallSite &CS, bool isWrite, uint8_t vec_size) { // turn a __gen_ocl_sub_group_block_{read,write}_image* call into an IR MBREAD/MBWRITE; callers pass vec_size 1, 2, 4 or 8
+ CallSite::arg_iterator AI = CS.arg_begin();
+ CallSite::arg_iterator AE = CS.arg_end();
+ GBE_ASSERT(AI != AE); // the call must have at least the image argument
+
+ const uint8_t imageID = getImageID(I); // presumably resolved from the call's image argument -- confirm in getImageID
+ AI++; // step past the first (image) argument
+
+ if(isWrite){
+ ir::Register src[2 + vec_size]; // 2 coordinate registers, then vec_size data registers. NOTE(review): variable-length array, a compiler extension in C++
+ src[0] = getRegister(*(AI++)); // first scalar coordinate argument
+ src[1] = getRegister(*(AI++)); // second scalar coordinate argument
+ for(int i = 0;i < vec_size; i++)
+ src[2 + i] = getRegister(*(AI), i); // register for component i of the data argument
+ AI++; // outside the brace-less loop above: consume the data argument once
+ const ir::Tuple srctuple = ctx.arrayTuple(src, 2 + vec_size);
+ ctx.MBWRITE(imageID, srctuple, 2 + vec_size, vec_size); // srcNum = 2 coordinates + vec_size data registers
+ } else {
+ ir::Register src[2]; // a read only has the two coordinate sources
+ src[0] = getRegister(*(AI++)); // first scalar coordinate argument
+ src[1] = getRegister(*(AI++)); // second scalar coordinate argument
+ ir::Register dst[vec_size]; // NOTE(review): variable-length array, a compiler extension in C++
+ for(int i = 0;i < vec_size; i++)
+ dst[i] = getRegister(&I, i); // register for component i of the call's result
+ const ir::Tuple srctuple = ctx.arrayTuple(src, 2);
+ const ir::Tuple dsttuple = ctx.arrayTuple(dst, vec_size);
+ ctx.MBREAD(imageID, dsttuple, vec_size, srctuple, 2); // srcNum = 2 coordinates
+ }
+
+ GBE_ASSERT(AI == AE); // every call argument must have been consumed
+ }
+
+
/* append a new sampler. should be called before any reference to
* a sampler_t value. */
uint8_t GenWriter::appendSampler(CallSite::arg_iterator AI) {
@@ -4841,6 +4881,22 @@ namespace gbe
this->emitBlockReadWriteMemInst(I, CS, false); break;
case GEN_OCL_SUB_GROUP_BLOCK_WRITE_MEM:
this->emitBlockReadWriteMemInst(I, CS, true); break;
+ case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE:
+ this->emitBlockReadWriteImageInst(I, CS, false, 1); break;
+ case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE2:
+ this->emitBlockReadWriteImageInst(I, CS, false, 2); break;
+ case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE4:
+ this->emitBlockReadWriteImageInst(I, CS, false, 4); break;
+ case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE8:
+ this->emitBlockReadWriteImageInst(I, CS, false, 8); break;
+ case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE:
+ this->emitBlockReadWriteImageInst(I, CS, true, 1); break;
+ case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE2:
+ this->emitBlockReadWriteImageInst(I, CS, true, 2); break;
+ case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE4:
+ this->emitBlockReadWriteImageInst(I, CS, true, 4); break;
+ case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE8:
+ this->emitBlockReadWriteImageInst(I, CS, true, 8); break;
default: break;
}
}
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 003be91..456ab58 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -219,6 +219,14 @@ DECL_LLVM_GEN_FUNCTION(SUB_GROUP_SCAN_INCLUSIVE_MIN, __gen_ocl_sub_group_scan_in
DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_MEM, __gen_ocl_sub_group_block_read_mem)
DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_MEM, __gen_ocl_sub_group_block_write_mem)
+DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_IMAGE, __gen_ocl_sub_group_block_read_image)
+DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_IMAGE2, __gen_ocl_sub_group_block_read_image2)
+DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_IMAGE4, __gen_ocl_sub_group_block_read_image4)
+DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_IMAGE8, __gen_ocl_sub_group_block_read_image8)
+DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_IMAGE, __gen_ocl_sub_group_block_write_image)
+DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_IMAGE2, __gen_ocl_sub_group_block_write_image2)
+DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_IMAGE4, __gen_ocl_sub_group_block_write_image4)
+DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_IMAGE8, __gen_ocl_sub_group_block_write_image8)
// common function
DECL_LLVM_GEN_FUNCTION(LRP, __gen_ocl_lrp)
diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
index 53fd320..e60bf4b 100644
--- a/backend/src/llvm/llvm_scalarize.cpp
+++ b/backend/src/llvm/llvm_scalarize.cpp
@@ -682,7 +682,21 @@ namespace gbe {
*CI = InsertToVector(call, *CI);
break;
}
+ case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE:
+ case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE2:
+ case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE4:
+ case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE8:
+ {
+ ++CI;
+ ++CI;
+ if ((*CI)->getType()->isVectorTy())
+ *CI = InsertToVector(call, *CI);
+ break;
+ }
case GEN_OCL_VME:
+ case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE2:
+ case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE4:
+ case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE8:
setAppendPoint(call);
extractFromVector(call);
break;
--
2.7.4
More information about the Beignet
mailing list