[Beignet] [PATCH V3 11/12] Backend: Add intel_sub_group_block_read/write from image
Yang, Rong R
rong.r.yang at intel.com
Mon Jun 13 07:36:02 UTC 2016
The patchset LGTM, pushed, thanks.
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Xiuli Pan
> Sent: Sunday, June 12, 2016 5:33
> To: beignet at lists.freedesktop.org
> Cc: Pan, Xiuli <xiuli.pan at intel.com>
> Subject: [Beignet] [PATCH V3 11/12] Backend: Add
> intel_sub_group_block_read/write from image
>
> From: Pan Xiuli <xiuli.pan at intel.com>
>
> Use media block read/write to read data in blocks. In simd16 mode we
> need some register relocation for later use.
> GEN7 uses a different data port.
> V2: Refine block read simd16 with tmp reg to avoid MOVs
> V3: Fix build bug with clang.
>
> Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
> ---
> backend/src/backend/gen/gen_mesa_disasm.c | 27 +++-
> backend/src/backend/gen7_encoder.cpp | 48 +++++++
> backend/src/backend/gen7_encoder.hpp | 4 +
> backend/src/backend/gen7_instruction.hpp | 16 +++
> backend/src/backend/gen8_instruction.hpp | 16 +++
> backend/src/backend/gen_context.cpp | 155
> +++++++++++++++++++++
> backend/src/backend/gen_context.hpp | 2 +
> backend/src/backend/gen_defs.hpp | 16 +++
> backend/src/backend/gen_encoder.cpp | 47 +++++++
> backend/src/backend/gen_encoder.hpp | 4 +
> .../src/backend/gen_insn_gen7_schedule_info.hxx | 2 +
> backend/src/backend/gen_insn_selection.cpp | 115 ++++++++++++++-
> backend/src/backend/gen_insn_selection.hpp | 4 +
> backend/src/backend/gen_insn_selection.hxx | 2 +
> backend/src/ir/instruction.cpp | 112 ++++++++++++++-
> backend/src/ir/instruction.hpp | 22 +++
> backend/src/ir/instruction.hxx | 2 +
> backend/src/ir/liveness.cpp | 3 +-
> backend/src/libocl/src/ocl_substore.ll | 33 +++++
> backend/src/libocl/tmpl/ocl_simd.tmpl.cl | 21 +++
> backend/src/libocl/tmpl/ocl_simd.tmpl.h | 10 ++
> backend/src/llvm/llvm_gen_backend.cpp | 62 ++++++++-
> backend/src/llvm/llvm_gen_ocl_function.hxx | 8 ++
> backend/src/llvm/llvm_scalarize.cpp | 14 ++
> 24 files changed, 732 insertions(+), 13 deletions(-)
>
> diff --git a/backend/src/backend/gen/gen_mesa_disasm.c
> b/backend/src/backend/gen/gen_mesa_disasm.c
> index 9200c26..9955dfc 100644
> --- a/backend/src/backend/gen/gen_mesa_disasm.c
> +++ b/backend/src/backend/gen/gen_mesa_disasm.c
> @@ -1476,6 +1476,15 @@ int gen_disasm (FILE *file, const void *inst,
> uint32_t deviceID, uint32_t compac
> SAMPLER_MSG_TYPE(inst),
> SAMPLER_SIMD_MODE(inst));
> break;
> + case GEN_SFID_DATAPORT_RENDER:
> + if(UNTYPED_RW_MSG_TYPE(inst) == 4 ||
> UNTYPED_RW_MSG_TYPE(inst) == 10)
> + format(file, " (bti: %d, %s, %s)",
> + UNTYPED_RW_BTI(inst),
> +
> data_port_data_cache_category[UNTYPED_RW_CATEGORY(inst)],
> +
> data_port1_data_cache_msg_type[UNTYPED_RW_MSG_TYPE(inst)]);
> + else
> + format(file, " not implemented");
> + break;
> case GEN_SFID_DATAPORT_DATA:
> if(UNTYPED_RW_CATEGORY(inst) == 0) {
> if(UNTYPED_RW_MSG_TYPE(inst) == 5 ||
> UNTYPED_RW_MSG_TYPE(inst) == 13)
> @@ -1510,12 +1519,18 @@ int gen_disasm (FILE *file, const void *inst,
> uint32_t deviceID, uint32_t compac
> }
> break;
> case GEN_SFID_DATAPORT1_DATA:
> - format(file, " (bti: %d, rgba: %d, %s, %s, %s)",
> - UNTYPED_RW_BTI(inst),
> - UNTYPED_RW_RGBA(inst),
> -
> data_port_data_cache_simd_mode[UNTYPED_RW_SIMD_MODE(inst)],
> - data_port_data_cache_category[UNTYPED_RW_CATEGORY(inst)],
> -
> data_port1_data_cache_msg_type[UNTYPED_RW_MSG_TYPE(inst)]);
> + if(UNTYPED_RW_MSG_TYPE(inst) == 4 ||
> UNTYPED_RW_MSG_TYPE(inst) == 10)
> + format(file, " (bti: %d, %s, %s)",
> + UNTYPED_RW_BTI(inst),
> +
> data_port_data_cache_category[UNTYPED_RW_CATEGORY(inst)],
> +
> data_port1_data_cache_msg_type[UNTYPED_RW_MSG_TYPE(inst)]);
> + else
> + format(file, " (bti: %d, rgba: %d, %s, %s, %s)",
> + UNTYPED_RW_BTI(inst),
> + UNTYPED_RW_RGBA(inst),
> +
> data_port_data_cache_simd_mode[UNTYPED_RW_SIMD_MODE(inst)],
> +
> data_port_data_cache_category[UNTYPED_RW_CATEGORY(inst)],
> +
> data_port1_data_cache_msg_type[UNTYPED_RW_MSG_TYPE(inst)]);
> break;
> case GEN_SFID_DATAPORT_CONSTANT:
> format(file, " (bti: %d, %s)",
> diff --git a/backend/src/backend/gen7_encoder.cpp
> b/backend/src/backend/gen7_encoder.cpp
> index fc358be..abb8b77 100644
> --- a/backend/src/backend/gen7_encoder.cpp
> +++ b/backend/src/backend/gen7_encoder.cpp
> @@ -239,5 +239,53 @@ namespace gbe
> }
> }
>
> + static void setMBlockRWGEN7(GenEncoder *p,
> + GenNativeInstruction *insn,
> + uint32_t bti,
> + uint32_t msg_type,
> + uint32_t msg_length,
> + uint32_t response_length)
> + {
> + const GenMessageTarget sfid = GEN_SFID_DATAPORT_RENDER;
> + p->setMessageDescriptor(insn, sfid, msg_length, response_length);
> + insn->bits3.gen7_mblock_rw.msg_type = msg_type;
> + insn->bits3.gen7_mblock_rw.bti = bti;
> + insn->bits3.gen7_mblock_rw.header_present = 1;
> + }
> +
> +
> + void Gen7Encoder::MBREAD(GenRegister dst, GenRegister header,
> uint32_t bti, uint32_t size) {
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> + const uint32_t msg_length = 1;
> + const uint32_t response_length = size; // Size of registers
> + this->setHeader(insn);
> + this->setDst(insn, GenRegister::ud8grf(dst.nr, 0));
> + this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0));
> + this->setSrc1(insn, GenRegister::immud(0));
> + setMBlockRWGEN7(this,
> + insn,
> + bti,
> + GEN75_P1_MEDIA_BREAD,
> + msg_length,
> + response_length);
> + }
> +
> + void Gen7Encoder::MBWRITE(GenRegister header, uint32_t bti, uint32_t
> size) {
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> + const uint32_t msg_length = 1 + size;
> + const uint32_t response_length = 0; // Size of registers
> + this->setHeader(insn);
> + this->setDst(insn, GenRegister::retype(GenRegister::null(),
> GEN_TYPE_UW));
> + this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0));
> + this->setSrc1(insn, GenRegister::immud(0));
> + setMBlockRWGEN7(this,
> + insn,
> + bti,
> + GEN75_P1_MEDIA_TYPED_BWRITE,
> + msg_length,
> + response_length);
> + }
> +
> +
> #undef NO_SWIZZLE
> }
> diff --git a/backend/src/backend/gen7_encoder.hpp
> b/backend/src/backend/gen7_encoder.hpp
> index 1276c67..edb711d 100644
> --- a/backend/src/backend/gen7_encoder.hpp
> +++ b/backend/src/backend/gen7_encoder.hpp
> @@ -42,6 +42,10 @@ namespace gbe
> virtual void setSrc1(GenNativeInstruction *insn, GenRegister reg);
> virtual void alu3(uint32_t opcode, GenRegister dst,
> GenRegister src0, GenRegister src1, GenRegister src2);
> + /*! MBlock read */
> + virtual void MBREAD(GenRegister dst, GenRegister header, uint32_t bti,
> uint32_t elemSize);
> + /*! MBlock write */
> + virtual void MBWRITE(GenRegister header, uint32_t bti, uint32_t
> elemSize);
> };
> }
> #endif /* __GBE_GEN7_ENCODER_HPP__ */
> diff --git a/backend/src/backend/gen7_instruction.hpp
> b/backend/src/backend/gen7_instruction.hpp
> index 258dd24..7d7eada 100644
> --- a/backend/src/backend/gen7_instruction.hpp
> +++ b/backend/src/backend/gen7_instruction.hpp
> @@ -531,6 +531,22 @@ union Gen7NativeInstruction
> uint32_t uip:16;
> } gen7_branch;
>
> + /*! Data port Media block read / write */
> + struct {
> + uint32_t bti:8;
> + uint32_t ver_line_stride_offset:1;
> + uint32_t ver_line_stride:1;
> + uint32_t ver_line_stride_override:1;
> + uint32_t ignored:3;
> + uint32_t msg_type:4;
> + uint32_t category:1;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad2:2;
> + uint32_t end_of_thread:1;
> + } gen7_mblock_rw;
> +
> int d;
> uint32_t ud;
> float f;
> diff --git a/backend/src/backend/gen8_instruction.hpp
> b/backend/src/backend/gen8_instruction.hpp
> index ada9ffc..549948a 100644
> --- a/backend/src/backend/gen8_instruction.hpp
> +++ b/backend/src/backend/gen8_instruction.hpp
> @@ -608,6 +608,22 @@ union Gen8NativeInstruction
> uint32_t jip:32;
> } gen8_branch;
>
> + /*! Data port Media block read / write */
> + struct {
> + uint32_t bti:8;
> + uint32_t ver_line_stride_offset:1;
> + uint32_t ver_line_stride:1;
> + uint32_t ver_line_stride_override:1;
> + uint32_t ignored:3;
> + uint32_t msg_type:4;
> + uint32_t category:1;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad2:2;
> + uint32_t end_of_thread:1;
> + } gen7_mblock_rw;
> +
> int d;
> uint32_t ud;
> float f;
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 90b8b45..98a94ba 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -3538,6 +3538,161 @@ namespace gbe
> p->OBWRITE(header, insn.getbti(), insn.extra.elem);
> }
>
> + void GenContext::emitMBReadInstruction(const SelectionInstruction
> &insn) {
> + const GenRegister dst = ra->genReg(insn.dst(0));
> + const GenRegister coordx = GenRegister::toUniform(ra-
> >genReg(insn.src(0)),GEN_TYPE_D);
> + const GenRegister coordy = GenRegister::toUniform(ra-
> >genReg(insn.src(1)),GEN_TYPE_D);
> + GenRegister header, offsetx, offsety, blocksizereg;
> + if (simdWidth == 8)
> + header = GenRegister::retype(ra->genReg(insn.dst(0)), GEN_TYPE_UD);
> + else
> + header = GenRegister::retype(GenRegister::Qn(ra-
> >genReg(insn.src(2)),1), GEN_TYPE_UD);
> +
> + offsetx = GenRegister::offset(header, 0, 0*4);
> + offsety = GenRegister::offset(header, 0, 1*4);
> + blocksizereg = GenRegister::offset(header, 0, 2*4);
> + size_t vec_size = insn.extra.elem;
> + uint32_t blocksize = 0x1F | (vec_size-1) << 16;
> +
> + if (simdWidth == 8)
> + {
> + p->push();
> + // Copy r0 into the header first
> + p->curr.execWidth = 8;
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.noMask = 1;
> + p->MOV(header, GenRegister::ud8grf(0,0));
> +
> + // Update the header with the coord
> + p->curr.execWidth = 1;
> + p->MOV(offsetx, coordx);
> + p->MOV(offsety, coordy);
> + // Update block width and height
> + p->MOV(blocksizereg, GenRegister::immud(blocksize));
> + // Now read the data
> + p->curr.execWidth = 8;
> + p->MBREAD(dst, header, insn.getbti(), vec_size);
> + p->pop();
> +
> + }
> + else
> + {
> + const GenRegister tmp = ra->genReg(insn.dst(vec_size));
> + p->push();
> + // Copy r0 into the header first
> + p->curr.execWidth = 8;
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.noMask = 1;
> + p->MOV(header, GenRegister::ud8grf(0,0));
> +
> + // First half
> + // Update the header with the coord
> + p->curr.execWidth = 1;
> + p->MOV(offsetx, coordx);
> + p->MOV(offsety, coordy);
> + // Update block width and height
> + p->MOV(blocksizereg, GenRegister::immud(blocksize));
> + // Now read the data
> + p->curr.execWidth = 8;
> + p->MBREAD(tmp, header, insn.getbti(), vec_size);
> +
> + // Second half
> + // Update the header with the coord
> + p->curr.execWidth = 1;
> + p->ADD(offsetx, offsetx, GenRegister::immud(32));
> +
> + const GenRegister tmp2 = GenRegister::offset(tmp, vec_size);
> + // Now read the data
> + p->curr.execWidth = 8;
> + p->MBREAD(tmp2, header, insn.getbti(), vec_size);
> +
> + // Move the reg to fit vector rule.
> + for (int i = 0; i < vec_size; i++) {
> + p->MOV(GenRegister::offset(dst, i * 2), GenRegister::offset(tmp, i));
> + p->MOV(GenRegister::offset(dst, i * 2 + 1),
> + GenRegister::offset(tmp2, i));
> + }
> + p->pop();
> + }
> + }
> +
> + void GenContext::emitMBWriteInstruction(const SelectionInstruction
> &insn) {
> + const GenRegister coordx = GenRegister::toUniform(ra-
> >genReg(insn.src(0)), GEN_TYPE_D);
> + const GenRegister coordy = GenRegister::toUniform(ra-
> >genReg(insn.src(1)), GEN_TYPE_D);
> + GenRegister header, offsetx, offsety, blocksizereg;
> + size_t vec_size = insn.extra.elem;
> + uint32_t blocksize = 0x1F | (vec_size-1) << 16;
> + if (simdWidth == 8)
> + header = GenRegister::retype(ra->genReg(insn.dst(0)), GEN_TYPE_UD);
> + else
> + header = GenRegister::retype(GenRegister::Qn(ra-
> >genReg(insn.dst(0)),1), GEN_TYPE_UD);
> +
> + offsetx = GenRegister::offset(header, 0, 0*4);
> + offsety = GenRegister::offset(header, 0, 1*4);
> + blocksizereg = GenRegister::offset(header, 0, 2*4);
> +
> + if (simdWidth == 8)
> + {
> + p->push();
> + // Copy r0 into the header first
> + p->curr.execWidth = 8;
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.noMask = 1;
> + p->MOV(header, GenRegister::ud8grf(0,0));
> +
> + // Update the header with the coord
> + p->curr.execWidth = 1;
> + p->MOV(offsetx, coordx);
> + p->MOV(offsety, coordy);
> + // Update block width and height
> + p->MOV(blocksizereg, GenRegister::immud(blocksize));
> + p->curr.execWidth = 8;
> + // Mov what we need into msgs
> + for(uint32_t i = 0; i < vec_size; i++)
> + p->MOV(ra->genReg(insn.dst(1 + i)), ra->genReg(insn.src(2 + i)));
> + // Now write the data
> + p->MBWRITE(header, insn.getbti(), vec_size);
> + p->pop();
> +
> + }
> + else
> + {
> + p->push();
> + // Copy r0 into the header first
> + p->curr.execWidth = 8;
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.noMask = 1;
> + p->MOV(header, GenRegister::ud8grf(0,0));
> +
> + // First half
> + // Update the header with the coord
> + p->curr.execWidth = 1;
> + p->MOV(offsetx, coordx);
> + p->MOV(offsety, coordy);
> + // Update block width and height
> + p->MOV(blocksizereg, GenRegister::immud(blocksize));
> + // Now write the data
> + p->curr.execWidth = 8;
> + // Mov what we need into msgs
> + for(uint32_t i = 0; i < vec_size; i++)
> + p->MOV(GenRegister::offset(header, 1 + i), ra->genReg(insn.src(2 +
> i)));
> + p->MBWRITE(header, insn.getbti(), vec_size);
> +
> + // Second half
> + // Update the header with the coord
> + p->curr.execWidth = 1;
> + p->ADD(offsetx, offsetx, GenRegister::immud(32));
> +
> + p->curr.execWidth = 8;
> + // Mov what we need into msgs
> + for(uint32_t i = 0; i < vec_size; i++)
> + p->MOV(GenRegister::offset(header, 1 + i), GenRegister::Qn(ra-
> >genReg(insn.src(2 + i)), 1));
> + // Now write the data
> + p->MBWRITE(header, insn.getbti(), vec_size);
> +
> + p->pop();
> + }
> + }
>
> BVAR(OCL_OUTPUT_REG_ALLOC, false);
> BVAR(OCL_OUTPUT_ASM, false);
> diff --git a/backend/src/backend/gen_context.hpp
> b/backend/src/backend/gen_context.hpp
> index a634338..fb3d4fe 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -189,6 +189,8 @@ namespace gbe
> void afterMessage(const SelectionInstruction &insn, GenRegister bti,
> GenRegister flagTemp, GenRegister btiTmp, unsigned jip0);
> void emitOBReadInstruction(const SelectionInstruction &insn);
> void emitOBWriteInstruction(const SelectionInstruction &insn);
> + void emitMBReadInstruction(const SelectionInstruction &insn);
> + void emitMBWriteInstruction(const SelectionInstruction &insn);
>
> /*! Implements base class */
> virtual Kernel *allocateKernel(void);
> diff --git a/backend/src/backend/gen_defs.hpp
> b/backend/src/backend/gen_defs.hpp
> index 09cb2ba..66ae5b5 100644
> --- a/backend/src/backend/gen_defs.hpp
> +++ b/backend/src/backend/gen_defs.hpp
> @@ -784,6 +784,22 @@ union GenNativeInstruction
> uint32_t jip:32;
> } gen8_branch;
>
> + /*! Data port Media block read / write */
> + struct {
> + uint32_t bti:8;
> + uint32_t ver_line_stride_offset:1;
> + uint32_t ver_line_stride:1;
> + uint32_t ver_line_stride_override:1;
> + uint32_t ignored:3;
> + uint32_t msg_type:4;
> + uint32_t category:1;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad2:2;
> + uint32_t end_of_thread:1;
> + } gen7_mblock_rw;
> +
> int d;
> uint32_t ud;
> float f;
> diff --git a/backend/src/backend/gen_encoder.cpp
> b/backend/src/backend/gen_encoder.cpp
> index e745b9c..eb9fbeb 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -276,6 +276,21 @@ namespace gbe
> insn->bits3.gen7_oblock_rw.header_present = 1;
> }
>
> + static void setMBlockRW(GenEncoder *p,
> + GenNativeInstruction *insn,
> + uint32_t bti,
> + uint32_t msg_type,
> + uint32_t msg_length,
> + uint32_t response_length)
> + {
> + const GenMessageTarget sfid = GEN_SFID_DATAPORT1_DATA;
> + p->setMessageDescriptor(insn, sfid, msg_length, response_length);
> + insn->bits3.gen7_mblock_rw.msg_type = msg_type;
> + insn->bits3.gen7_mblock_rw.bti = bti;
> + insn->bits3.gen7_mblock_rw.header_present = 1;
> + }
> +
> +
> static void setDWordScatterMessgae(GenEncoder *p,
> GenNativeInstruction *insn,
> uint32_t bti,
> @@ -1277,6 +1292,38 @@ namespace gbe
> response_length);
> }
>
> + void GenEncoder::MBREAD(GenRegister dst, GenRegister header,
> uint32_t bti, uint32_t size) {
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> + const uint32_t msg_length = 1;
> + const uint32_t response_length = size; // Size of registers
> + this->setHeader(insn);
> + this->setDst(insn, GenRegister::ud8grf(dst.nr, 0));
> + this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0));
> + this->setSrc1(insn, GenRegister::immud(0));
> + setMBlockRW(this,
> + insn,
> + bti,
> + GEN75_P1_MEDIA_BREAD,
> + msg_length,
> + response_length);
> + }
> +
> + void GenEncoder::MBWRITE(GenRegister header, uint32_t bti, uint32_t
> size) {
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> + const uint32_t msg_length = 1 + size;
> + const uint32_t response_length = 0; // Size of registers
> + this->setHeader(insn);
> + this->setDst(insn, GenRegister::retype(GenRegister::null(),
> GEN_TYPE_UW));
> + this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0));
> + this->setSrc1(insn, GenRegister::immud(0));
> + setMBlockRW(this,
> + insn,
> + bti,
> + GEN75_P1_MEDIA_TYPED_BWRITE,
> + msg_length,
> + response_length);
> + }
> +
> void GenEncoder::EOT(uint32_t msg) {
> GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> this->setDst(insn, GenRegister::retype(GenRegister::null(),
> GEN_TYPE_UD));
> diff --git a/backend/src/backend/gen_encoder.hpp
> b/backend/src/backend/gen_encoder.hpp
> index a53c879..4979305 100644
> --- a/backend/src/backend/gen_encoder.hpp
> +++ b/backend/src/backend/gen_encoder.hpp
> @@ -271,6 +271,10 @@ namespace gbe
> void OBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t
> elemSize);
> /*! OBlock write */
> void OBWRITE(GenRegister header, uint32_t bti, uint32_t elemSize);
> + /*! MBlock read */
> + virtual void MBREAD(GenRegister dst, GenRegister header, uint32_t bti,
> uint32_t elemSize);
> + /*! MBlock write */
> + virtual void MBWRITE(GenRegister header, uint32_t bti, uint32_t
> elemSize);
>
> GBE_CLASS(GenEncoder); //!< Use custom allocators
> virtual void alu3(uint32_t opcode, GenRegister dst,
> diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index d297726..c396626 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -52,3 +52,5 @@ DECL_GEN7_SCHEDULE(SubGroupOp, 80, 1, 1)
> DECL_GEN7_SCHEDULE(Printf, 80, 1, 1)
> DECL_GEN7_SCHEDULE(OBRead, 80, 1, 1)
> DECL_GEN7_SCHEDULE(OBWrite, 80, 1, 1)
> +DECL_GEN7_SCHEDULE(MBRead, 80, 1, 1)
> +DECL_GEN7_SCHEDULE(MBWrite, 80, 1, 1)
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index e974e97..d3c5a40c 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -189,7 +189,8 @@ namespace gbe
> this->opcode == SEL_OP_SAMPLE ||
> this->opcode == SEL_OP_VME ||
> this->opcode == SEL_OP_DWORD_GATHER ||
> - this->opcode == SEL_OP_OBREAD;
> + this->opcode == SEL_OP_OBREAD ||
> + this->opcode == SEL_OP_MBREAD;
> }
>
> bool SelectionInstruction::modAcc(void) const {
> @@ -212,7 +213,8 @@ namespace gbe
> this->opcode == SEL_OP_ATOMIC ||
> this->opcode == SEL_OP_BYTE_SCATTER ||
> this->opcode == SEL_OP_TYPED_WRITE ||
> - this->opcode == SEL_OP_OBWRITE;
> + this->opcode == SEL_OP_OBWRITE ||
> + this->opcode == SEL_OP_MBWRITE;
> }
>
> bool SelectionInstruction::isBranch(void) const {
> @@ -703,6 +705,10 @@ namespace gbe
> void OBREAD(GenRegister dst, GenRegister addr, GenRegister header,
> uint32_t bti, uint32_t size);
> /*! Oblock write */
> void OBWRITE(GenRegister addr, GenRegister value, GenRegister header,
> uint32_t bti, uint32_t size);
> + /*! Media block read */
> + void MBREAD(GenRegister* dsts, GenRegister coordx, GenRegister
> coordy, GenRegister header, GenRegister* tmp, uint32_t bti, uint32_t
> vec_size);
> + /*! Media block write */
> + void MBWRITE(GenRegister coordx, GenRegister coordy, GenRegister*
> values, GenRegister header, GenRegister* tmp, uint32_t bti, uint32_t
> vec_size);
>
> /* common functions for both binary instruction and sel_cmp and
> compare instruction.
> It will handle the IMM or normal register assignment, and will try to avoid
> LOADI
> @@ -2055,6 +2061,63 @@ namespace gbe
> vector->isSrc = 1;
> }
>
> + void Selection::Opaque::MBREAD(GenRegister* dsts,
> + GenRegister coordx,
> + GenRegister coordy,
> + GenRegister header,
> + GenRegister* tmp,
> + uint32_t bti,
> + uint32_t vec_size) {
> + SelectionInstruction *insn = this->appendInsn(SEL_OP_MBREAD,
> vec_size * 2, 3);
> + SelectionVector *vector = this->appendVector();
> + SelectionVector *vectortmp = this->appendVector();
> + for (uint32_t i = 0; i < vec_size; ++i) {
> + insn->dst(i) = dsts[i];
> + insn->dst(i + vec_size) = tmp[i];
> + }
> + insn->src(0) = coordx;
> + insn->src(1) = coordy;
> + insn->src(2) = header;
> + insn->setbti(bti);
> + insn->extra.elem = vec_size; // vector size
> +
> + vector->regNum = vec_size;
> + vector->reg = &insn->dst(0);
> + vector->offsetID = 0;
> + vector->isSrc = 0;
> + vectortmp->regNum = vec_size;
> + vectortmp->reg = &insn->dst(vec_size);
> + vectortmp->offsetID = 0;
> + vectortmp->isSrc = 0;
> +
> + }
> +
> + void Selection::Opaque::MBWRITE(GenRegister coordx,
> + GenRegister coordy,
> + GenRegister* values,
> + GenRegister header,
> + GenRegister* tmp,
> + uint32_t bti,
> + uint32_t vec_size) {
> + SelectionInstruction *insn = this->appendInsn(SEL_OP_MBWRITE, 1 +
> vec_size, 2 + vec_size);
> + SelectionVector *vector = this->appendVector();
> + insn->src(0) = coordx;
> + insn->src(1) = coordy;
> + for (uint32_t i = 0; i < vec_size; ++i)
> + insn->src(2 + i) = values[i];
> + insn->dst(0) = header;
> + for (uint32_t i = 0; i < vec_size; ++i)
> + insn->dst(1 + i) = tmp[i];
> + insn->state = this->curr;
> + insn->setbti(bti);
> + insn->extra.elem = vec_size; // vector size
> +
> + // We need to put the header and the data together
> + vector->regNum = 1 + vec_size;
> + vector->reg = &insn->dst(0);
> + vector->offsetID = 0;
> + vector->isSrc = 0;
> + }
>
> // Boiler plate to initialize the selection library at c++ pre-main
> static SelectionLibrary *selLib = NULL;
> @@ -6583,6 +6646,52 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
> }
> };
>
> + /*! Media Block Read pattern */
> + DECL_PATTERN(MediaBlockReadInstruction)
> + {
> + bool emitOne(Selection::Opaque &sel, const
> ir::MediaBlockReadInstruction &insn, bool &markChildren) const
> + {
> + using namespace ir;
> + uint32_t vec_size = insn.getVectorSize();
> + vector<GenRegister> valuesVec;
> + vector<GenRegister> tmpVec;
> + for (uint32_t i = 0; i < vec_size; ++i) {
> + valuesVec.push_back(sel.selReg(insn.getSrc(i), TYPE_U32));
> + tmpVec.push_back(sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32));
> + }
> + const GenRegister coordx = sel.selReg(insn.getSrc(0), TYPE_U32);
> + const GenRegister coordy = sel.selReg(insn.getSrc(1), TYPE_U32);
> + const GenRegister header = sel.selReg(sel.reg(FAMILY_DWORD),
> TYPE_U32);
> + sel.MBREAD(values, coordx, coordy, header, tmp, insn.getImageIndex(),
> insn.getVectorSize());
> + return true;
> + }
> + DECL_CTOR(MediaBlockReadInstruction, 1, 1);
> + };
> +
> + /*! Media Block Write pattern */
> + DECL_PATTERN(MediaBlockWriteInstruction)
> + {
> + bool emitOne(Selection::Opaque &sel, const
> ir::MediaBlockWriteInstruction &insn, bool &markChildren) const
> + {
> + using namespace ir;
> + uint32_t vec_size = insn.getVectorSize();
> + const GenRegister coordx = sel.selReg(insn.getSrc(0), TYPE_U32);
> + const GenRegister coordy = sel.selReg(insn.getSrc(1), TYPE_U32);
> + vector<GenRegister> valuesVec;
> + vector<GenRegister> tmpVec;
> + for(uint32_t i = 0; i < vec_size; i++)
> + {
> + valuesVec.push_back(sel.selReg(insn.getSrc(2 + i), TYPE_U32));
> + tmpVec.push_back(sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32));
> + }
> + const GenRegister header = sel.selReg(sel.reg(FAMILY_DWORD),
> TYPE_U32);
> + sel.MBWRITE(coordx, coordy, &valuesVec[0], header, &tmpVec[0],
> insn.getImageIndex(), vec_size);
> + return true;
> + }
> + DECL_CTOR(MediaBlockWriteInstruction, 1, 1);
> + };
> +
> +
> /*! Sort patterns */
> INLINE bool cmp(const SelectionPattern *p0, const SelectionPattern *p1) {
> if (p0->insnNum != p1->insnNum)
> @@ -6624,6 +6733,8 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
> this->insert<NullaryInstructionPattern>();
> this->insert<WaitInstructionPattern>();
> this->insert<PrintfInstructionPattern>();
> + this->insert<MediaBlockReadInstructionPattern>();
> + this->insert<MediaBlockWriteInstructionPattern>();
>
> // Sort all the patterns with the number of instructions they output
> for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
> diff --git a/backend/src/backend/gen_insn_selection.hpp
> b/backend/src/backend/gen_insn_selection.hpp
> index 51af686..b481de8 100644
> --- a/backend/src/backend/gen_insn_selection.hpp
> +++ b/backend/src/backend/gen_insn_selection.hpp
> @@ -177,6 +177,8 @@ namespace gbe
> switch (opcode) {
> case SEL_OP_OBREAD:
> case SEL_OP_OBWRITE:
> + case SEL_OP_MBREAD:
> + case SEL_OP_MBWRITE:
> case SEL_OP_DWORD_GATHER: return extra.function;
> case SEL_OP_SAMPLE: return extra.rdbti;
> case SEL_OP_VME: return extra.vme_bti;
> @@ -192,6 +194,8 @@ namespace gbe
> switch (opcode) {
> case SEL_OP_OBREAD:
> case SEL_OP_OBWRITE:
> + case SEL_OP_MBREAD:
> + case SEL_OP_MBWRITE:
> case SEL_OP_DWORD_GATHER: extra.function = bti; return;
> case SEL_OP_SAMPLE: extra.rdbti = bti; return;
> case SEL_OP_VME: extra.vme_bti = bti; return;
> diff --git a/backend/src/backend/gen_insn_selection.hxx
> b/backend/src/backend/gen_insn_selection.hxx
> index 4a7caff..ccaf526 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -98,3 +98,5 @@ DECL_SELECTION_IR(SUBGROUP_OP,
> SubGroupOpInstruction)
> DECL_SELECTION_IR(PRINTF, PrintfInstruction)
> DECL_SELECTION_IR(OBREAD, OBReadInstruction)
> DECL_SELECTION_IR(OBWRITE, OBWriteInstruction)
> +DECL_SELECTION_IR(MBREAD, MBReadInstruction)
> +DECL_SELECTION_IR(MBWRITE, MBWriteInstruction)
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index 88491a7..ed64580 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -1064,6 +1064,78 @@ namespace ir {
> Register dst[1];
> };
>
> + class ALIGNED_INSTRUCTION MediaBlockReadInstruction :
> + public BasePolicy,
> + public TupleSrcPolicy<MediaBlockReadInstruction>,
> + public TupleDstPolicy<MediaBlockReadInstruction>
> + {
> + public:
> + INLINE MediaBlockReadInstruction(uint8_t imageIdx, Tuple dst, uint8_t
> vec_size, Tuple srcTuple, uint8_t srcNum) {
> + this->opcode = OP_MBREAD;
> + this->dst = dst;
> + this->dstNum = vec_size;
> + this->src = srcTuple;
> + this->srcNum = srcNum;
> + this->imageIdx = imageIdx;
> + }
> + INLINE bool wellFormed(const Function &fn, std::string &why) const;
> + INLINE void out(std::ostream &out, const Function &fn) const {
> + this->outOpcode(out);
> + out << (int)this->getVectorSize();
> + out << " {";
> + for (uint32_t i = 0; i < dstNum; ++i)
> + out << "%" << this->getDst(fn, i) << (i != (dstNum-1u) ? " " : "");
> + out << "}";
> + out << " 2D surface id " << (int)this->getImageIndex()
> + << " byte coord x %" << this->getSrc(fn, 0)
> + << " row coord y %" << this->getSrc(fn, 1);
> + }
> + INLINE uint8_t getImageIndex(void) const { return this->imageIdx; }
> + INLINE uint8_t getVectorSize(void) const { return this->dstNum; }
> +
> + Tuple src;
> + Tuple dst;
> + uint8_t imageIdx;
> + uint8_t srcNum;
> + uint8_t dstNum;
> + };
> +
> + class ALIGNED_INSTRUCTION MediaBlockWriteInstruction :
> + public BasePolicy,
> + public TupleSrcPolicy<MediaBlockWriteInstruction>,
> + public NDstPolicy<MediaBlockWriteInstruction, 0>
> + {
> + public:
> +
> + INLINE MediaBlockWriteInstruction(uint8_t imageIdx, Tuple srcTuple,
> uint8_t srcNum, uint8_t vec_size) {
> + this->opcode = OP_MBWRITE;
> + this->src = srcTuple;
> + this->srcNum = srcNum;
> + this->imageIdx = imageIdx;
> + this->vec_size = vec_size;
> + }
> + INLINE bool wellFormed(const Function &fn, std::string &why) const;
> + INLINE void out(std::ostream &out, const Function &fn) const {
> + this->outOpcode(out);
> + out << (int)this->getVectorSize()
> + << " 2D surface id " << (int)this->getImageIndex()
> + << " byte coord x %" << this->getSrc(fn, 0)
> + << " row coord y %" << this->getSrc(fn, 1);
> + out << " {";
> + for (uint32_t i = 0; i < vec_size; ++i)
> + out << "%" << this->getSrc(fn, i + 2) << (i != (vec_size-1u) ? " " : "");
> + out << "}";
> + }
> + INLINE uint8_t getImageIndex(void) const { return this->imageIdx; }
> + INLINE uint8_t getVectorSize(void) const { return this->vec_size; }
> +
> + Tuple src;
> + Register dst[0];
> + uint8_t imageIdx;
> + uint8_t srcNum;
> + uint8_t vec_size;
> + };
> +
> #undef ALIGNED_INSTRUCTION
>
> /////////////////////////////////////////////////////////////////////////
> @@ -1591,6 +1663,22 @@ namespace ir {
> return true;
> }
>
> + INLINE bool MediaBlockReadInstruction::wellFormed(const Function &fn,
> std::string &whyNot) const {
> + if (this->srcNum != 2) {
> + whyNot = "Wrong number of source.";
> + return false;
> + }
> + return true;
> + }
> +
> + INLINE bool MediaBlockWriteInstruction::wellFormed(const Function &fn,
> std::string &whyNot) const {
> + if (this->srcNum != 2 + this->vec_size) {
> + whyNot = "Wrong number of source.";
> + return false;
> + }
> + return true;
> + }
> +
> #undef CHECK_TYPE
>
> /////////////////////////////////////////////////////////////////////////
> @@ -2058,6 +2146,14 @@ START_INTROSPECTION(PrintfInstruction)
> #include "ir/instruction.hxx"
> END_INTROSPECTION(PrintfInstruction)
>
> +START_INTROSPECTION(MediaBlockReadInstruction)
> +#include "ir/instruction.hxx"
> +END_INTROSPECTION(MediaBlockReadInstruction)
> +
> +START_INTROSPECTION(MediaBlockWriteInstruction)
> +#include "ir/instruction.hxx"
> +END_INTROSPECTION(MediaBlockWriteInstruction)
> +
> #undef END_INTROSPECTION
> #undef START_INTROSPECTION
> #undef DECL_INSN
> @@ -2205,7 +2301,8 @@ END_FUNCTION(Instruction, Register)
> opcode == OP_CALC_TIMESTAMP ||
> opcode == OP_STORE_PROFILING ||
> opcode == OP_WAIT ||
> - opcode == OP_PRINTF;
> + opcode == OP_PRINTF ||
> + opcode == OP_MBWRITE;
> }
>
> #define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \
> @@ -2275,6 +2372,10 @@ DECL_MEM_FN(SubGroupInstruction,
> WorkGroupOps, getWorkGroupOpcode(void), getWork
> DECL_MEM_FN(PrintfInstruction, uint32_t, getNum(void), getNum())
> DECL_MEM_FN(PrintfInstruction, uint32_t, getBti(void), getBti())
> DECL_MEM_FN(PrintfInstruction, Type, getType(const Function& fn,
> uint32_t ID), getType(fn, ID))
> +DECL_MEM_FN(MediaBlockReadInstruction, uint8_t, getImageIndex(void),
> getImageIndex())
> +DECL_MEM_FN(MediaBlockReadInstruction, uint8_t, getVectorSize(void),
> getVectorSize())
> +DECL_MEM_FN(MediaBlockWriteInstruction, uint8_t, getImageIndex(void),
> getImageIndex())
> +DECL_MEM_FN(MediaBlockWriteInstruction, uint8_t, getVectorSize(void),
> getVectorSize())
>
> #undef DECL_MEM_FN
>
> @@ -2582,6 +2683,15 @@ DECL_MEM_FN(MemInstruction, void,
> setBtiReg(Register reg), setBtiReg(reg))
> return internal::PrintfInstruction(dst, srcTuple, typeTuple, srcNum, bti,
> num).convert();
> }
>
> + Instruction MBREAD(uint8_t imageIndex, Tuple dst, uint8_t vec_size,
> Tuple coord, uint8_t srcNum) {
> + return internal::MediaBlockReadInstruction(imageIndex, dst, vec_size,
> coord, srcNum).convert();
> + }
> +
> + Instruction MBWRITE(uint8_t imageIndex, Tuple srcTuple, uint8_t srcNum,
> uint8_t vec_size) {
> + return internal::MediaBlockWriteInstruction(imageIndex, srcTuple,
> srcNum, vec_size).convert();
> + }
> +
> +
> std::ostream &operator<< (std::ostream &out, const Instruction &insn) {
> const Function &fn = insn.getFunction();
> const BasicBlock *bb = insn.getParent();
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index 4e7d5b7..b2b0b49 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -635,6 +635,24 @@ namespace ir {
> static bool isClassOf(const Instruction &insn);
> };
>
> + /*! Media Block Read. */
> + class MediaBlockReadInstruction : public Instruction {
> + public:
> + /*! Return true if the given instruction is an instance of this class */
> + static bool isClassOf(const Instruction &insn);
> + uint8_t getImageIndex() const;
> + uint8_t getVectorSize() const;
> + };
> +
> + /*! Media Block Write. */
> + class MediaBlockWriteInstruction : public Instruction {
> + public:
> + /*! Return true if the given instruction is an instance of this class */
> + static bool isClassOf(const Instruction &insn);
> + uint8_t getImageIndex() const;
> + uint8_t getVectorSize() const;
> + };
> +
> /*! Specialize the instruction. Also performs typechecking first based on the
> * opcode. Crashes if it fails
> */
> @@ -867,6 +885,10 @@ namespace ir {
> Instruction SUBGROUP(WorkGroupOps opcode, Register dst, Tuple
> srcTuple, uint8_t srcNum, Type type);
> /*! printf */
> Instruction PRINTF(Register dst, Tuple srcTuple, Tuple typeTuple, uint8_t
> srcNum, uint8_t bti, uint16_t num);
> + /*! media block read */
> + Instruction MBREAD(uint8_t imageIndex, Tuple dst, uint8_t vec_size,
> Tuple coord, uint8_t srcNum);
> + /*! media block write */
> + Instruction MBWRITE(uint8_t imageIndex, Tuple srcTuple, uint8_t srcNum,
> uint8_t vec_size);
> } /* namespace ir */
> } /* namespace gbe */
>
> diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
> index 57e13eb..7d755ae 100644
> --- a/backend/src/ir/instruction.hxx
> +++ b/backend/src/ir/instruction.hxx
> @@ -114,3 +114,5 @@ DECL_INSN(WAIT, WaitInstruction)
> DECL_INSN(WORKGROUP, WorkGroupInstruction)
> DECL_INSN(SUBGROUP, SubGroupInstruction)
> DECL_INSN(PRINTF, PrintfInstruction)
> +DECL_INSN(MBREAD, MediaBlockReadInstruction)
> +DECL_INSN(MBWRITE, MediaBlockWriteInstruction)
> diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp
> index 3162d13..43d4c87 100644
> --- a/backend/src/ir/liveness.cpp
> +++ b/backend/src/ir/liveness.cpp
> @@ -118,7 +118,8 @@ namespace ir {
> uniform = false;
>
> // do not change dst uniform for block read
> - if (insn.getOpcode() == ir::OP_LOAD &&
> ir::cast<ir::LoadInstruction>(insn).isBlock())
> + if ((insn.getOpcode() == ir::OP_LOAD &&
> ir::cast<ir::LoadInstruction>(insn).isBlock()) ||
> + insn.getOpcode() == ir::OP_MBREAD)
> uniform = false;
>
> for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
> diff --git a/backend/src/libocl/src/ocl_substore.ll
> b/backend/src/libocl/src/ocl_substore.ll
> index 665cdfa..f6c2c70 100644
> --- a/backend/src/libocl/src/ocl_substore.ll
> +++ b/backend/src/libocl/src/ocl_substore.ll
> @@ -1,9 +1,42 @@
> target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-
> v192:256-v256:256-v512:512-v1024:1024"
> target triple = "spir"
> +%opencl.image2d_t = type opaque
>
> declare void @__gen_ocl_sub_group_block_write_mem(i32 addrspace(1)*
> nocapture, i32) nounwind alwaysinline noduplicate
> +declare void
> @__gen_ocl_sub_group_block_write_image(%opencl.image2d_t
> addrspace(1)*, i32, i32, i32) nounwind alwaysinline noduplicate
> +declare void
> @__gen_ocl_sub_group_block_write_image2(%opencl.image2d_t
> addrspace(1)*, i32, i32, <2 x i32>) nounwind alwaysinline noduplicate
> +declare void
> @__gen_ocl_sub_group_block_write_image4(%opencl.image2d_t
> addrspace(1)*, i32, i32, <4 x i32>) nounwind alwaysinline noduplicate
> +declare void
> @__gen_ocl_sub_group_block_write_image8(%opencl.image2d_t
> addrspace(1)*, i32, i32, <8 x i32>) nounwind alwaysinline noduplicate
>
> define void @_Z27intel_sub_group_block_writePKU3AS1jj(i32
> addrspace(1)* %p, i32 %data) nounwind alwaysinline noduplicate {
> call void @__gen_ocl_sub_group_block_write_mem(i32 addrspace(1)* %p,
> i32 %data)
> ret void
> }
> +
> +define void @_Z27intel_sub_group_block_write11ocl_image2dDv2_ij(%opencl.image2d_t addrspace(1)* %image, <2 x i32> %byte_coord, i32 %data) nounwind alwaysinline noduplicate {
> + %1 = extractelement <2 x i32> %byte_coord, i32 0
> + %2 = extractelement <2 x i32> %byte_coord, i32 1
> + call void @__gen_ocl_sub_group_block_write_image(%opencl.image2d_t
> addrspace(1)* %image, i32 %1, i32 %2, i32 %data)
> + ret void
> +}
> +
> +define void @_Z28intel_sub_group_block_write211ocl_image2dDv2_iDv2_j(%opencl.image2d_t addrspace(1)* %image, <2 x i32> %byte_coord, <2 x i32> %data) nounwind alwaysinline noduplicate {
> + %1 = extractelement <2 x i32> %byte_coord, i32 0
> + %2 = extractelement <2 x i32> %byte_coord, i32 1
> + call void
> @__gen_ocl_sub_group_block_write_image2(%opencl.image2d_t
> addrspace(1)* %image, i32 %1, i32 %2, <2 x i32> %data)
> + ret void
> +}
> +
> +define void @_Z28intel_sub_group_block_write411ocl_image2dDv2_iDv4_j(%opencl.image2d_t addrspace(1)* %image, <2 x i32> %byte_coord, <4 x i32> %data) nounwind alwaysinline noduplicate {
> + %1 = extractelement <2 x i32> %byte_coord, i32 0
> + %2 = extractelement <2 x i32> %byte_coord, i32 1
> + call void
> @__gen_ocl_sub_group_block_write_image4(%opencl.image2d_t
> addrspace(1)* %image, i32 %1, i32 %2, <4 x i32> %data)
> + ret void
> +}
> +
> +define void @_Z28intel_sub_group_block_write811ocl_image2dDv2_iDv8_j(%opencl.image2d_t addrspace(1)* %image, <2 x i32> %byte_coord, <8 x i32> %data) nounwind alwaysinline noduplicate {
> + %1 = extractelement <2 x i32> %byte_coord, i32 0
> + %2 = extractelement <2 x i32> %byte_coord, i32 1
> + call void
> @__gen_ocl_sub_group_block_write_image8(%opencl.image2d_t
> addrspace(1)* %image, i32 %1, i32 %2, <8 x i32> %data)
> + ret void
> +}
> diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
> b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
> index 66490cc..753a045 100644
> --- a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
> +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
> @@ -187,3 +187,24 @@ OVERLOADABLE void
> intel_sub_group_block_write8(const global uint* p,uint8 data)
> intel_sub_group_block_write(p + get_simd_size() * 6, data.s6);
> intel_sub_group_block_write(p + get_simd_size() * 7, data.s7);
> }
> +
> +PURE CONST uint __gen_ocl_sub_group_block_read_image(image2d_t p,
> int x, int y);
> +PURE CONST uint2 __gen_ocl_sub_group_block_read_image2(image2d_t p,
> int x, int y);
> +PURE CONST uint4 __gen_ocl_sub_group_block_read_image4(image2d_t p,
> int x, int y);
> +PURE CONST uint8 __gen_ocl_sub_group_block_read_image8(image2d_t p,
> int x, int y);
> +OVERLOADABLE uint intel_sub_group_block_read(image2d_t p, int2 cord)
> +{
> + return __gen_ocl_sub_group_block_read_image(p, cord.x, cord.y);
> +}
> +OVERLOADABLE uint2 intel_sub_group_block_read2(image2d_t p, int2 cord)
> +{
> + return __gen_ocl_sub_group_block_read_image2(p, cord.x, cord.y);
> +}
> +OVERLOADABLE uint4 intel_sub_group_block_read4(image2d_t p, int2 cord)
> +{
> + return __gen_ocl_sub_group_block_read_image4(p, cord.x, cord.y);
> +}
> +OVERLOADABLE uint8 intel_sub_group_block_read8(image2d_t p, int2 cord)
> +{
> + return __gen_ocl_sub_group_block_read_image8(p, cord.x, cord.y);
> +}
> diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
> b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
> index d0676be..799f772 100644
> --- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
> +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
> @@ -143,3 +143,13 @@ OVERLOADABLE void
> intel_sub_group_block_write(const __global uint* p, uint data)
> OVERLOADABLE void intel_sub_group_block_write2(const __global uint* p,
> uint2 data);
> OVERLOADABLE void intel_sub_group_block_write4(const __global uint* p,
> uint4 data);
> OVERLOADABLE void intel_sub_group_block_write8(const __global uint* p,
> uint8 data);
> +
> +OVERLOADABLE uint intel_sub_group_block_read(image2d_t image, int2
> byte_coord);
> +OVERLOADABLE uint2 intel_sub_group_block_read2(image2d_t image, int2
> byte_coord);
> +OVERLOADABLE uint4 intel_sub_group_block_read4(image2d_t image, int2
> byte_coord);
> +OVERLOADABLE uint8 intel_sub_group_block_read8(image2d_t image, int2
> byte_coord);
> +
> +OVERLOADABLE void intel_sub_group_block_write(image2d_t image, int2
> byte_coord, uint data);
> +OVERLOADABLE void intel_sub_group_block_write2(image2d_t image, int2
> byte_coord, uint2 data);
> +OVERLOADABLE void intel_sub_group_block_write4(image2d_t image, int2
> byte_coord, uint4 data);
> +OVERLOADABLE void intel_sub_group_block_write8(image2d_t image, int2
> byte_coord, uint8 data);
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index ffa838c..2dcf308 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -699,6 +699,7 @@ namespace gbe
> void emitSubGroupInst(CallInst &I, CallSite &CS, ir::WorkGroupOps
> opcode);
> // Emit subgroup instructions
> void emitBlockReadWriteMemInst(CallInst &I, CallSite &CS, bool isWrite);
> + void emitBlockReadWriteImageInst(CallInst &I, CallSite &CS, bool isWrite,
> uint8_t vec_size);
>
> uint8_t appendSampler(CallSite::arg_iterator AI);
> uint8_t getImageID(CallInst &I);
> @@ -3744,10 +3745,12 @@ namespace gbe
> case GEN_OCL_SUB_GROUP_SCAN_INCLUSIVE_MAX:
> case GEN_OCL_SUB_GROUP_SCAN_INCLUSIVE_MIN:
> case GEN_OCL_LRP:
> - this->newRegister(&I);
> - break;
> case GEN_OCL_SUB_GROUP_BLOCK_READ_MEM:
> - this->newRegister(&I, NULL, false);
> + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE:
> + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE2:
> + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE4:
> + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE8:
> + this->newRegister(&I);
> break;
> case GEN_OCL_PRINTF:
> this->newRegister(&I); // fall through
> @@ -3764,6 +3767,10 @@ namespace gbe
> case GEN_OCL_STORE_PROFILING:
> case GEN_OCL_DEBUGWAIT:
> case GEN_OCL_SUB_GROUP_BLOCK_WRITE_MEM:
> + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE:
> + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE2:
> + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE4:
> + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE8:
> break;
> case GEN_OCL_NOT_FOUND:
> default:
> @@ -4013,6 +4020,39 @@ namespace gbe
> GBE_ASSERT(AI == AE);
> }
>
> + void GenWriter::emitBlockReadWriteImageInst(CallInst &I, CallSite &CS,
> bool isWrite, uint8_t vec_size) {
> + CallSite::arg_iterator AI = CS.arg_begin();
> + CallSite::arg_iterator AE = CS.arg_end();
> + GBE_ASSERT(AI != AE);
> +
> + const uint8_t imageID = getImageID(I);
> + AI++;
> +
> + if(isWrite){
> + vector<ir::Register> srcTupleData;
> + srcTupleData.push_back(getRegister(*(AI++)));
> + srcTupleData.push_back(getRegister(*(AI++)));
> + for(int i = 0;i < vec_size; i++)
> + srcTupleData.push_back(getRegister(*(AI), i));
> + AI++;
> + const ir::Tuple srctuple = ctx.arrayTuple(&srcTupleData[0], 2 + vec_size);
> + ctx.MBWRITE(imageID, srctuple, 2 + vec_size, vec_size);
> + } else {
> + ir::Register src[2];
> + src[0] = getRegister(*(AI++));
> + src[1] = getRegister(*(AI++));
> + vector<ir::Register> dstTupleData;
> + for(int i = 0;i < vec_size; i++)
> + dstTupleData.push_back(getRegister(&I, i));
> + const ir::Tuple srctuple = ctx.arrayTuple(src, 2);
> + const ir::Tuple dsttuple = ctx.arrayTuple(&dstTupleData[0], vec_size);
> + ctx.MBREAD(imageID, dsttuple, vec_size, srctuple, 2);
> + }
> +
> + GBE_ASSERT(AI == AE);
> + }
> +
> +
> /* append a new sampler. should be called before any reference to
> * a sampler_t value. */
> uint8_t GenWriter::appendSampler(CallSite::arg_iterator AI) {
> @@ -4841,6 +4881,22 @@ namespace gbe
> this->emitBlockReadWriteMemInst(I, CS, false); break;
> case GEN_OCL_SUB_GROUP_BLOCK_WRITE_MEM:
> this->emitBlockReadWriteMemInst(I, CS, true); break;
> + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE:
> + this->emitBlockReadWriteImageInst(I, CS, false, 1); break;
> + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE2:
> + this->emitBlockReadWriteImageInst(I, CS, false, 2); break;
> + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE4:
> + this->emitBlockReadWriteImageInst(I, CS, false, 4); break;
> + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE8:
> + this->emitBlockReadWriteImageInst(I, CS, false, 8); break;
> + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE:
> + this->emitBlockReadWriteImageInst(I, CS, true, 1); break;
> + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE2:
> + this->emitBlockReadWriteImageInst(I, CS, true, 2); break;
> + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE4:
> + this->emitBlockReadWriteImageInst(I, CS, true, 4); break;
> + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE8:
> + this->emitBlockReadWriteImageInst(I, CS, true, 8); break;
> default: break;
> }
> }
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx
> b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 003be91..456ab58 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -219,6 +219,14 @@
> DECL_LLVM_GEN_FUNCTION(SUB_GROUP_SCAN_INCLUSIVE_MIN,
> __gen_ocl_sub_group_scan_in
>
> DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_MEM,
> __gen_ocl_sub_group_block_read_mem)
> DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_MEM,
> __gen_ocl_sub_group_block_write_mem)
> +DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_IMAGE,
> __gen_ocl_sub_group_block_read_image)
> +DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_IMAGE2,
> __gen_ocl_sub_group_block_read_image2)
> +DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_IMAGE4,
> __gen_ocl_sub_group_block_read_image4)
> +DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_IMAGE8,
> __gen_ocl_sub_group_block_read_image8)
> +DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_IMAGE,
> __gen_ocl_sub_group_block_write_image)
> +DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_IMAGE2,
> __gen_ocl_sub_group_block_write_image2)
> +DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_IMAGE4,
> __gen_ocl_sub_group_block_write_image4)
> +DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_IMAGE8,
> __gen_ocl_sub_group_block_write_image8)
>
> // common function
> DECL_LLVM_GEN_FUNCTION(LRP, __gen_ocl_lrp)
> diff --git a/backend/src/llvm/llvm_scalarize.cpp
> b/backend/src/llvm/llvm_scalarize.cpp
> index 53fd320..e60bf4b 100644
> --- a/backend/src/llvm/llvm_scalarize.cpp
> +++ b/backend/src/llvm/llvm_scalarize.cpp
> @@ -682,7 +682,21 @@ namespace gbe {
> *CI = InsertToVector(call, *CI);
> break;
> }
> + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE:
> + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE2:
> + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE4:
> + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE8:
> + {
> + ++CI;
> + ++CI;
> + if ((*CI)->getType()->isVectorTy())
> + *CI = InsertToVector(call, *CI);
> + break;
> + }
> case GEN_OCL_VME:
> + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE2:
> + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE4:
> + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE8:
> setAppendPoint(call);
> extractFromVector(call);
> break;
> --
> 2.7.4
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet mailing list