[Beignet] [PATCH v6 1/4] Add built-in function __gen_ocl_vme.
Song, Ruiling
ruiling.song at intel.com
Mon Nov 9 19:44:07 PST 2015
This version of the patchset LGTM.
Thanks!
Ruiling
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Chuanbo Weng
> Sent: Friday, November 6, 2015 11:28 AM
> To: beignet at lists.freedesktop.org
> Cc: Weng, Chuanbo
> Subject: [Beignet] [PATCH v6 1/4] Add built-in function __gen_ocl_vme.
>
> __gen_ocl_vme is used for hardware accelerated video motion estimation.
> It gets payload values as parameters and uses MOV to pass these payload
> values to VME SEND Message's payload grfs. The int8 return value is used
> to store SEND Message writeback.
>
> v2:
> Remove the 5 unnecessary parameters (src_grf*) of the built-in function (we
> just need to allocate the related registers in the gen_insn_selection step).
>
> v3:
> Remove redundant code and change MAX_SRC_NUM to 40.
>
> v4:
> Choose the message response length by message type instead of hard-coding it.
>
> v5:
> Choose message response length by message type in the whole backend
> pipeline.
>
> v6:
> Treat simd8 and simd16 differently when moving payload values to consecutive
> payload grfs.
>
> Signed-off-by: Chuanbo Weng <chuanbo.weng at intel.com>
> ---
> backend/src/backend/gen/gen_mesa_disasm.c | 14 ++++
> backend/src/backend/gen7_instruction.hpp | 15 ++++
> backend/src/backend/gen_context.cpp | 98 ++++++++++++++++++++++
> backend/src/backend/gen_context.hpp | 1 +
> backend/src/backend/gen_defs.hpp | 15 ++++
> backend/src/backend/gen_encoder.cpp | 44 ++++++++++
> backend/src/backend/gen_encoder.hpp | 13 +++
> .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
> backend/src/backend/gen_insn_selection.cpp | 73 ++++++++++++++++
> backend/src/backend/gen_insn_selection.hpp | 14 +++-
> backend/src/backend/gen_insn_selection.hxx | 1 +
> backend/src/ir/instruction.cpp | 66 +++++++++++++++
> backend/src/ir/instruction.hpp | 17 +++-
> backend/src/ir/instruction.hxx | 1 +
> backend/src/libocl/include/ocl_misc.h | 15 ++++
> backend/src/llvm/llvm_gen_backend.cpp | 47 +++++++++++
> backend/src/llvm/llvm_gen_ocl_function.hxx | 2 +
> backend/src/llvm/llvm_scalarize.cpp | 4 +
> 18 files changed, 436 insertions(+), 5 deletions(-)
>
> diff --git a/backend/src/backend/gen/gen_mesa_disasm.c
> b/backend/src/backend/gen/gen_mesa_disasm.c
> index 5b71cfa..3198da7 100644
> --- a/backend/src/backend/gen/gen_mesa_disasm.c
> +++ b/backend/src/backend/gen/gen_mesa_disasm.c
> @@ -476,6 +476,13 @@ static int column;
>
> static int gen_version;
>
> +#define GEN7_BITS_FIELD(inst, gen7) \
> + ({ \
> + int bits; \
> + bits = ((const union Gen7NativeInstruction *)inst)->gen7; \
> + bits; \
> + })
> +
> #define GEN_BITS_FIELD(inst, gen) \
> ({ \
> int bits; \
> @@ -530,6 +537,8 @@ static int gen_version;
> #define EXECUTION_SIZE(inst) GEN_BITS_FIELD(inst, header.execution_size)
> #define BRANCH_JIP(inst) GEN_BITS_FIELD2(inst, bits3.gen7_branch.jip,
> bits3.gen8_branch.jip/8)
> #define BRANCH_UIP(inst) GEN_BITS_FIELD2(inst, bits3.gen7_branch.uip,
> bits2.gen8_branch.uip/8)
> +#define VME_BTI(inst) GEN7_BITS_FIELD(inst, bits3.vme_gen7.bti)
> +#define VME_MSG_TYPE(inst) GEN7_BITS_FIELD(inst,
> bits3.vme_gen7.msg_type)
> #define SAMPLE_BTI(inst) GEN_BITS_FIELD(inst, bits3.sampler_gen7.bti)
> #define SAMPLER(inst) GEN_BITS_FIELD(inst,
> bits3.sampler_gen7.sampler)
> #define SAMPLER_MSG_TYPE(inst) GEN_BITS_FIELD(inst,
> bits3.sampler_gen7.msg_type)
> @@ -1431,6 +1440,11 @@ int gen_disasm (FILE *file, const void *inst, uint32_t
> deviceID, uint32_t compac
>
> if (GEN_BITS_FIELD2(inst, bits1.da1.src1_reg_file, bits2.da1.src1_reg_file) ==
> GEN_IMMEDIATE_VALUE) {
> switch (target) {
> + case GEN_SFID_VIDEO_MOTION_EST:
> + format(file, " (bti: %d, msg_type: %d)",
> + VME_BTI(inst),
> + VME_MSG_TYPE(inst));
> + break;
> case GEN_SFID_SAMPLER:
> format(file, " (%d, %d, %d, %d)",
> SAMPLE_BTI(inst),
> diff --git a/backend/src/backend/gen7_instruction.hpp
> b/backend/src/backend/gen7_instruction.hpp
> index 51f342b..258dd24 100644
> --- a/backend/src/backend/gen7_instruction.hpp
> +++ b/backend/src/backend/gen7_instruction.hpp
> @@ -350,6 +350,21 @@ union Gen7NativeInstruction
> uint32_t end_of_thread:1;
> } sampler_gen7;
>
> + struct {
> + uint32_t bti:8;
> + uint32_t vme_search_path_lut:3;
> + uint32_t lut_sub:2;
> + uint32_t msg_type:2;
> + uint32_t stream_in:1;
> + uint32_t stream_out:1;
> + uint32_t reserved_mbz:2;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad1:2;
> + uint32_t end_of_thread:1;
> + } vme_gen7;
> +
> /**
> * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
> *
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 4e2ebfb..ccc9f17 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -2266,6 +2266,104 @@ namespace gbe
> p->SAMPLE(dst, msgPayload, msgLen, false, bti, sampler, simdWidth, -1, 0,
> insn.extra.isLD, insn.extra.isUniform);
> }
>
> + void GenContext::emitVmeInstruction(const SelectionInstruction &insn) {
> + const GenRegister dst = ra->genReg(insn.dst(0));
> + const unsigned int msg_type = insn.extra.msg_type;
> +
> + GBE_ASSERT(msg_type == 1);
> + int rsp_len;
> + if(msg_type == 1)
> + rsp_len = 6;
> + uint32_t execWidth_org = p->curr.execWidth;
> + p->push();
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.noMask = 1;
> + p->curr.execWidth = 1;
> + /* Use MOV to Setup bits of payload: mov payload value stored in insn.src(x)
> to
> + * 5 consecutive payload grf.
> + * In simd8 mode, one virtual grf register map to one physical grf register.
> But
> + * in simd16 mode, one virtual grf register map to two physical grf registers.
> + * So we should treat them differently.
> + * */
> + if(execWidth_org == 8){
> + for(int i=0; i < 5; i++){
> + GenRegister payload_grf = ra->genReg(insn.dst(rsp_len+i));
> + payload_grf.vstride = GEN_VERTICAL_STRIDE_0;
> + payload_grf.width = GEN_WIDTH_1;
> + payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0;
> + payload_grf.subphysical = 1;
> + for(int j=0; j < 8; j++){
> + payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD);
> + GenRegister payload_val = ra->genReg(insn.src(i*8+j));
> + payload_val.vstride = GEN_VERTICAL_STRIDE_0;
> + payload_val.width = GEN_WIDTH_1;
> + payload_val.hstride = GEN_HORIZONTAL_STRIDE_0;
> +
> + p->MOV(payload_grf, payload_val);
> + }
> + }
> + }
> + else if(execWidth_org == 16){
> + for(int i=0; i < 2; i++){
> + for(int k = 0; k < 2; k++){
> + GenRegister payload_grf = ra->genReg(insn.dst(rsp_len+i));
> + payload_grf.nr += k;
> + payload_grf.vstride = GEN_VERTICAL_STRIDE_0;
> + payload_grf.width = GEN_WIDTH_1;
> + payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0;
> + payload_grf.subphysical = 1;
> + for(int j=0; j < 8; j++){
> + payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD);
> + GenRegister payload_val = ra->genReg(insn.src(i*16+k*8+j));
> + payload_val.vstride = GEN_VERTICAL_STRIDE_0;
> + payload_val.width = GEN_WIDTH_1;
> + payload_val.hstride = GEN_HORIZONTAL_STRIDE_0;
> +
> + p->MOV(payload_grf, payload_val);
> + }
> + }
> + }
> + {
> + int i = 2;
> + GenRegister payload_grf = ra->genReg(insn.dst(rsp_len+i));
> + payload_grf.vstride = GEN_VERTICAL_STRIDE_0;
> + payload_grf.width = GEN_WIDTH_1;
> + payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0;
> + payload_grf.subphysical = 1;
> + for(int j=0; j < 8; j++){
> + payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD);
> + GenRegister payload_val = ra->genReg(insn.src(i*16+j));
> + payload_val.vstride = GEN_VERTICAL_STRIDE_0;
> + payload_val.width = GEN_WIDTH_1;
> + payload_val.hstride = GEN_HORIZONTAL_STRIDE_0;
> +
> + p->MOV(payload_grf, payload_val);
> + }
> + }
> + }
> + p->pop();
> +
> + p->push();
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.noMask = 1;
> + p->curr.execWidth = 1;
> + GenRegister payload_did = GenRegister::retype(ra-
> >genReg(insn.dst(rsp_len)), GEN_TYPE_UB);
> + payload_did.vstride = GEN_VERTICAL_STRIDE_0;
> + payload_did.width = GEN_WIDTH_1;
> + payload_did.hstride = GEN_HORIZONTAL_STRIDE_0;
> + payload_did.subphysical = 1;
> + payload_did.subnr = 20 * typeSize(GEN_TYPE_UB);
> + GenRegister grf0 = GenRegister::ub1grf(0, 20);
> + p->MOV(payload_did, grf0);
> + p->pop();
> +
> + const GenRegister msgPayload = ra->genReg(insn.dst(rsp_len));
> + const unsigned char bti = insn.getbti();
> + const unsigned int vme_search_path_lut = insn.extra.vme_search_path_lut;
> + const unsigned int lut_sub = insn.extra.lut_sub;
> + p->VME(bti, dst, msgPayload, msg_type, vme_search_path_lut, lut_sub);
> + }
> +
> void GenContext::scratchWrite(const GenRegister header, uint32_t offset,
> uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode) {
> p->push();
> uint32_t simdWidth = p->curr.execWidth;
> diff --git a/backend/src/backend/gen_context.hpp
> b/backend/src/backend/gen_context.hpp
> index 4044694..870266c 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -167,6 +167,7 @@ namespace gbe
> virtual void emitUnpackLongInstruction(const SelectionInstruction &insn);
> void emitDWordGatherInstruction(const SelectionInstruction &insn);
> void emitSampleInstruction(const SelectionInstruction &insn);
> + void emitVmeInstruction(const SelectionInstruction &insn);
> void emitTypedWriteInstruction(const SelectionInstruction &insn);
> void emitSpillRegInstruction(const SelectionInstruction &insn);
> void emitUnSpillRegInstruction(const SelectionInstruction &insn);
> diff --git a/backend/src/backend/gen_defs.hpp
> b/backend/src/backend/gen_defs.hpp
> index 1b550ac..09cb2ba 100644
> --- a/backend/src/backend/gen_defs.hpp
> +++ b/backend/src/backend/gen_defs.hpp
> @@ -615,6 +615,21 @@ union GenNativeInstruction
> uint32_t end_of_thread:1;
> } sampler_gen7;
>
> + struct {
> + uint32_t bti:8;
> + uint32_t vme_search_path_lut:3;
> + uint32_t lut_sub:2;
> + uint32_t msg_type:2;
> + uint32_t stream_in:1;
> + uint32_t stream_out:1;
> + uint32_t reserved_mbz:2;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad1:2;
> + uint32_t end_of_thread:1;
> + } vme_gen7;
> +
> /**
> * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
> *
> diff --git a/backend/src/backend/gen_encoder.cpp
> b/backend/src/backend/gen_encoder.cpp
> index 2cc51cc..be38cef 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -1191,6 +1191,50 @@ namespace gbe
> simd_mode, return_format);
> }
>
> + void GenEncoder::setVmeMessage(GenNativeInstruction *insn,
> + unsigned char bti,
> + uint32_t response_length,
> + uint32_t msg_length,
> + uint32_t msg_type,
> + unsigned char vme_search_path_lut,
> + unsigned char lut_sub)
> + {
> + const GenMessageTarget sfid = GEN_SFID_VIDEO_MOTION_EST;
> + setMessageDescriptor(insn, sfid, msg_length, response_length, true);
> + insn->bits3.vme_gen7.bti = bti;
> + insn->bits3.vme_gen7.vme_search_path_lut = vme_search_path_lut;
> + insn->bits3.vme_gen7.lut_sub = lut_sub;
> + insn->bits3.vme_gen7.msg_type = msg_type;
> + insn->bits3.vme_gen7.stream_in = 0;
> + insn->bits3.vme_gen7.stream_out = 0;
> + insn->bits3.vme_gen7.reserved_mbz = 0;
> +
> + }
> +
> + void GenEncoder::VME(unsigned char bti,
> + GenRegister dest,
> + GenRegister msg,
> + uint32_t msg_type,
> + uint32_t vme_search_path_lut,
> + uint32_t lut_sub)
> + {
> + /* Currectly we just support inter search only, we will support other
> + * modes in future.
> + */
> + GBE_ASSERT(msg_type == 1);
> + uint32_t msg_length, response_length;
> + if(msg_type == 1){
> + msg_length = 5;
> + response_length = 6;
> + }
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> + this->setHeader(insn);
> + this->setDst(insn, dest);
> + this->setSrc0(insn, msg);
> + setVmeMessage(insn, bti, response_length, msg_length,
> + msg_type, vme_search_path_lut, lut_sub);
> + }
> +
> void GenEncoder::TYPED_WRITE(GenRegister msg, bool header_present,
> unsigned char bti)
> {
> GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> diff --git a/backend/src/backend/gen_encoder.hpp
> b/backend/src/backend/gen_encoder.hpp
> index f2bb5ab..6df7087 100644
> --- a/backend/src/backend/gen_encoder.hpp
> +++ b/backend/src/backend/gen_encoder.hpp
> @@ -203,6 +203,19 @@ namespace gbe
> bool header_present,
> uint32_t simd_mode,
> uint32_t return_format);
> + virtual void VME(unsigned char bti,
> + GenRegister dest,
> + GenRegister msg,
> + uint32_t msg_type,
> + uint32_t vme_search_path_lut,
> + uint32_t lut_sub);
> + void setVmeMessage(GenNativeInstruction *insn,
> + unsigned char bti,
> + uint32_t response_length,
> + uint32_t msg_length,
> + uint32_t msg_type,
> + unsigned char vme_search_path_lut,
> + unsigned char lut_sub);
>
> /*! TypedWrite instruction for texture */
> virtual void TYPED_WRITE(GenRegister header,
> diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index 9b60c17..878e0e7 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -36,6 +36,7 @@ DECL_GEN7_SCHEDULE(UnpackByte, 40, 1, 1)
> DECL_GEN7_SCHEDULE(PackLong, 40, 1, 1)
> DECL_GEN7_SCHEDULE(UnpackLong, 40, 1, 1)
> DECL_GEN7_SCHEDULE(Sample, 160, 1, 1)
> +DECL_GEN7_SCHEDULE(Vme, 320, 1, 1)
> DECL_GEN7_SCHEDULE(TypedWrite, 80, 1, 1)
> DECL_GEN7_SCHEDULE(SpillReg, 20, 1, 1)
> DECL_GEN7_SCHEDULE(UnSpillReg, 160, 1, 1)
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index 2452aea..cfaa792 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -187,6 +187,7 @@ namespace gbe
> this->opcode == SEL_OP_ATOMIC ||
> this->opcode == SEL_OP_BYTE_GATHER ||
> this->opcode == SEL_OP_SAMPLE ||
> + this->opcode == SEL_OP_VME ||
> this->opcode == SEL_OP_DWORD_GATHER;
> }
>
> @@ -661,6 +662,8 @@ namespace gbe
> void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2);
> /*! Encode sample instructions */
> void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *msgPayloads,
> uint32_t msgNum, uint32_t bti, uint32_t sampler, bool isLD, bool isUniform);
> + /*! Encode vme instructions */
> + void VME(uint32_t bti, GenRegister *dst, GenRegister *payloadVal, uint32_t
> dstNum, uint32_t srcNum, uint32_t msg_type, uint32_t vme_search_path_lut,
> uint32_t lut_sub);
> /*! Encode typed write instructions */
> void TYPED_WRITE(GenRegister *msgs, uint32_t msgNum, uint32_t bti, bool
> is3D);
> /*! Get image information */
> @@ -2120,6 +2123,34 @@ namespace gbe
> insn->extra.isUniform = isUniform;
> }
>
> + void Selection::Opaque::VME(uint32_t bti, GenRegister *dst, GenRegister
> *payloadVal,
> + uint32_t dstNum, uint32_t srcNum, uint32_t msg_type,
> + uint32_t vme_search_path_lut, uint32_t lut_sub) {
> + SelectionInstruction *insn = this->appendInsn(SEL_OP_VME, dstNum,
> srcNum);
> + SelectionVector *dstVector = this->appendVector();
> + SelectionVector *msgVector = this->appendVector();
> +
> + for (uint32_t elemID = 0; elemID < dstNum; ++elemID)
> + insn->dst(elemID) = dst[elemID];
> + for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
> + insn->src(elemID) = payloadVal[elemID];
> +
> + dstVector->regNum = dstNum;
> + dstVector->isSrc = 0;
> + dstVector->offsetID = 0;
> + dstVector->reg = &insn->dst(0);
> +
> + msgVector->regNum = srcNum;
> + msgVector->isSrc = 1;
> + msgVector->offsetID = 0;
> + msgVector->reg = &insn->src(0);
> +
> + insn->setbti(bti);
> + insn->extra.msg_type = msg_type;
> + insn->extra.vme_search_path_lut = vme_search_path_lut;
> + insn->extra.lut_sub = lut_sub;
> + }
> +
> ///////////////////////////////////////////////////////////////////////////
> // Code selection public implementation
> ///////////////////////////////////////////////////////////////////////////
> @@ -5126,6 +5157,47 @@ namespace gbe
> DECL_CTOR(SampleInstruction, 1, 1);
> };
>
> + DECL_PATTERN(VmeInstruction)
> + {
> + INLINE bool emitOne(Selection::Opaque &sel, const ir::VmeInstruction &insn,
> bool &markChildren) const
> + {
> + using namespace ir;
> + uint32_t msg_type, vme_search_path_lut, lut_sub;
> + msg_type = insn.getMsgType();
> + vme_search_path_lut = 0;
> + lut_sub = 0;
> + GBE_ASSERT(msg_type == 1);
> + uint32_t payloadLen = 0;
> + //We allocate 5 virtual payload grfs to selection dst register.
> + if(msg_type == 1){
> + payloadLen = 5;
> + }
> + uint32_t selDstNum = insn.getDstNum() + payloadLen;
> + uint32_t srcNum = insn.getSrcNum();
> + vector<GenRegister> dst(selDstNum);
> + vector<GenRegister> payloadVal(srcNum);
> + uint32_t valueID = 0;
> + for (valueID = 0; valueID < insn.getDstNum(); ++valueID)
> + dst[valueID] = sel.selReg(insn.getDst(valueID), insn.getDstType());
> + for (valueID = insn.getDstNum(); valueID < selDstNum; ++valueID)
> + dst[valueID] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> +
> + for (valueID = 0; valueID < srcNum; ++valueID)
> + payloadVal[valueID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType());
> +
> + uint32_t bti = insn.getImageIndex();
> + if (bti > BTI_MAX_ID) {
> + std::cerr << "Too large bti " << bti;
> + return false;
> + }
> +
> + sel.VME(bti, dst.data(), payloadVal.data(), selDstNum, srcNum, msg_type,
> vme_search_path_lut, lut_sub);
> +
> + return true;
> + }
> + DECL_CTOR(VmeInstruction, 1, 1);
> + };
> +
> /*! Typed write instruction pattern. */
> DECL_PATTERN(TypedWriteInstruction)
> {
> @@ -5591,6 +5663,7 @@ namespace gbe
> this->insert<MulAddInstructionPattern>();
> this->insert<SelectModifierInstructionPattern>();
> this->insert<SampleInstructionPattern>();
> + this->insert<VmeInstructionPattern>();
> this->insert<GetImageInfoInstructionPattern>();
> this->insert<ReadARFInstructionPattern>();
> this->insert<RegionInstructionPattern>();
> diff --git a/backend/src/backend/gen_insn_selection.hpp
> b/backend/src/backend/gen_insn_selection.hpp
> index f51c905..578db41 100644
> --- a/backend/src/backend/gen_insn_selection.hpp
> +++ b/backend/src/backend/gen_insn_selection.hpp
> @@ -90,8 +90,8 @@ namespace gbe
> const GenRegister &dst(uint32_t dstID) const { return regs[dstID]; }
> /*! Damn C++ */
> const GenRegister &src(uint32_t srcID) const { return regs[dstNum+srcID]; }
> - /*! No more than 9 sources (used by typed writes on simd8 mode.) */
> - enum { MAX_SRC_NUM = 9 };
> + /*! No more than 40 sources (40 sources are used by vme for payload
> passing and setting) */
> + enum { MAX_SRC_NUM = 40 };
> /*! No more than 16 destinations (15 used by I64DIV/I64REM) */
> enum { MAX_DST_NUM = 16 };
> /*! State of the instruction (extra fields neeed for the encoding) */
> @@ -129,6 +129,12 @@ namespace gbe
> bool isLD; // is this a ld message?
> bool isUniform;
> };
> + struct {
> + uint16_t vme_bti:8;
> + uint16_t msg_type:2;
> + uint16_t vme_search_path_lut:3;
> + uint16_t lut_sub:2;
> + };
> uint32_t barrierType;
> bool longjmp;
> uint32_t indirect_offset;
> @@ -138,7 +144,7 @@ namespace gbe
> /*! Number of destinations */
> uint8_t dstNum:5;
> /*! Number of sources */
> - uint8_t srcNum:4;
> + uint8_t srcNum:6;
> /*! To store various indices */
> uint32_t index;
> /*! For BRC/IF to store the UIP */
> @@ -152,6 +158,7 @@ namespace gbe
> switch (opcode) {
> case SEL_OP_DWORD_GATHER: return extra.function;
> case SEL_OP_SAMPLE: return extra.rdbti;
> + case SEL_OP_VME: return extra.vme_bti;
> case SEL_OP_TYPED_WRITE: return extra.bti;
> default:
> GBE_ASSERT(0);
> @@ -164,6 +171,7 @@ namespace gbe
> switch (opcode) {
> case SEL_OP_DWORD_GATHER: extra.function = bti; return;
> case SEL_OP_SAMPLE: extra.rdbti = bti; return;
> + case SEL_OP_VME: extra.vme_bti = bti; return;
> case SEL_OP_TYPED_WRITE: extra.bti = bti; return;
> default:
> GBE_ASSERT(0);
> diff --git a/backend/src/backend/gen_insn_selection.hxx
> b/backend/src/backend/gen_insn_selection.hxx
> index 479398b..4d3e921 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -65,6 +65,7 @@ DECL_SELECTION_IR(UNPACK_BYTE,
> UnpackByteInstruction)
> DECL_SELECTION_IR(PACK_LONG, PackLongInstruction)
> DECL_SELECTION_IR(UNPACK_LONG, UnpackLongInstruction)
> DECL_SELECTION_IR(SAMPLE, SampleInstruction)
> +DECL_SELECTION_IR(VME, VmeInstruction)
> DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
> DECL_SELECTION_IR(SPILL_REG, SpillRegInstruction)
> DECL_SELECTION_IR(UNSPILL_REG, UnSpillRegInstruction)
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index f93c528..7bf787e 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -595,6 +595,58 @@ namespace ir {
> static const uint32_t dstNum = 4;
> };
>
> + class ALIGNED_INSTRUCTION VmeInstruction :
> + public BasePolicy,
> + public TupleSrcPolicy<VmeInstruction>,
> + public TupleDstPolicy<VmeInstruction>
> + {
> + public:
> + VmeInstruction(uint8_t imageIdx, Tuple dstTuple, Tuple srcTuple,
> + uint32_t dstNum, uint32_t srcNum, int msg_type,
> + int vme_search_path_lut, int lut_sub) {
> + this->opcode = OP_VME;
> + this->dst = dstTuple;
> + this->src = srcTuple;
> + this->dstNum = dstNum;
> + this->srcNum = srcNum;
> + this->imageIdx = imageIdx;
> + this->msg_type = msg_type;
> + this->vme_search_path_lut = vme_search_path_lut;
> + this->lut_sub = lut_sub;
> + }
> + INLINE bool wellFormed(const Function &fn, std::string &why) const;
> + INLINE void out(std::ostream &out, const Function &fn) const {
> + this->outOpcode(out);
> + out << " src_surface id " << (int)this->getImageIndex()
> + << " ref_surface id " << (int)this->getImageIndex() + 1;
> + for(uint32_t i = 0; i < dstNum; i++){
> + out<< " %" << this->getDst(fn, i);
> + }
> + for(uint32_t i = 0; i < srcNum; i++){
> + out<< " %" << this->getSrc(fn, i);
> + }
> + out
> + << " msg_type " << (int)this->getMsgType()
> + << " vme_search_path_lut " << (int)this->vme_search_path_lut
> + << " lut_sub " << (int)this->lut_sub;
> + }
> + Tuple src;
> + Tuple dst;
> +
> + INLINE uint8_t getImageIndex(void) const { return this->imageIdx; }
> + INLINE uint8_t getMsgType(void) const { return this->msg_type; }
> +
> + INLINE Type getSrcType(void) const { return TYPE_U32; }
> + INLINE Type getDstType(void) const { return TYPE_U32; }
> + uint8_t imageIdx;
> + uint8_t msg_type;
> + uint8_t vme_search_path_lut;
> + uint8_t lut_sub;
> + uint32_t srcNum;
> + uint32_t dstNum;
> + };
> +
> +
> class ALIGNED_INSTRUCTION TypedWriteInstruction : // TODO
> public BasePolicy,
> public TupleSrcPolicy<TypedWriteInstruction>,
> @@ -1111,6 +1163,8 @@ namespace ir {
> // TODO
> INLINE bool SampleInstruction::wellFormed(const Function &fn, std::string
> &why) const
> { return true; }
> + INLINE bool VmeInstruction::wellFormed(const Function &fn, std::string
> &why) const
> + { return true; }
> INLINE bool TypedWriteInstruction::wellFormed(const Function &fn,
> std::string &why) const
> { return true; }
> INLINE bool GetImageInfoInstruction::wellFormed(const Function &fn,
> std::string &why) const
> @@ -1502,6 +1556,10 @@ START_INTROSPECTION(LabelInstruction)
> #include "ir/instruction.hxx"
> END_INTROSPECTION(LabelInstruction)
>
> +START_INTROSPECTION(VmeInstruction)
> +#include "ir/instruction.hxx"
> +END_INTROSPECTION(VmeInstruction)
> +
> #undef END_INTROSPECTION
> #undef START_INTROSPECTION
> #undef DECL_INSN
> @@ -1694,6 +1752,10 @@ DECL_MEM_FN(SampleInstruction, Type,
> getDstType(void), getDstType())
> DECL_MEM_FN(SampleInstruction, uint8_t, getSamplerIndex(void),
> getSamplerIndex())
> DECL_MEM_FN(SampleInstruction, uint8_t, getSamplerOffset(void),
> getSamplerOffset())
> DECL_MEM_FN(SampleInstruction, uint8_t, getImageIndex(void),
> getImageIndex())
> +DECL_MEM_FN(VmeInstruction, Type, getSrcType(void), getSrcType())
> +DECL_MEM_FN(VmeInstruction, Type, getDstType(void), getDstType())
> +DECL_MEM_FN(VmeInstruction, uint8_t, getImageIndex(void),
> getImageIndex())
> +DECL_MEM_FN(VmeInstruction, uint8_t, getMsgType(void), getMsgType())
> DECL_MEM_FN(TypedWriteInstruction, Type, getSrcType(void), getSrcType())
> DECL_MEM_FN(TypedWriteInstruction, Type, getCoordType(void),
> getCoordType())
> DECL_MEM_FN(TypedWriteInstruction, uint8_t, getImageIndex(void),
> getImageIndex())
> @@ -1932,6 +1994,10 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t,
> getImageIndex(void), getImageIndex
> return internal::SampleInstruction(imageIndex, dst, src, srcNum, dstIsFloat,
> srcIsFloat, sampler, samplerOffset).convert();
> }
>
> + Instruction VME(uint8_t imageIndex, Tuple dst, Tuple src, uint32_t dstNum,
> uint32_t srcNum, int msg_type, int vme_search_path_lut, int lut_sub) {
> + return internal::VmeInstruction(imageIndex, dst, src, dstNum, srcNum,
> msg_type, vme_search_path_lut, lut_sub).convert();
> + }
> +
> Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, uint8_t srcNum, Type
> srcType, Type coordType) {
> return internal::TypedWriteInstruction(imageIndex, src, srcNum, srcType,
> coordType).convert();
> }
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index 3f3c655..c8da416 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -191,8 +191,8 @@ namespace ir {
> template <typename T> INLINE bool isMemberOf(void) const {
> return T::isClassOf(*this);
> }
> - /*! max_src for store instruction (vec16 + addr) */
> - static const uint32_t MAX_SRC_NUM = 32;
> + /*! max_src used by vme for payload passing and setting */
> + static const uint32_t MAX_SRC_NUM = 40;
> static const uint32_t MAX_DST_NUM = 32;
> protected:
> BasicBlock *parent; //!< The basic block containing the instruction
> @@ -399,6 +399,17 @@ namespace ir {
> static bool isClassOf(const Instruction &insn);
> };
>
> + /*! Video motion estimation */
> + class VmeInstruction : public Instruction {
> + public:
> + uint8_t getImageIndex() const;
> + uint8_t getMsgType() const;
> + Type getSrcType(void) const;
> + Type getDstType(void) const;
> + /*! Return true if the given instruction is an instance of this class */
> + static bool isClassOf(const Instruction &insn);
> + };
> +
> typedef union _ImageInfoKey{
> _ImageInfoKey(uint8_t i, uint8_t t) : index(i), type(t) {};
> _ImageInfoKey(int key) : data(key) {};
> @@ -756,6 +767,8 @@ namespace ir {
> Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, uint8_t srcNum, Type
> srcType, Type coordType);
> /*! sample textures */
> Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, uint8_t srcNum,
> bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset);
> + /*! video motion estimation */
> + Instruction VME(uint8_t imageIndex, Tuple dst, Tuple src, uint32_t dstNum,
> uint32_t srcNum, int msg_type, int vme_search_path_lut, int lut_sub);
> /*! get image information , such as width/height/depth/... */
> Instruction GET_IMAGE_INFO(int infoType, Register dst, uint8_t imageIndex,
> Register infoReg);
> /*! label labelIndex */
> diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
> index 81548c9..27d59a9 100644
> --- a/backend/src/ir/instruction.hxx
> +++ b/backend/src/ir/instruction.hxx
> @@ -85,6 +85,7 @@ DECL_INSN(SYNC, SyncInstruction)
> DECL_INSN(LABEL, LabelInstruction)
> DECL_INSN(READ_ARF, ReadARFInstruction)
> DECL_INSN(REGION, RegionInstruction)
> +DECL_INSN(VME, VmeInstruction)
> DECL_INSN(INDIRECT_MOV, IndirectMovInstruction)
> DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction)
> DECL_INSN(MUL_HI, BinaryInstruction)
> diff --git a/backend/src/libocl/include/ocl_misc.h
> b/backend/src/libocl/include/ocl_misc.h
> index 359025b..7d4abab 100644
> --- a/backend/src/libocl/include/ocl_misc.h
> +++ b/backend/src/libocl/include/ocl_misc.h
> @@ -136,5 +136,20 @@ struct time_stamp {
> uint event;
> };
>
> +uint __gen_ocl_region(ushort offset, uint data);
> +
> struct time_stamp __gen_ocl_get_timestamp(void);
> +
> +uint8 __gen_ocl_vme(image2d_t, image2d_t,
> + uint, uint, uint, uint,
> + uint, uint, uint, uint,
> + uint, uint, uint, uint,
> + uint, uint, uint, uint,
> + uint, uint, uint, uint,
> + uint, uint, uint, uint,
> + uint, uint, uint, uint,
> + uint, uint, uint, uint,
> + uint, uint, uint, uint,
> + uint, uint, uint, uint,
> + int, int, int);
> #endif
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index 7299d53..19927ba 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -3541,6 +3541,7 @@ namespace gbe
> case GEN_OCL_REGION:
> case GEN_OCL_SIMD_ID:
> case GEN_OCL_SIMD_SHUFFLE:
> + case GEN_OCL_VME:
> this->newRegister(&I);
> break;
> case GEN_OCL_PRINTF:
> @@ -3839,6 +3840,52 @@ namespace gbe
> ctx.READ_ARF(ir::TYPE_U32, dst, ir::ARF_TM);
> break;
> }
> + case GEN_OCL_VME:
> + {
> +
> + const uint8_t imageID = getImageID(I);
> +
> + AI++;
> + AI++;
> +
> + uint32_t src_length = 40;
> +
> + vector<ir::Register> dstTupleData, srcTupleData;
> + for (uint32_t i = 0; i < src_length; i++, AI++){
> + srcTupleData.push_back(this->getRegister(*AI));
> + }
> +
> + const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], src_length);
> +
> + Constant *msg_type_cpv = dyn_cast<Constant>(*AI);
> + assert(msg_type_cpv);
> + const ir::Immediate &msg_type_x =
> processConstantImm(msg_type_cpv);
> + int msg_type = msg_type_x.getIntegerValue();
> + uint32_t dst_length;
> + //msy_type =1 indicate inter search only of gen vme shared function
> + GBE_ASSERT(msg_type == 1);
> + if(msg_type == 1)
> + dst_length = 6;
> + for (uint32_t elemID = 0; elemID < dst_length; ++elemID) {
> + const ir::Register reg = this->getRegister(&I, elemID);
> + dstTupleData.push_back(reg);
> + }
> + const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], dst_length);
> + ++AI;
> + Constant *vme_search_path_lut_cpv = dyn_cast<Constant>(*AI);
> + assert(vme_search_path_lut_cpv);
> + const ir::Immediate &vme_search_path_lut_x =
> processConstantImm(vme_search_path_lut_cpv);
> + ++AI;
> + Constant *lut_sub_cpv = dyn_cast<Constant>(*AI);
> + assert(lut_sub_cpv);
> + const ir::Immediate &lut_sub_x = processConstantImm(lut_sub_cpv);
> +
> + ctx.VME(imageID, dstTuple, srcTuple, dst_length, src_length,
> + msg_type, vme_search_path_lut_x.getIntegerValue(),
> + lut_sub_x.getIntegerValue());
> +
> + break;
> + }
> case GEN_OCL_REGION:
> {
> const ir::Register dst = this->getRegister(&I);
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx
> b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index cabb225..3fbf847 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -168,5 +168,7 @@ DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE,
> intel_sub_group_shuffle)
> DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm)
> DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region)
>
> +DECL_LLVM_GEN_FUNCTION(VME, __gen_ocl_vme)
> +
> // printf function
> DECL_LLVM_GEN_FUNCTION(PRINTF, __gen_ocl_printf)
> diff --git a/backend/src/llvm/llvm_scalarize.cpp
> b/backend/src/llvm/llvm_scalarize.cpp
> index 7ee5259..dc1d8ab 100644
> --- a/backend/src/llvm/llvm_scalarize.cpp
> +++ b/backend/src/llvm/llvm_scalarize.cpp
> @@ -671,6 +671,10 @@ namespace gbe {
> *CI = InsertToVector(call, *CI);
> break;
> }
> + case GEN_OCL_VME:
> + setAppendPoint(call);
> + extractFromVector(call);
> + break;
> }
> }
> }
> --
> 1.9.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list