[Beignet] [PATCH v3 1/4] Add built-in function __gen_ocl_vme.
Song, Ruiling
ruiling.song at intel.com
Wed Sep 23 19:53:28 PDT 2015
This version of the patchset LGTM.
Thanks!
Ruiling
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Chuanbo Weng
> Sent: Tuesday, September 22, 2015 8:55 PM
> To: beignet at lists.freedesktop.org
> Cc: Weng, Chuanbo
> Subject: [Beignet] [PATCH v3 1/4] Add built-in function __gen_ocl_vme.
>
> __gen_ocl_vme is used for hardware-accelerated video motion estimation.
> It takes the payload values as parameters and uses MOV to copy them into the
> payload GRFs of the VME SEND message. The uint8 return value holds the SEND
> message writeback.
>
> v2:
> Remove 5 unnecessary parameters (src_grf*) of the built-in function (we just
> need to allocate the related registers in the gen_insn_selection step).
>
> v3:
> Remove redundant code and change MAX_SRC_NUM to 40.
>
> Signed-off-by: Chuanbo Weng <chuanbo.weng at intel.com>
> ---
> backend/src/backend/gen/gen_mesa_disasm.c | 14 +++++
> backend/src/backend/gen7_instruction.hpp | 15 +++++
> backend/src/backend/gen_context.cpp | 68 ++++++++++++++++++++
> backend/src/backend/gen_context.hpp | 1 +
> backend/src/backend/gen_defs.hpp | 19 ++++++
> backend/src/backend/gen_encoder.cpp | 44 +++++++++++++
> backend/src/backend/gen_encoder.hpp | 13 ++++
> .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
> backend/src/backend/gen_insn_selection.cpp | 73
> ++++++++++++++++++++++
> backend/src/backend/gen_insn_selection.hpp | 14 ++++-
> backend/src/backend/gen_insn_selection.hxx | 1 +
> backend/src/ir/instruction.cpp | 66 +++++++++++++++++++
> backend/src/ir/instruction.hpp | 17 ++++-
> backend/src/ir/instruction.hxx | 1 +
> backend/src/libocl/include/ocl_misc.h | 15 +++++
> backend/src/llvm/llvm_gen_backend.cpp | 41 ++++++++++++
> backend/src/llvm/llvm_gen_ocl_function.hxx | 2 +
> backend/src/llvm/llvm_scalarize.cpp | 4 ++
> 18 files changed, 404 insertions(+), 5 deletions(-)
>
> diff --git a/backend/src/backend/gen/gen_mesa_disasm.c
> b/backend/src/backend/gen/gen_mesa_disasm.c
> index 5220233..baed06d 100644
> --- a/backend/src/backend/gen/gen_mesa_disasm.c
> +++ b/backend/src/backend/gen/gen_mesa_disasm.c
> @@ -455,6 +455,13 @@ static int column;
>
> static int gen_version;
>
> +#define GEN7_BITS_FIELD(inst, gen7) \
> + ({ \
> + int bits; \
> + bits = ((const union Gen7NativeInstruction *)inst)->gen7; \
> + bits; \
> + })
> +
> #define GEN_BITS_FIELD(inst, gen) \
> ({ \
> int bits; \
> @@ -509,6 +516,8 @@ static int gen_version;
> #define EXECUTION_SIZE(inst) GEN_BITS_FIELD(inst, header.execution_size)
> #define BRANCH_JIP(inst) GEN_BITS_FIELD2(inst, bits3.gen7_branch.jip,
> bits3.gen8_branch.jip/8)
> #define BRANCH_UIP(inst) GEN_BITS_FIELD2(inst, bits3.gen7_branch.uip,
> bits2.gen8_branch.uip/8)
> +#define VME_BTI(inst) GEN7_BITS_FIELD(inst, bits3.vme_gen7.bti)
> +#define VME_MSG_TYPE(inst) GEN7_BITS_FIELD(inst,
> bits3.vme_gen7.msg_type)
> #define SAMPLE_BTI(inst) GEN_BITS_FIELD(inst, bits3.sampler_gen7.bti)
> #define SAMPLER(inst) GEN_BITS_FIELD(inst,
> bits3.sampler_gen7.sampler)
> #define SAMPLER_MSG_TYPE(inst) GEN_BITS_FIELD(inst,
> bits3.sampler_gen7.msg_type)
> @@ -1333,6 +1342,11 @@ int gen_disasm (FILE *file, const void *inst, uint32_t
> deviceID, uint32_t compac
>
> if (GEN_BITS_FIELD2(inst, bits1.da1.src1_reg_file, bits2.da1.src1_reg_file) ==
> GEN_IMMEDIATE_VALUE) {
> switch (target) {
> + case GEN_SFID_VIDEO_MOTION_EST:
> + format(file, " (bti: %d, msg_type: %d)",
> + VME_BTI(inst),
> + VME_MSG_TYPE(inst));
> + break;
> case GEN_SFID_SAMPLER:
> format(file, " (%d, %d, %d, %d)",
> SAMPLE_BTI(inst),
> diff --git a/backend/src/backend/gen7_instruction.hpp
> b/backend/src/backend/gen7_instruction.hpp
> index 51f342b..258dd24 100644
> --- a/backend/src/backend/gen7_instruction.hpp
> +++ b/backend/src/backend/gen7_instruction.hpp
> @@ -350,6 +350,21 @@ union Gen7NativeInstruction
> uint32_t end_of_thread:1;
> } sampler_gen7;
>
> + struct {
> + uint32_t bti:8;
> + uint32_t vme_search_path_lut:3;
> + uint32_t lut_sub:2;
> + uint32_t msg_type:2;
> + uint32_t stream_in:1;
> + uint32_t stream_out:1;
> + uint32_t reserved_mbz:2;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad1:2;
> + uint32_t end_of_thread:1;
> + } vme_gen7;
> +
> /**
> * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
> *
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 32d00e2..ccabc99 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -2259,6 +2259,74 @@ namespace gbe
> p->SAMPLE(dst, msgPayload, msgLen, false, bti, sampler, simdWidth, -1, 0,
> insn.extra.isLD, insn.extra.isUniform);
> }
>
> + void GenContext::emitVmeInstruction(const SelectionInstruction &insn) {
> + const GenRegister dst = ra->genReg(insn.dst(0));
> +
> + /* Use MOV to Setup bits of payload */
> + p->push();
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.noMask = 1;
> + p->curr.execWidth = 1;
> + for(int i=0; i < 2; i++){
> + for(int k = 0; k < 2; k++){
> + GenRegister payload_grf = ra->genReg(insn.dst(8+i));
> + payload_grf.nr += k;
> + payload_grf.vstride = GEN_VERTICAL_STRIDE_0;
> + payload_grf.width = GEN_WIDTH_1;
> + payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0;
> + payload_grf.subphysical = 1;
> + for(int j=0; j < 8; j++){
> + payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD);
> + GenRegister payload_val = ra->genReg(insn.src(i*16+k*8+j));
> + payload_val.vstride = GEN_VERTICAL_STRIDE_0;
> + payload_val.width = GEN_WIDTH_1;
> + payload_val.hstride = GEN_HORIZONTAL_STRIDE_0;
> +
> + p->MOV(payload_grf, payload_val);
> + }
> + }
> + }
> + {
> + int i = 2;
> + GenRegister payload_grf = ra->genReg(insn.dst(8+i));
> + payload_grf.vstride = GEN_VERTICAL_STRIDE_0;
> + payload_grf.width = GEN_WIDTH_1;
> + payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0;
> + payload_grf.subphysical = 1;
> + for(int j=0; j < 8; j++){
> + payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD);
> + GenRegister payload_val = ra->genReg(insn.src(i*16+j));
> + payload_val.vstride = GEN_VERTICAL_STRIDE_0;
> + payload_val.width = GEN_WIDTH_1;
> + payload_val.hstride = GEN_HORIZONTAL_STRIDE_0;
> +
> + p->MOV(payload_grf, payload_val);
> + }
> + }
> + p->pop();
> +
> + p->push();
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.noMask = 1;
> + p->curr.execWidth = 1;
> + GenRegister payload_did = GenRegister::retype(ra->genReg(insn.dst(8)),
> GEN_TYPE_UB);
> + payload_did.vstride = GEN_VERTICAL_STRIDE_0;
> + payload_did.width = GEN_WIDTH_1;
> + payload_did.hstride = GEN_HORIZONTAL_STRIDE_0;
> + payload_did.subphysical = 1;
> + payload_did.subnr = 20 * typeSize(GEN_TYPE_UB);
> + GenRegister grf0 = GenRegister::ub1grf(0, 20);
> + p->MOV(payload_did, grf0);
> + p->pop();
> +
> + const GenRegister msgPayload = ra->genReg(insn.dst(8));
> + const unsigned char bti = insn.getbti();
> + const unsigned int msg_type = insn.extra.msg_type;
> + const unsigned int vme_search_path_lut = insn.extra.vme_search_path_lut;
> + const unsigned int lut_sub = insn.extra.lut_sub;
> + p->VME(bti, dst, msgPayload, msg_type, vme_search_path_lut,
> + lut_sub); }
> +
> void GenContext::scratchWrite(const GenRegister header, uint32_t offset,
> uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode) {
> p->push();
> uint32_t simdWidth = p->curr.execWidth; diff --git
> a/backend/src/backend/gen_context.hpp
> b/backend/src/backend/gen_context.hpp
> index 34f9293..65a577d 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -167,6 +167,7 @@ namespace gbe
> virtual void emitUnpackLongInstruction(const SelectionInstruction &insn);
> void emitDWordGatherInstruction(const SelectionInstruction &insn);
> void emitSampleInstruction(const SelectionInstruction &insn);
> + void emitVmeInstruction(const SelectionInstruction &insn);
> void emitTypedWriteInstruction(const SelectionInstruction &insn);
> void emitSpillRegInstruction(const SelectionInstruction &insn);
> void emitUnSpillRegInstruction(const SelectionInstruction &insn); diff --git
> a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
> index 1ca148c..895ee4e 100644
> --- a/backend/src/backend/gen_defs.hpp
> +++ b/backend/src/backend/gen_defs.hpp
> @@ -422,6 +422,10 @@ enum GenMessageTarget {
> #define GEN_SAMPLER_SIMD_MODE_SIMD16 2
> #define GEN_SAMPLER_SIMD_MODE_SIMD32_64 3
>
> +#define VME_INTER_SEARCH_ONLY 1
> +#define VME_INTRA_SEARCH_ONLY 2
> +#define VME_INTER_AND_INTRA_SEARCH 3
> +
> #define GEN_MATH_FUNCTION_INV 1
> #define GEN_MATH_FUNCTION_LOG 2
> #define GEN_MATH_FUNCTION_EXP 3
> @@ -602,6 +606,21 @@ union GenNativeInstruction
> uint32_t end_of_thread:1;
> } sampler_gen7;
>
> + struct {
> + uint32_t bti:8;
> + uint32_t vme_search_path_lut:3;
> + uint32_t lut_sub:2;
> + uint32_t msg_type:2;
> + uint32_t stream_in:1;
> + uint32_t stream_out:1;
> + uint32_t reserved_mbz:2;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad1:2;
> + uint32_t end_of_thread:1;
> + } vme_gen7;
> +
> /**
> * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
> *
> diff --git a/backend/src/backend/gen_encoder.cpp
> b/backend/src/backend/gen_encoder.cpp
> index cac29e8..6b6e9cd 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -1231,6 +1231,50 @@ namespace gbe
> simd_mode, return_format);
> }
>
> + void GenEncoder::setVmeMessage(GenNativeInstruction *insn,
> + unsigned char bti,
> + uint32_t response_length,
> + uint32_t msg_length,
> + uint32_t msg_type,
> + unsigned char vme_search_path_lut,
> + unsigned char lut_sub) {
> + const GenMessageTarget sfid = GEN_SFID_VIDEO_MOTION_EST;
> + setMessageDescriptor(insn, sfid, msg_length, response_length, true);
> + insn->bits3.vme_gen7.bti = bti;
> + insn->bits3.vme_gen7.vme_search_path_lut = vme_search_path_lut;
> + insn->bits3.vme_gen7.lut_sub = lut_sub;
> + insn->bits3.vme_gen7.msg_type = msg_type;
> + insn->bits3.vme_gen7.stream_in = 0;
> + insn->bits3.vme_gen7.stream_out = 0;
> + insn->bits3.vme_gen7.reserved_mbz = 0;
> +
> + }
> +
> + void GenEncoder::VME(unsigned char bti,
> + GenRegister dest,
> + GenRegister msg,
> + uint32_t msg_type,
> + uint32_t vme_search_path_lut,
> + uint32_t lut_sub) {
> + /* Currently we just support VME_INTER_SEARCH_ONLY, we will support
> other
> + * modes in future.
> + */
> + GBE_ASSERT(msg_type == VME_INTER_SEARCH_ONLY);
> + uint32_t msg_length, response_length;
> + if(msg_type == VME_INTER_SEARCH_ONLY){
> + msg_length = 5;
> + response_length = 6;
> + }
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> + this->setHeader(insn);
> + this->setDst(insn, dest);
> + this->setSrc0(insn, msg);
> + setVmeMessage(insn, bti, response_length, msg_length,
> + msg_type, vme_search_path_lut, lut_sub); }
> +
> void GenEncoder::TYPED_WRITE(GenRegister msg, bool header_present,
> unsigned char bti)
> {
> GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); diff --git
> a/backend/src/backend/gen_encoder.hpp
> b/backend/src/backend/gen_encoder.hpp
> index 79e7b6e..e991b99 100644
> --- a/backend/src/backend/gen_encoder.hpp
> +++ b/backend/src/backend/gen_encoder.hpp
> @@ -205,6 +205,19 @@ namespace gbe
> bool header_present,
> uint32_t simd_mode,
> uint32_t return_format);
> + virtual void VME(unsigned char bti,
> + GenRegister dest,
> + GenRegister msg,
> + uint32_t msg_type,
> + uint32_t vme_search_path_lut,
> + uint32_t lut_sub);
> + void setVmeMessage(GenNativeInstruction *insn,
> + unsigned char bti,
> + uint32_t response_length,
> + uint32_t msg_length,
> + uint32_t msg_type,
> + unsigned char vme_search_path_lut,
> + unsigned char lut_sub);
>
> /*! TypedWrite instruction for texture */
> virtual void TYPED_WRITE(GenRegister header, diff --git
> a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index d073770..0ecc95f 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -36,6 +36,7 @@ DECL_GEN7_SCHEDULE(UnpackByte, 40, 1, 1)
> DECL_GEN7_SCHEDULE(PackLong, 40, 1, 1)
> DECL_GEN7_SCHEDULE(UnpackLong, 40, 1, 1)
> DECL_GEN7_SCHEDULE(Sample, 160, 1, 1)
> +DECL_GEN7_SCHEDULE(Vme, 320, 1, 1)
> DECL_GEN7_SCHEDULE(TypedWrite, 80, 1, 1)
> DECL_GEN7_SCHEDULE(SpillReg, 20, 1, 1)
> DECL_GEN7_SCHEDULE(UnSpillReg, 160, 1, 1)
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index ab00269..7073f93 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -187,6 +187,7 @@ namespace gbe
> this->opcode == SEL_OP_ATOMIC ||
> this->opcode == SEL_OP_BYTE_GATHER ||
> this->opcode == SEL_OP_SAMPLE ||
> + this->opcode == SEL_OP_VME ||
> this->opcode == SEL_OP_DWORD_GATHER;
> }
>
> @@ -659,6 +660,8 @@ namespace gbe
> void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2);
> /*! Encode sample instructions */
> void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *msgPayloads,
> uint32_t msgNum, uint32_t bti, uint32_t sampler, bool isLD, bool isUniform);
> + /*! Encode vme instructions */
> + void VME(uint32_t bti, GenRegister *dst, GenRegister *payloadVal,
> + uint32_t dstNum, uint32_t srcNum, uint32_t msg_type, uint32_t
> + vme_search_path_lut, uint32_t lut_sub);
> /*! Encode typed write instructions */
> void TYPED_WRITE(GenRegister *msgs, uint32_t msgNum, uint32_t bti, bool
> is3D);
> /*! Get image information */
> @@ -2059,6 +2062,34 @@ namespace gbe
> insn->extra.isUniform = isUniform;
> }
>
> + void Selection::Opaque::VME(uint32_t bti, GenRegister *dst, GenRegister
> *payloadVal,
> + uint32_t dstNum, uint32_t srcNum, uint32_t msg_type,
> + uint32_t vme_search_path_lut, uint32_t lut_sub) {
> + SelectionInstruction *insn = this->appendInsn(SEL_OP_VME, dstNum,
> srcNum);
> + SelectionVector *dstVector = this->appendVector();
> + SelectionVector *msgVector = this->appendVector();
> +
> + for (uint32_t elemID = 0; elemID < dstNum; ++elemID)
> + insn->dst(elemID) = dst[elemID];
> + for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
> + insn->src(elemID) = payloadVal[elemID];
> +
> + dstVector->regNum = dstNum;
> + dstVector->isSrc = 0;
> + dstVector->offsetID = 0;
> + dstVector->reg = &insn->dst(0);
> +
> + msgVector->regNum = srcNum;
> + msgVector->isSrc = 1;
> + msgVector->offsetID = 0;
> + msgVector->reg = &insn->src(0);
> +
> + insn->setbti(bti);
> + insn->extra.msg_type = msg_type;
> + insn->extra.vme_search_path_lut = vme_search_path_lut;
> + insn->extra.lut_sub = lut_sub;
> + }
> +
> ///////////////////////////////////////////////////////////////////////////
> // Code selection public implementation
> ///////////////////////////////////////////////////////////////////////////
> @@ -4859,6 +4890,47 @@ namespace gbe
> DECL_CTOR(SampleInstruction, 1, 1);
> };
>
> + DECL_PATTERN(VmeInstruction)
> + {
> + INLINE bool emitOne(Selection::Opaque &sel, const ir::VmeInstruction &insn,
> bool &markChildren) const
> + {
> + using namespace ir;
> + uint32_t msg_type, vme_search_path_lut, lut_sub;
> + msg_type = insn.getMsgType();
> + vme_search_path_lut = 0;
> + lut_sub = 0;
> + GBE_ASSERT(msg_type == VME_INTER_SEARCH_ONLY);
> + uint32_t payloadLen = 0;
> + if(msg_type == VME_INTER_SEARCH_ONLY){
> + payloadLen = 5;
> + }
> + //vector<GenRegister> dst(insn.getDstNum());
> + uint32_t selDstNum = insn.getDstNum() + payloadLen;
> + uint32_t srcNum = insn.getSrcNum();
> + vector<GenRegister> dst(selDstNum);
> + vector<GenRegister> payloadVal(srcNum);
> + uint32_t valueID = 0;
> + for (valueID = 0; valueID < insn.getDstNum(); ++valueID)
> + dst[valueID] = sel.selReg(insn.getDst(valueID), insn.getDstType());
> + for (valueID = insn.getDstNum(); valueID < selDstNum; ++valueID)
> + dst[valueID] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> +
> + for (valueID = 0; valueID < srcNum; ++valueID)
> + payloadVal[valueID] = sel.selReg(insn.getSrc(valueID),
> + insn.getSrcType());
> +
> + uint32_t bti = insn.getImageIndex();
> + if (bti > BTI_MAX_ID) {
> + std::cerr << "Too large bti " << bti;
> + return false;
> + }
> +
> + sel.VME(bti, dst.data(), payloadVal.data(), selDstNum, srcNum,
> + msg_type, vme_search_path_lut, lut_sub);
> +
> + return true;
> + }
> + DECL_CTOR(VmeInstruction, 1, 1);
> + };
> +
> /*! Typed write instruction pattern. */
> DECL_PATTERN(TypedWriteInstruction)
> {
> @@ -5317,6 +5389,7 @@ namespace gbe
> this->insert<MulAddInstructionPattern>();
> this->insert<SelectModifierInstructionPattern>();
> this->insert<SampleInstructionPattern>();
> + this->insert<VmeInstructionPattern>();
> this->insert<GetImageInfoInstructionPattern>();
> this->insert<ReadARFInstructionPattern>();
> this->insert<RegionInstructionPattern>();
> diff --git a/backend/src/backend/gen_insn_selection.hpp
> b/backend/src/backend/gen_insn_selection.hpp
> index ffc79e1..2172c01 100644
> --- a/backend/src/backend/gen_insn_selection.hpp
> +++ b/backend/src/backend/gen_insn_selection.hpp
> @@ -90,8 +90,8 @@ namespace gbe
> const GenRegister &dst(uint32_t dstID) const { return regs[dstID]; }
> /*! Damn C++ */
> const GenRegister &src(uint32_t srcID) const { return regs[dstNum+srcID]; }
> - /*! No more than 9 sources (used by typed writes on simd8 mode.) */
> - enum { MAX_SRC_NUM = 9 };
> + /*! No more than 40 sources (40 sources are used by vme for payload
> passing and setting) */
> + enum { MAX_SRC_NUM = 40 };
> /*! No more than 16 destinations (15 used by I64DIV/I64REM) */
> enum { MAX_DST_NUM = 16 };
> /*! State of the instruction (extra fields neeed for the encoding) */ @@ -
> 129,6 +129,12 @@ namespace gbe
> bool isLD; // is this a ld message?
> bool isUniform;
> };
> + struct {
> + uint16_t vme_bti:8;
> + uint16_t msg_type:2;
> + uint16_t vme_search_path_lut:3;
> + uint16_t lut_sub:2;
> + };
> uint32_t barrierType;
> bool longjmp;
> uint32_t indirect_offset;
> @@ -138,7 +144,7 @@ namespace gbe
> /*! Number of destinations */
> uint8_t dstNum:5;
> /*! Number of sources */
> - uint8_t srcNum:4;
> + uint8_t srcNum:6;
> /*! To store various indices */
> uint32_t index;
> /*! For BRC/IF to store the UIP */
> @@ -152,6 +158,7 @@ namespace gbe
> switch (opcode) {
> case SEL_OP_DWORD_GATHER: return extra.function;
> case SEL_OP_SAMPLE: return extra.rdbti;
> + case SEL_OP_VME: return extra.vme_bti;
> case SEL_OP_TYPED_WRITE: return extra.bti;
> default:
> GBE_ASSERT(0);
> @@ -164,6 +171,7 @@ namespace gbe
> switch (opcode) {
> case SEL_OP_DWORD_GATHER: extra.function = bti; return;
> case SEL_OP_SAMPLE: extra.rdbti = bti; return;
> + case SEL_OP_VME: extra.vme_bti = bti; return;
> case SEL_OP_TYPED_WRITE: extra.bti = bti; return;
> default:
> GBE_ASSERT(0);
> diff --git a/backend/src/backend/gen_insn_selection.hxx
> b/backend/src/backend/gen_insn_selection.hxx
> index adbb137..58921b2 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -65,6 +65,7 @@ DECL_SELECTION_IR(UNPACK_BYTE,
> UnpackByteInstruction) DECL_SELECTION_IR(PACK_LONG, PackLongInstruction)
> DECL_SELECTION_IR(UNPACK_LONG, UnpackLongInstruction)
> DECL_SELECTION_IR(SAMPLE, SampleInstruction)
> +DECL_SELECTION_IR(VME, VmeInstruction)
> DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
> DECL_SELECTION_IR(SPILL_REG, SpillRegInstruction)
> DECL_SELECTION_IR(UNSPILL_REG, UnSpillRegInstruction) diff --git
> a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index
> f93c528..7bf787e 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -595,6 +595,58 @@ namespace ir {
> static const uint32_t dstNum = 4;
> };
>
> + class ALIGNED_INSTRUCTION VmeInstruction :
> + public BasePolicy,
> + public TupleSrcPolicy<VmeInstruction>,
> + public TupleDstPolicy<VmeInstruction>
> + {
> + public:
> + VmeInstruction(uint8_t imageIdx, Tuple dstTuple, Tuple srcTuple,
> + uint32_t dstNum, uint32_t srcNum, int msg_type,
> + int vme_search_path_lut, int lut_sub) {
> + this->opcode = OP_VME;
> + this->dst = dstTuple;
> + this->src = srcTuple;
> + this->dstNum = dstNum;
> + this->srcNum = srcNum;
> + this->imageIdx = imageIdx;
> + this->msg_type = msg_type;
> + this->vme_search_path_lut = vme_search_path_lut;
> + this->lut_sub = lut_sub;
> + }
> + INLINE bool wellFormed(const Function &fn, std::string &why) const;
> + INLINE void out(std::ostream &out, const Function &fn) const {
> + this->outOpcode(out);
> + out << " src_surface id " << (int)this->getImageIndex()
> + << " ref_surface id " << (int)this->getImageIndex() + 1;
> + for(uint32_t i = 0; i < dstNum; i++){
> + out<< " %" << this->getDst(fn, i);
> + }
> + for(uint32_t i = 0; i < srcNum; i++){
> + out<< " %" << this->getSrc(fn, i);
> + }
> + out
> + << " msg_type " << (int)this->getMsgType()
> + << " vme_search_path_lut " << (int)this->vme_search_path_lut
> + << " lut_sub " << (int)this->lut_sub;
> + }
> + Tuple src;
> + Tuple dst;
> +
> + INLINE uint8_t getImageIndex(void) const { return this->imageIdx; }
> + INLINE uint8_t getMsgType(void) const { return this->msg_type; }
> +
> + INLINE Type getSrcType(void) const { return TYPE_U32; }
> + INLINE Type getDstType(void) const { return TYPE_U32; }
> + uint8_t imageIdx;
> + uint8_t msg_type;
> + uint8_t vme_search_path_lut;
> + uint8_t lut_sub;
> + uint32_t srcNum;
> + uint32_t dstNum;
> + };
> +
> +
> class ALIGNED_INSTRUCTION TypedWriteInstruction : // TODO
> public BasePolicy,
> public TupleSrcPolicy<TypedWriteInstruction>,
> @@ -1111,6 +1163,8 @@ namespace ir {
> // TODO
> INLINE bool SampleInstruction::wellFormed(const Function &fn, std::string
> &why) const
> { return true; }
> + INLINE bool VmeInstruction::wellFormed(const Function &fn, std::string
> &why) const
> + { return true; }
> INLINE bool TypedWriteInstruction::wellFormed(const Function &fn,
> std::string &why) const
> { return true; }
> INLINE bool GetImageInfoInstruction::wellFormed(const Function &fn,
> std::string &why) const @@ -1502,6 +1556,10 @@
> START_INTROSPECTION(LabelInstruction)
> #include "ir/instruction.hxx"
> END_INTROSPECTION(LabelInstruction)
>
> +START_INTROSPECTION(VmeInstruction)
> +#include "ir/instruction.hxx"
> +END_INTROSPECTION(VmeInstruction)
> +
> #undef END_INTROSPECTION
> #undef START_INTROSPECTION
> #undef DECL_INSN
> @@ -1694,6 +1752,10 @@ DECL_MEM_FN(SampleInstruction, Type,
> getDstType(void), getDstType()) DECL_MEM_FN(SampleInstruction, uint8_t,
> getSamplerIndex(void), getSamplerIndex()) DECL_MEM_FN(SampleInstruction,
> uint8_t, getSamplerOffset(void), getSamplerOffset())
> DECL_MEM_FN(SampleInstruction, uint8_t, getImageIndex(void),
> getImageIndex())
> +DECL_MEM_FN(VmeInstruction, Type, getSrcType(void), getSrcType())
> +DECL_MEM_FN(VmeInstruction, Type, getDstType(void), getDstType())
> +DECL_MEM_FN(VmeInstruction, uint8_t, getImageIndex(void),
> +getImageIndex()) DECL_MEM_FN(VmeInstruction, uint8_t, getMsgType(void),
> +getMsgType())
> DECL_MEM_FN(TypedWriteInstruction, Type, getSrcType(void), getSrcType())
> DECL_MEM_FN(TypedWriteInstruction, Type, getCoordType(void),
> getCoordType()) DECL_MEM_FN(TypedWriteInstruction, uint8_t,
> getImageIndex(void), getImageIndex()) @@ -1932,6 +1994,10 @@
> DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void),
> getImageIndex
> return internal::SampleInstruction(imageIndex, dst, src, srcNum, dstIsFloat,
> srcIsFloat, sampler, samplerOffset).convert();
> }
>
> + Instruction VME(uint8_t imageIndex, Tuple dst, Tuple src, uint32_t dstNum,
> uint32_t srcNum, int msg_type, int vme_search_path_lut, int lut_sub) {
> + return internal::VmeInstruction(imageIndex, dst, src, dstNum,
> + srcNum, msg_type, vme_search_path_lut, lut_sub).convert(); }
> +
> Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, uint8_t srcNum, Type
> srcType, Type coordType) {
> return internal::TypedWriteInstruction(imageIndex, src, srcNum, srcType,
> coordType).convert();
> }
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index cf8d839..cf2d65c 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -191,8 +191,8 @@ namespace ir {
> template <typename T> INLINE bool isMemberOf(void) const {
> return T::isClassOf(*this);
> }
> - /*! max_src for store instruction (vec16 + addr) */
> - static const uint32_t MAX_SRC_NUM = 32;
> + /*! max_src used by vme for payload passing and setting */
> + static const uint32_t MAX_SRC_NUM = 40;
> static const uint32_t MAX_DST_NUM = 32;
> protected:
> BasicBlock *parent; //!< The basic block containing the instruction
> @@ -399,6 +399,17 @@ namespace ir {
> static bool isClassOf(const Instruction &insn);
> };
>
> + /*! Video motion estimation */
> + class VmeInstruction : public Instruction {
> + public:
> + uint8_t getImageIndex() const;
> + uint8_t getMsgType() const;
> + Type getSrcType(void) const;
> + Type getDstType(void) const;
> + /*! Return true if the given instruction is an instance of this class */
> + static bool isClassOf(const Instruction &insn); };
> +
> typedef union _ImageInfoKey{
> _ImageInfoKey(uint8_t i, uint8_t t) : index(i), type(t) {};
> struct {
> @@ -755,6 +766,8 @@ namespace ir {
> Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, uint8_t srcNum, Type
> srcType, Type coordType);
> /*! sample textures */
> Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, uint8_t srcNum,
> bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset);
> + /*! video motion estimation */
> + Instruction VME(uint8_t imageIndex, Tuple dst, Tuple src, uint32_t
> + dstNum, uint32_t srcNum, int msg_type, int vme_search_path_lut, int
> + lut_sub);
> /*! get image information , such as width/height/depth/... */
> Instruction GET_IMAGE_INFO(int infoType, Register dst, uint8_t imageIndex,
> Register infoReg);
> /*! label labelIndex */
> diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index
> 81548c9..27d59a9 100644
> --- a/backend/src/ir/instruction.hxx
> +++ b/backend/src/ir/instruction.hxx
> @@ -85,6 +85,7 @@ DECL_INSN(SYNC, SyncInstruction) DECL_INSN(LABEL,
> LabelInstruction) DECL_INSN(READ_ARF, ReadARFInstruction)
> DECL_INSN(REGION, RegionInstruction)
> +DECL_INSN(VME, VmeInstruction)
> DECL_INSN(INDIRECT_MOV, IndirectMovInstruction)
> DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction) DECL_INSN(MUL_HI,
> BinaryInstruction) diff --git a/backend/src/libocl/include/ocl_misc.h
> b/backend/src/libocl/include/ocl_misc.h
> index 359025b..7d4abab 100644
> --- a/backend/src/libocl/include/ocl_misc.h
> +++ b/backend/src/libocl/include/ocl_misc.h
> @@ -136,5 +136,20 @@ struct time_stamp {
> uint event;
> };
>
> +uint __gen_ocl_region(ushort offset, uint data);
> +
> struct time_stamp __gen_ocl_get_timestamp(void);
> +
> +uint8 __gen_ocl_vme(image2d_t, image2d_t,
> + uint, uint, uint, uint,
> + uint, uint, uint, uint,
> + uint, uint, uint, uint,
> + uint, uint, uint, uint,
> + uint, uint, uint, uint,
> + uint, uint, uint, uint,
> + uint, uint, uint, uint,
> + uint, uint, uint, uint,
> + uint, uint, uint, uint,
> + uint, uint, uint, uint,
> + int, int, int);
> #endif
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index 4905415..ab1094c 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -3491,6 +3491,7 @@ namespace gbe
> case GEN_OCL_REGION:
> case GEN_OCL_SIMD_ID:
> case GEN_OCL_SIMD_SHUFFLE:
> + case GEN_OCL_VME:
> this->newRegister(&I);
> break;
> case GEN_OCL_PRINTF:
> @@ -3789,6 +3790,46 @@ namespace gbe
> ctx.READ_ARF(ir::TYPE_U32, dst, ir::ARF_TM);
> break;
> }
> + case GEN_OCL_VME:
> + {
> +
> + const uint8_t imageID = getImageID(I);
> +
> + AI++;
> + AI++;
> +
> + uint32_t src_length = 40, dst_length = 6;
> +
> + vector<ir::Register> dstTupleData, srcTupleData;
> + for (uint32_t i = 0; i < src_length; i++, AI++){
> + srcTupleData.push_back(this->getRegister(*AI));
> + }
> +
> + for (uint32_t elemID = 0; elemID < dst_length; ++elemID) {
> + const ir::Register reg = this->getRegister(&I, elemID);
> + dstTupleData.push_back(reg);
> + }
> + const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], dst_length);
> + const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0],
> + src_length);
> +
> + Constant *msg_type_cpv = dyn_cast<Constant>(*AI);
> + assert(msg_type_cpv);
> + const ir::Immediate &msg_type_x =
> processConstantImm(msg_type_cpv);
> + ++AI;
> + Constant *vme_search_path_lut_cpv = dyn_cast<Constant>(*AI);
> + assert(vme_search_path_lut_cpv);
> + const ir::Immediate &vme_search_path_lut_x =
> processConstantImm(vme_search_path_lut_cpv);
> + ++AI;
> + Constant *lut_sub_cpv = dyn_cast<Constant>(*AI);
> + assert(lut_sub_cpv);
> + const ir::Immediate &lut_sub_x =
> + processConstantImm(lut_sub_cpv);
> +
> + ctx.VME(imageID, dstTuple, srcTuple, dst_length, src_length,
> + msg_type_x.getIntegerValue(),
> vme_search_path_lut_x.getIntegerValue(),
> + lut_sub_x.getIntegerValue());
> +
> + break;
> + }
> case GEN_OCL_REGION:
> {
> const ir::Register dst = this->getRegister(&I); diff --git
> a/backend/src/llvm/llvm_gen_ocl_function.hxx
> b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index cabb225..3fbf847 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -168,5 +168,7 @@ DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE,
> intel_sub_group_shuffle) DECL_LLVM_GEN_FUNCTION(READ_TM,
> __gen_ocl_read_tm) DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region)
>
> +DECL_LLVM_GEN_FUNCTION(VME, __gen_ocl_vme)
> +
> // printf function
> DECL_LLVM_GEN_FUNCTION(PRINTF, __gen_ocl_printf) diff --git
> a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
> index bc985c6..0f5c33b 100644
> --- a/backend/src/llvm/llvm_scalarize.cpp
> +++ b/backend/src/llvm/llvm_scalarize.cpp
> @@ -704,6 +704,10 @@ namespace gbe {
> *CI = InsertToVector(call, *CI);
> break;
> }
> + case GEN_OCL_VME:
> + setAppendPoint(call);
> + extractFromVector(call);
> + break;
> }
> }
> }
> --
> 1.9.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list