[Beignet] [PATCH v6 1/4] Add built-in function __gen_ocl_vme.

Song, Ruiling ruiling.song at intel.com
Mon Nov 9 19:44:07 PST 2015


This version of the patchset LGTM.

Thanks!
Ruiling

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Chuanbo Weng
> Sent: Friday, November 6, 2015 11:28 AM
> To: beignet at lists.freedesktop.org
> Cc: Weng, Chuanbo
> Subject: [Beignet] [PATCH v6 1/4] Add built-in function __gen_ocl_vme.
> 
> __gen_ocl_vme is used for hardware-accelerated video motion estimation.
> It takes the payload values as parameters and uses MOV to copy them into
> the payload grfs of the VME SEND message. The uint8 return value is used
> to store the SEND message writeback.
> 
> v2:
> Remove 5 unnecessary parameters (src_grf*) of the built-in function (we just
> need to allocate the related registers in the gen_insn_selection step).
> 
> v3:
> Remove redundant code and change MAX_SRC_NUM to 40.
> 
> v4:
> Choose the message response length by message type instead of hard-coding it.
> 
> v5:
> Choose message response length by message type in the whole backend
> pipeline.
> 
> v6:
> Treat simd8 and simd16 differently when moving payload values to consecutive
> payload grfs.
> 
> Signed-off-by: Chuanbo Weng <chuanbo.weng at intel.com>
> ---
>  backend/src/backend/gen/gen_mesa_disasm.c          | 14 ++++
>  backend/src/backend/gen7_instruction.hpp           | 15 ++++
>  backend/src/backend/gen_context.cpp                | 98 ++++++++++++++++++++++
>  backend/src/backend/gen_context.hpp                |  1 +
>  backend/src/backend/gen_defs.hpp                   | 15 ++++
>  backend/src/backend/gen_encoder.cpp                | 44 ++++++++++
>  backend/src/backend/gen_encoder.hpp                | 13 +++
>  .../src/backend/gen_insn_gen7_schedule_info.hxx    |  1 +
>  backend/src/backend/gen_insn_selection.cpp         | 73 ++++++++++++++++
>  backend/src/backend/gen_insn_selection.hpp         | 14 +++-
>  backend/src/backend/gen_insn_selection.hxx         |  1 +
>  backend/src/ir/instruction.cpp                     | 66 +++++++++++++++
>  backend/src/ir/instruction.hpp                     | 17 +++-
>  backend/src/ir/instruction.hxx                     |  1 +
>  backend/src/libocl/include/ocl_misc.h              | 15 ++++
>  backend/src/llvm/llvm_gen_backend.cpp              | 47 +++++++++++
>  backend/src/llvm/llvm_gen_ocl_function.hxx         |  2 +
>  backend/src/llvm/llvm_scalarize.cpp                |  4 +
>  18 files changed, 436 insertions(+), 5 deletions(-)
> 
> diff --git a/backend/src/backend/gen/gen_mesa_disasm.c
> b/backend/src/backend/gen/gen_mesa_disasm.c
> index 5b71cfa..3198da7 100644
> --- a/backend/src/backend/gen/gen_mesa_disasm.c
> +++ b/backend/src/backend/gen/gen_mesa_disasm.c
> @@ -476,6 +476,13 @@ static int column;
> 
>  static int gen_version;
> 
> +#define GEN7_BITS_FIELD(inst, gen7) \
> +  ({                                                            \
> +    int bits;                                                   \
> +      bits = ((const union Gen7NativeInstruction *)inst)->gen7; \
> +    bits;                                                       \
> +  })
> +
>  #define GEN_BITS_FIELD(inst, gen)                               \
>    ({                                                            \
>      int bits;                                                   \
> @@ -530,6 +537,8 @@ static int gen_version;
>  #define EXECUTION_SIZE(inst)       GEN_BITS_FIELD(inst, header.execution_size)
>  #define BRANCH_JIP(inst)           GEN_BITS_FIELD2(inst, bits3.gen7_branch.jip,
> bits3.gen8_branch.jip/8)
>  #define BRANCH_UIP(inst)           GEN_BITS_FIELD2(inst, bits3.gen7_branch.uip,
> bits2.gen8_branch.uip/8)
> +#define VME_BTI(inst)              GEN7_BITS_FIELD(inst, bits3.vme_gen7.bti)
> +#define VME_MSG_TYPE(inst)         GEN7_BITS_FIELD(inst,
> bits3.vme_gen7.msg_type)
>  #define SAMPLE_BTI(inst)           GEN_BITS_FIELD(inst, bits3.sampler_gen7.bti)
>  #define SAMPLER(inst)              GEN_BITS_FIELD(inst,
> bits3.sampler_gen7.sampler)
>  #define SAMPLER_MSG_TYPE(inst)     GEN_BITS_FIELD(inst,
> bits3.sampler_gen7.msg_type)
> @@ -1431,6 +1440,11 @@ int gen_disasm (FILE *file, const void *inst, uint32_t
> deviceID, uint32_t compac
> 
>      if (GEN_BITS_FIELD2(inst, bits1.da1.src1_reg_file, bits2.da1.src1_reg_file) ==
> GEN_IMMEDIATE_VALUE) {
>        switch (target) {
> +        case GEN_SFID_VIDEO_MOTION_EST:
> +          format(file, " (bti: %d, msg_type: %d)",
> +                 VME_BTI(inst),
> +                 VME_MSG_TYPE(inst));
> +          break;
>          case GEN_SFID_SAMPLER:
>            format(file, " (%d, %d, %d, %d)",
>                   SAMPLE_BTI(inst),
> diff --git a/backend/src/backend/gen7_instruction.hpp
> b/backend/src/backend/gen7_instruction.hpp
> index 51f342b..258dd24 100644
> --- a/backend/src/backend/gen7_instruction.hpp
> +++ b/backend/src/backend/gen7_instruction.hpp
> @@ -350,6 +350,21 @@ union Gen7NativeInstruction
>          uint32_t end_of_thread:1;
>        } sampler_gen7;
> 
> +      struct {
> +        uint32_t bti:8;
> +        uint32_t vme_search_path_lut:3;
> +        uint32_t lut_sub:2;
> +        uint32_t msg_type:2;
> +        uint32_t stream_in:1;
> +        uint32_t stream_out:1;
> +        uint32_t reserved_mbz:2;
> +        uint32_t header_present:1;
> +        uint32_t response_length:5;
> +        uint32_t msg_length:4;
> +        uint32_t pad1:2;
> +        uint32_t end_of_thread:1;
> +      } vme_gen7;
> +
>        /**
>         * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
>         *
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 4e2ebfb..ccc9f17 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -2266,6 +2266,104 @@ namespace gbe
>      p->SAMPLE(dst, msgPayload, msgLen, false, bti, sampler, simdWidth, -1, 0,
> insn.extra.isLD, insn.extra.isUniform);
>    }
> 
> +  void GenContext::emitVmeInstruction(const SelectionInstruction &insn) {
> +    const GenRegister dst = ra->genReg(insn.dst(0));
> +    const unsigned int msg_type = insn.extra.msg_type;
> +
> +    GBE_ASSERT(msg_type == 1);
> +    int rsp_len;
> +    if(msg_type == 1)
> +      rsp_len = 6;
> +    uint32_t execWidth_org = p->curr.execWidth;
> +    p->push();
> +    p->curr.predicate = GEN_PREDICATE_NONE;
> +    p->curr.noMask = 1;
> +    p->curr.execWidth = 1;
> +    /* Use MOV to Setup bits of payload: mov payload value stored in insn.src(x)
> to
> +     * 5 consecutive payload grf.
> +     * In simd8 mode, one virtual grf register map to one physical grf register.
> But
> +     * in simd16 mode, one virtual grf register map to two physical grf registers.
> +     * So we should treat them differently.
> +     * */
> +    if(execWidth_org == 8){
> +      for(int i=0; i < 5; i++){
> +        GenRegister payload_grf = ra->genReg(insn.dst(rsp_len+i));
> +        payload_grf.vstride = GEN_VERTICAL_STRIDE_0;
> +        payload_grf.width = GEN_WIDTH_1;
> +        payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0;
> +        payload_grf.subphysical = 1;
> +        for(int j=0; j < 8; j++){
> +          payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD);
> +          GenRegister payload_val = ra->genReg(insn.src(i*8+j));
> +          payload_val.vstride = GEN_VERTICAL_STRIDE_0;
> +          payload_val.width = GEN_WIDTH_1;
> +          payload_val.hstride = GEN_HORIZONTAL_STRIDE_0;
> +
> +          p->MOV(payload_grf, payload_val);
> +        }
> +      }
> +    }
> +    else if(execWidth_org == 16){
> +      for(int i=0; i < 2; i++){
> +        for(int k = 0; k < 2; k++){
> +          GenRegister payload_grf = ra->genReg(insn.dst(rsp_len+i));
> +          payload_grf.nr += k;
> +          payload_grf.vstride = GEN_VERTICAL_STRIDE_0;
> +          payload_grf.width = GEN_WIDTH_1;
> +          payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0;
> +          payload_grf.subphysical = 1;
> +          for(int j=0; j < 8; j++){
> +            payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD);
> +            GenRegister payload_val = ra->genReg(insn.src(i*16+k*8+j));
> +            payload_val.vstride = GEN_VERTICAL_STRIDE_0;
> +            payload_val.width = GEN_WIDTH_1;
> +            payload_val.hstride = GEN_HORIZONTAL_STRIDE_0;
> +
> +            p->MOV(payload_grf, payload_val);
> +          }
> +        }
> +      }
> +      {
> +        int i = 2;
> +        GenRegister payload_grf = ra->genReg(insn.dst(rsp_len+i));
> +        payload_grf.vstride = GEN_VERTICAL_STRIDE_0;
> +        payload_grf.width = GEN_WIDTH_1;
> +        payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0;
> +        payload_grf.subphysical = 1;
> +        for(int j=0; j < 8; j++){
> +          payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD);
> +          GenRegister payload_val = ra->genReg(insn.src(i*16+j));
> +          payload_val.vstride = GEN_VERTICAL_STRIDE_0;
> +          payload_val.width = GEN_WIDTH_1;
> +          payload_val.hstride = GEN_HORIZONTAL_STRIDE_0;
> +
> +          p->MOV(payload_grf, payload_val);
> +        }
> +      }
> +    }
> +    p->pop();
> +
> +    p->push();
> +    p->curr.predicate = GEN_PREDICATE_NONE;
> +    p->curr.noMask = 1;
> +    p->curr.execWidth = 1;
> +    GenRegister payload_did = GenRegister::retype(ra-
> >genReg(insn.dst(rsp_len)), GEN_TYPE_UB);
> +    payload_did.vstride = GEN_VERTICAL_STRIDE_0;
> +    payload_did.width = GEN_WIDTH_1;
> +    payload_did.hstride = GEN_HORIZONTAL_STRIDE_0;
> +    payload_did.subphysical = 1;
> +    payload_did.subnr = 20 * typeSize(GEN_TYPE_UB);
> +    GenRegister grf0 = GenRegister::ub1grf(0, 20);
> +    p->MOV(payload_did, grf0);
> +    p->pop();
> +
> +    const GenRegister msgPayload = ra->genReg(insn.dst(rsp_len));
> +    const unsigned char bti = insn.getbti();
> +    const unsigned int vme_search_path_lut = insn.extra.vme_search_path_lut;
> +    const unsigned int lut_sub = insn.extra.lut_sub;
> +    p->VME(bti, dst, msgPayload, msg_type, vme_search_path_lut, lut_sub);
> +  }
> +
>    void GenContext::scratchWrite(const GenRegister header, uint32_t offset,
> uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode) {
>      p->push();
>      uint32_t simdWidth = p->curr.execWidth;
> diff --git a/backend/src/backend/gen_context.hpp
> b/backend/src/backend/gen_context.hpp
> index 4044694..870266c 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -167,6 +167,7 @@ namespace gbe
>      virtual void emitUnpackLongInstruction(const SelectionInstruction &insn);
>      void emitDWordGatherInstruction(const SelectionInstruction &insn);
>      void emitSampleInstruction(const SelectionInstruction &insn);
> +    void emitVmeInstruction(const SelectionInstruction &insn);
>      void emitTypedWriteInstruction(const SelectionInstruction &insn);
>      void emitSpillRegInstruction(const SelectionInstruction &insn);
>      void emitUnSpillRegInstruction(const SelectionInstruction &insn);
> diff --git a/backend/src/backend/gen_defs.hpp
> b/backend/src/backend/gen_defs.hpp
> index 1b550ac..09cb2ba 100644
> --- a/backend/src/backend/gen_defs.hpp
> +++ b/backend/src/backend/gen_defs.hpp
> @@ -615,6 +615,21 @@ union GenNativeInstruction
>          uint32_t end_of_thread:1;
>        } sampler_gen7;
> 
> +      struct {
> +        uint32_t bti:8;
> +        uint32_t vme_search_path_lut:3;
> +        uint32_t lut_sub:2;
> +        uint32_t msg_type:2;
> +        uint32_t stream_in:1;
> +        uint32_t stream_out:1;
> +        uint32_t reserved_mbz:2;
> +        uint32_t header_present:1;
> +        uint32_t response_length:5;
> +        uint32_t msg_length:4;
> +        uint32_t pad1:2;
> +        uint32_t end_of_thread:1;
> +      } vme_gen7;
> +
>        /**
>         * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
>         *
> diff --git a/backend/src/backend/gen_encoder.cpp
> b/backend/src/backend/gen_encoder.cpp
> index 2cc51cc..be38cef 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -1191,6 +1191,50 @@ namespace gbe
>                         simd_mode, return_format);
>    }
> 
> +  void GenEncoder::setVmeMessage(GenNativeInstruction *insn,
> +                                unsigned char bti,
> +                                uint32_t response_length,
> +                                uint32_t msg_length,
> +                                uint32_t msg_type,
> +                                unsigned char vme_search_path_lut,
> +                                unsigned char lut_sub)
> +  {
> +     const GenMessageTarget sfid = GEN_SFID_VIDEO_MOTION_EST;
> +     setMessageDescriptor(insn, sfid, msg_length, response_length, true);
> +     insn->bits3.vme_gen7.bti = bti;
> +     insn->bits3.vme_gen7.vme_search_path_lut = vme_search_path_lut;
> +     insn->bits3.vme_gen7.lut_sub = lut_sub;
> +     insn->bits3.vme_gen7.msg_type = msg_type;
> +     insn->bits3.vme_gen7.stream_in = 0;
> +     insn->bits3.vme_gen7.stream_out = 0;
> +     insn->bits3.vme_gen7.reserved_mbz = 0;
> +
> +  }
> +
> +  void GenEncoder::VME(unsigned char bti,
> +                       GenRegister dest,
> +                       GenRegister msg,
> +                       uint32_t msg_type,
> +                       uint32_t vme_search_path_lut,
> +                       uint32_t lut_sub)
> +  {
> +    /* Currently we support inter search only; other modes will be supported
> +     * in the future.
> +     */
> +    GBE_ASSERT(msg_type == 1);
> +    uint32_t msg_length, response_length;
> +    if(msg_type == 1){
> +      msg_length = 5;
> +      response_length = 6;
> +    }
> +    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> +    this->setHeader(insn);
> +    this->setDst(insn, dest);
> +    this->setSrc0(insn, msg);
> +    setVmeMessage(insn, bti, response_length, msg_length,
> +                  msg_type, vme_search_path_lut, lut_sub);
> +  }
> +
>    void GenEncoder::TYPED_WRITE(GenRegister msg, bool header_present,
> unsigned char bti)
>    {
>       GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> diff --git a/backend/src/backend/gen_encoder.hpp
> b/backend/src/backend/gen_encoder.hpp
> index f2bb5ab..6df7087 100644
> --- a/backend/src/backend/gen_encoder.hpp
> +++ b/backend/src/backend/gen_encoder.hpp
> @@ -203,6 +203,19 @@ namespace gbe
>                             bool header_present,
>                             uint32_t simd_mode,
>                             uint32_t return_format);
> +    virtual void VME(unsigned char bti,
> +                         GenRegister dest,
> +                         GenRegister msg,
> +                         uint32_t msg_type,
> +                         uint32_t vme_search_path_lut,
> +                         uint32_t lut_sub);
> +    void setVmeMessage(GenNativeInstruction *insn,
> +                          unsigned char bti,
> +                          uint32_t response_length,
> +                          uint32_t msg_length,
> +                          uint32_t msg_type,
> +                          unsigned char vme_search_path_lut,
> +                          unsigned char lut_sub);
> 
>      /*! TypedWrite instruction for texture */
>      virtual void TYPED_WRITE(GenRegister header,
> diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index 9b60c17..878e0e7 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -36,6 +36,7 @@ DECL_GEN7_SCHEDULE(UnpackByte,      40,        1,        1)
>  DECL_GEN7_SCHEDULE(PackLong,        40,        1,        1)
>  DECL_GEN7_SCHEDULE(UnpackLong,      40,        1,        1)
>  DECL_GEN7_SCHEDULE(Sample,          160,       1,        1)
> +DECL_GEN7_SCHEDULE(Vme,             320,       1,        1)
>  DECL_GEN7_SCHEDULE(TypedWrite,      80,        1,        1)
>  DECL_GEN7_SCHEDULE(SpillReg,        20,        1,        1)
>  DECL_GEN7_SCHEDULE(UnSpillReg,      160,       1,        1)
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index 2452aea..cfaa792 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -187,6 +187,7 @@ namespace gbe
>             this->opcode == SEL_OP_ATOMIC       ||
>             this->opcode == SEL_OP_BYTE_GATHER  ||
>             this->opcode == SEL_OP_SAMPLE ||
> +           this->opcode == SEL_OP_VME ||
>             this->opcode == SEL_OP_DWORD_GATHER;
>    }
> 
> @@ -661,6 +662,8 @@ namespace gbe
>      void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2);
>      /*! Encode sample instructions */
>      void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *msgPayloads,
> uint32_t msgNum, uint32_t bti, uint32_t sampler, bool isLD, bool isUniform);
> +    /*! Encode vme instructions */
> +    void VME(uint32_t bti, GenRegister *dst, GenRegister *payloadVal, uint32_t
> dstNum, uint32_t srcNum, uint32_t msg_type, uint32_t vme_search_path_lut,
> uint32_t lut_sub);
>      /*! Encode typed write instructions */
>      void TYPED_WRITE(GenRegister *msgs, uint32_t msgNum, uint32_t bti, bool
> is3D);
>      /*! Get image information */
> @@ -2120,6 +2123,34 @@ namespace gbe
>      insn->extra.isUniform = isUniform;
>    }
> 
> +  void Selection::Opaque::VME(uint32_t bti, GenRegister *dst, GenRegister
> *payloadVal,
> +                              uint32_t dstNum, uint32_t srcNum, uint32_t msg_type,
> +                              uint32_t vme_search_path_lut, uint32_t lut_sub) {
> +    SelectionInstruction *insn = this->appendInsn(SEL_OP_VME, dstNum,
> srcNum);
> +    SelectionVector *dstVector = this->appendVector();
> +    SelectionVector *msgVector = this->appendVector();
> +
> +    for (uint32_t elemID = 0; elemID < dstNum; ++elemID)
> +      insn->dst(elemID) = dst[elemID];
> +    for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
> +      insn->src(elemID) = payloadVal[elemID];
> +
> +    dstVector->regNum = dstNum;
> +    dstVector->isSrc = 0;
> +    dstVector->offsetID = 0;
> +    dstVector->reg = &insn->dst(0);
> +
> +    msgVector->regNum = srcNum;
> +    msgVector->isSrc = 1;
> +    msgVector->offsetID = 0;
> +    msgVector->reg = &insn->src(0);
> +
> +    insn->setbti(bti);
> +    insn->extra.msg_type = msg_type;
> +    insn->extra.vme_search_path_lut = vme_search_path_lut;
> +    insn->extra.lut_sub = lut_sub;
> +  }
> +
>    ///////////////////////////////////////////////////////////////////////////
>    // Code selection public implementation
>    ///////////////////////////////////////////////////////////////////////////
> @@ -5126,6 +5157,47 @@ namespace gbe
>      DECL_CTOR(SampleInstruction, 1, 1);
>    };
> 
> +  DECL_PATTERN(VmeInstruction)
> +  {
> +    INLINE bool emitOne(Selection::Opaque &sel, const ir::VmeInstruction &insn,
> bool &markChildren) const
> +    {
> +      using namespace ir;
> +      uint32_t msg_type, vme_search_path_lut, lut_sub;
> +      msg_type = insn.getMsgType();
> +      vme_search_path_lut = 0;
> +      lut_sub = 0;
> +      GBE_ASSERT(msg_type == 1);
> +      uint32_t payloadLen = 0;
> +      //We allocate 5 virtual payload grfs to selection dst register.
> +      if(msg_type == 1){
> +        payloadLen = 5;
> +      }
> +      uint32_t selDstNum = insn.getDstNum() + payloadLen;
> +      uint32_t srcNum = insn.getSrcNum();
> +      vector<GenRegister> dst(selDstNum);
> +      vector<GenRegister> payloadVal(srcNum);
> +      uint32_t valueID = 0;
> +      for (valueID = 0; valueID < insn.getDstNum(); ++valueID)
> +        dst[valueID] = sel.selReg(insn.getDst(valueID), insn.getDstType());
> +      for (valueID = insn.getDstNum(); valueID < selDstNum; ++valueID)
> +        dst[valueID] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> +
> +      for (valueID = 0; valueID < srcNum; ++valueID)
> +        payloadVal[valueID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType());
> +
> +      uint32_t bti = insn.getImageIndex();
> +      if (bti > BTI_MAX_ID) {
> +        std::cerr << "Too large bti " << bti;
> +        return false;
> +      }
> +
> +      sel.VME(bti, dst.data(), payloadVal.data(), selDstNum, srcNum, msg_type,
> vme_search_path_lut, lut_sub);
> +
> +      return true;
> +    }
> +    DECL_CTOR(VmeInstruction, 1, 1);
> +  };
> +
>    /*! Typed write instruction pattern. */
>    DECL_PATTERN(TypedWriteInstruction)
>    {
> @@ -5591,6 +5663,7 @@ namespace gbe
>      this->insert<MulAddInstructionPattern>();
>      this->insert<SelectModifierInstructionPattern>();
>      this->insert<SampleInstructionPattern>();
> +    this->insert<VmeInstructionPattern>();
>      this->insert<GetImageInfoInstructionPattern>();
>      this->insert<ReadARFInstructionPattern>();
>      this->insert<RegionInstructionPattern>();
> diff --git a/backend/src/backend/gen_insn_selection.hpp
> b/backend/src/backend/gen_insn_selection.hpp
> index f51c905..578db41 100644
> --- a/backend/src/backend/gen_insn_selection.hpp
> +++ b/backend/src/backend/gen_insn_selection.hpp
> @@ -90,8 +90,8 @@ namespace gbe
>      const GenRegister &dst(uint32_t dstID) const { return regs[dstID]; }
>      /*! Damn C++ */
>      const GenRegister &src(uint32_t srcID) const { return regs[dstNum+srcID]; }
> -    /*! No more than 9 sources (used by typed writes on simd8 mode.) */
> -    enum { MAX_SRC_NUM = 9 };
> +    /*! No more than 40 sources (40 sources are used by vme for payload
> passing and setting) */
> +    enum { MAX_SRC_NUM = 40 };
>      /*! No more than 16 destinations (15 used by I64DIV/I64REM) */
>      enum { MAX_DST_NUM = 16 };
>      /*! State of the instruction (extra fields neeed for the encoding) */
> @@ -129,6 +129,12 @@ namespace gbe
>          bool     isLD;  // is this a ld message?
>          bool     isUniform;
>        };
> +      struct {
> +        uint16_t vme_bti:8;
> +        uint16_t msg_type:2;
> +        uint16_t vme_search_path_lut:3;
> +        uint16_t lut_sub:2;
> +      };
>        uint32_t barrierType;
>        bool longjmp;
>        uint32_t indirect_offset;
> @@ -138,7 +144,7 @@ namespace gbe
>      /*! Number of destinations */
>      uint8_t dstNum:5;
>      /*! Number of sources */
> -    uint8_t srcNum:4;
> +    uint8_t srcNum:6;
>      /*! To store various indices */
>      uint32_t index;
>      /*! For BRC/IF to store the UIP */
> @@ -152,6 +158,7 @@ namespace gbe
>        switch (opcode) {
>          case SEL_OP_DWORD_GATHER: return extra.function;
>          case SEL_OP_SAMPLE: return extra.rdbti;
> +        case SEL_OP_VME: return extra.vme_bti;
>          case SEL_OP_TYPED_WRITE: return extra.bti;
>          default:
>            GBE_ASSERT(0);
> @@ -164,6 +171,7 @@ namespace gbe
>        switch (opcode) {
>          case SEL_OP_DWORD_GATHER: extra.function = bti; return;
>          case SEL_OP_SAMPLE: extra.rdbti = bti; return;
> +        case SEL_OP_VME: extra.vme_bti = bti; return;
>          case SEL_OP_TYPED_WRITE: extra.bti = bti; return;
>          default:
>            GBE_ASSERT(0);
> diff --git a/backend/src/backend/gen_insn_selection.hxx
> b/backend/src/backend/gen_insn_selection.hxx
> index 479398b..4d3e921 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -65,6 +65,7 @@ DECL_SELECTION_IR(UNPACK_BYTE,
> UnpackByteInstruction)
>  DECL_SELECTION_IR(PACK_LONG, PackLongInstruction)
>  DECL_SELECTION_IR(UNPACK_LONG, UnpackLongInstruction)
>  DECL_SELECTION_IR(SAMPLE, SampleInstruction)
> +DECL_SELECTION_IR(VME, VmeInstruction)
>  DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
>  DECL_SELECTION_IR(SPILL_REG, SpillRegInstruction)
>  DECL_SELECTION_IR(UNSPILL_REG, UnSpillRegInstruction)
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index f93c528..7bf787e 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -595,6 +595,58 @@ namespace ir {
>        static const uint32_t dstNum = 4;
>      };
> 
> +    class ALIGNED_INSTRUCTION VmeInstruction :
> +      public BasePolicy,
> +      public TupleSrcPolicy<VmeInstruction>,
> +      public TupleDstPolicy<VmeInstruction>
> +    {
> +    public:
> +      VmeInstruction(uint8_t imageIdx, Tuple dstTuple, Tuple srcTuple,
> +                     uint32_t dstNum, uint32_t srcNum, int msg_type,
> +                     int vme_search_path_lut, int lut_sub) {
> +        this->opcode = OP_VME;
> +        this->dst = dstTuple;
> +        this->src = srcTuple;
> +        this->dstNum = dstNum;
> +        this->srcNum = srcNum;
> +        this->imageIdx = imageIdx;
> +        this->msg_type = msg_type;
> +        this->vme_search_path_lut = vme_search_path_lut;
> +        this->lut_sub = lut_sub;
> +      }
> +      INLINE bool wellFormed(const Function &fn, std::string &why) const;
> +      INLINE void out(std::ostream &out, const Function &fn) const {
> +        this->outOpcode(out);
> +        out << " src_surface id " << (int)this->getImageIndex()
> +            << " ref_surface id " << (int)this->getImageIndex() + 1;
> +        for(uint32_t i = 0; i < dstNum; i++){
> +          out<< " %" << this->getDst(fn, i);
> +        }
> +        for(uint32_t i = 0; i < srcNum; i++){
> +          out<< " %" << this->getSrc(fn, i);
> +        }
> +        out
> +            << " msg_type " << (int)this->getMsgType()
> +            << " vme_search_path_lut " << (int)this->vme_search_path_lut
> +            << " lut_sub " << (int)this->lut_sub;
> +      }
> +      Tuple src;
> +      Tuple dst;
> +
> +      INLINE uint8_t getImageIndex(void) const { return this->imageIdx; }
> +      INLINE uint8_t getMsgType(void) const { return this->msg_type; }
> +
> +      INLINE Type getSrcType(void) const { return TYPE_U32; }
> +      INLINE Type getDstType(void) const { return TYPE_U32; }
> +      uint8_t imageIdx;
> +      uint8_t msg_type;
> +      uint8_t vme_search_path_lut;
> +      uint8_t lut_sub;
> +      uint32_t srcNum;
> +      uint32_t dstNum;
> +    };
> +
> +
>      class ALIGNED_INSTRUCTION TypedWriteInstruction : // TODO
>        public BasePolicy,
>        public TupleSrcPolicy<TypedWriteInstruction>,
> @@ -1111,6 +1163,8 @@ namespace ir {
>      // TODO
>      INLINE bool SampleInstruction::wellFormed(const Function &fn, std::string
> &why) const
>      { return true; }
> +    INLINE bool VmeInstruction::wellFormed(const Function &fn, std::string
> &why) const
> +    { return true; }
>      INLINE bool TypedWriteInstruction::wellFormed(const Function &fn,
> std::string &why) const
>      { return true; }
>      INLINE bool GetImageInfoInstruction::wellFormed(const Function &fn,
> std::string &why) const
> @@ -1502,6 +1556,10 @@ START_INTROSPECTION(LabelInstruction)
>  #include "ir/instruction.hxx"
>  END_INTROSPECTION(LabelInstruction)
> 
> +START_INTROSPECTION(VmeInstruction)
> +#include "ir/instruction.hxx"
> +END_INTROSPECTION(VmeInstruction)
> +
>  #undef END_INTROSPECTION
>  #undef START_INTROSPECTION
>  #undef DECL_INSN
> @@ -1694,6 +1752,10 @@ DECL_MEM_FN(SampleInstruction, Type,
> getDstType(void), getDstType())
>  DECL_MEM_FN(SampleInstruction, uint8_t, getSamplerIndex(void),
> getSamplerIndex())
>  DECL_MEM_FN(SampleInstruction, uint8_t, getSamplerOffset(void),
> getSamplerOffset())
>  DECL_MEM_FN(SampleInstruction, uint8_t, getImageIndex(void),
> getImageIndex())
> +DECL_MEM_FN(VmeInstruction, Type, getSrcType(void), getSrcType())
> +DECL_MEM_FN(VmeInstruction, Type, getDstType(void), getDstType())
> +DECL_MEM_FN(VmeInstruction, uint8_t, getImageIndex(void),
> getImageIndex())
> +DECL_MEM_FN(VmeInstruction, uint8_t, getMsgType(void), getMsgType())
>  DECL_MEM_FN(TypedWriteInstruction, Type, getSrcType(void), getSrcType())
>  DECL_MEM_FN(TypedWriteInstruction, Type, getCoordType(void),
> getCoordType())
>  DECL_MEM_FN(TypedWriteInstruction, uint8_t, getImageIndex(void),
> getImageIndex())
> @@ -1932,6 +1994,10 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t,
> getImageIndex(void), getImageIndex
>      return internal::SampleInstruction(imageIndex, dst, src, srcNum, dstIsFloat,
> srcIsFloat, sampler, samplerOffset).convert();
>    }
> 
> +  Instruction VME(uint8_t imageIndex, Tuple dst, Tuple src, uint32_t dstNum,
> uint32_t srcNum, int msg_type, int vme_search_path_lut, int lut_sub) {
> +    return internal::VmeInstruction(imageIndex, dst, src, dstNum, srcNum,
> msg_type, vme_search_path_lut, lut_sub).convert();
> +  }
> +
>    Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, uint8_t srcNum, Type
> srcType, Type coordType) {
>      return internal::TypedWriteInstruction(imageIndex, src, srcNum, srcType,
> coordType).convert();
>    }
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index 3f3c655..c8da416 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -191,8 +191,8 @@ namespace ir {
>      template <typename T> INLINE bool isMemberOf(void) const {
>        return T::isClassOf(*this);
>      }
> -    /*! max_src for store instruction (vec16 + addr) */
> -    static const uint32_t MAX_SRC_NUM = 32;
> +    /*! max_src used by vme for payload passing and setting */
> +    static const uint32_t MAX_SRC_NUM = 40;
>      static const uint32_t MAX_DST_NUM = 32;
>    protected:
>      BasicBlock *parent;      //!< The basic block containing the instruction
> @@ -399,6 +399,17 @@ namespace ir {
>      static bool isClassOf(const Instruction &insn);
>    };
> 
> +  /*! Video motion estimation */
> +  class VmeInstruction : public Instruction {
> +  public:
> +    uint8_t getImageIndex() const;
> +    uint8_t getMsgType() const;
> +    Type getSrcType(void) const;
> +    Type getDstType(void) const;
> +    /*! Return true if the given instruction is an instance of this class */
> +    static bool isClassOf(const Instruction &insn);
> +  };
> +
>    typedef union _ImageInfoKey{
>      _ImageInfoKey(uint8_t i, uint8_t t) : index(i), type(t) {};
>      _ImageInfoKey(int key) : data(key) {};
> @@ -756,6 +767,8 @@ namespace ir {
>    Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, uint8_t srcNum, Type
> srcType, Type coordType);
>    /*! sample textures */
>    Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, uint8_t srcNum,
> bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset);
> +  /*! video motion estimation */
> +  Instruction VME(uint8_t imageIndex, Tuple dst, Tuple src, uint32_t dstNum,
> uint32_t srcNum, int msg_type, int vme_search_path_lut, int lut_sub);
>    /*! get image information , such as width/height/depth/... */
>    Instruction GET_IMAGE_INFO(int infoType, Register dst, uint8_t imageIndex,
> Register infoReg);
>    /*! label labelIndex */
> diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
> index 81548c9..27d59a9 100644
> --- a/backend/src/ir/instruction.hxx
> +++ b/backend/src/ir/instruction.hxx
> @@ -85,6 +85,7 @@ DECL_INSN(SYNC, SyncInstruction)
>  DECL_INSN(LABEL, LabelInstruction)
>  DECL_INSN(READ_ARF, ReadARFInstruction)
>  DECL_INSN(REGION, RegionInstruction)
> +DECL_INSN(VME, VmeInstruction)
>  DECL_INSN(INDIRECT_MOV, IndirectMovInstruction)
>  DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction)
>  DECL_INSN(MUL_HI, BinaryInstruction)
> diff --git a/backend/src/libocl/include/ocl_misc.h
> b/backend/src/libocl/include/ocl_misc.h
> index 359025b..7d4abab 100644
> --- a/backend/src/libocl/include/ocl_misc.h
> +++ b/backend/src/libocl/include/ocl_misc.h
> @@ -136,5 +136,20 @@ struct time_stamp {
>    uint event;
>  };
> 
> +uint __gen_ocl_region(ushort offset, uint data);
> +
>  struct time_stamp __gen_ocl_get_timestamp(void);
> +
> +uint8 __gen_ocl_vme(image2d_t, image2d_t,
> +                   uint, uint, uint, uint,
> +                   uint, uint, uint, uint,
> +                   uint, uint, uint, uint,
> +                   uint, uint, uint, uint,
> +                   uint, uint, uint, uint,
> +                   uint, uint, uint, uint,
> +                   uint, uint, uint, uint,
> +                   uint, uint, uint, uint,
> +                   uint, uint, uint, uint,
> +                   uint, uint, uint, uint,
> +                   int, int, int);
>  #endif
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index 7299d53..19927ba 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -3541,6 +3541,7 @@ namespace gbe
>        case GEN_OCL_REGION:
>        case GEN_OCL_SIMD_ID:
>        case GEN_OCL_SIMD_SHUFFLE:
> +      case GEN_OCL_VME:
>          this->newRegister(&I);
>          break;
>        case GEN_OCL_PRINTF:
> @@ -3839,6 +3840,52 @@ namespace gbe
>              ctx.READ_ARF(ir::TYPE_U32, dst, ir::ARF_TM);
>              break;
>            }
> +          case GEN_OCL_VME:
> +          {
> +
> +            const uint8_t imageID = getImageID(I);
> +
> +            AI++;
> +            AI++;
> +
> +            uint32_t src_length = 40;
> +
> +            vector<ir::Register> dstTupleData, srcTupleData;
> +            for (uint32_t i = 0; i < src_length; i++, AI++){
> +              srcTupleData.push_back(this->getRegister(*AI));
> +            }
> +
> +            const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], src_length);
> +
> +            Constant *msg_type_cpv = dyn_cast<Constant>(*AI);
> +            assert(msg_type_cpv);
> +            const ir::Immediate &msg_type_x = processConstantImm(msg_type_cpv);
> +            int msg_type = msg_type_x.getIntegerValue();
> +            uint32_t dst_length;
> +            // msg_type == 1 indicates the inter-search-only mode of the gen VME shared function
> +            GBE_ASSERT(msg_type == 1);
> +            if(msg_type == 1)
> +              dst_length = 6;
> +            for (uint32_t elemID = 0; elemID < dst_length; ++elemID) {
> +              const ir::Register reg = this->getRegister(&I, elemID);
> +              dstTupleData.push_back(reg);
> +            }
> +            const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], dst_length);
> +            ++AI;
> +            Constant *vme_search_path_lut_cpv = dyn_cast<Constant>(*AI);
> +            assert(vme_search_path_lut_cpv);
> +            const ir::Immediate &vme_search_path_lut_x = processConstantImm(vme_search_path_lut_cpv);
> +            ++AI;
> +            Constant *lut_sub_cpv = dyn_cast<Constant>(*AI);
> +            assert(lut_sub_cpv);
> +            const ir::Immediate &lut_sub_x = processConstantImm(lut_sub_cpv);
> +
> +            ctx.VME(imageID, dstTuple, srcTuple, dst_length, src_length,
> +                    msg_type, vme_search_path_lut_x.getIntegerValue(),
> +                    lut_sub_x.getIntegerValue());
> +
> +            break;
> +          }
>            case GEN_OCL_REGION:
>            {
>              const ir::Register dst = this->getRegister(&I);
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx
> b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index cabb225..3fbf847 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -168,5 +168,7 @@ DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE,
> intel_sub_group_shuffle)
>  DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm)
>  DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region)
> 
> +DECL_LLVM_GEN_FUNCTION(VME, __gen_ocl_vme)
> +
>  // printf function
>  DECL_LLVM_GEN_FUNCTION(PRINTF, __gen_ocl_printf)
> diff --git a/backend/src/llvm/llvm_scalarize.cpp
> b/backend/src/llvm/llvm_scalarize.cpp
> index 7ee5259..dc1d8ab 100644
> --- a/backend/src/llvm/llvm_scalarize.cpp
> +++ b/backend/src/llvm/llvm_scalarize.cpp
> @@ -671,6 +671,10 @@ namespace gbe {
>              *CI = InsertToVector(call, *CI);
>              break;
>            }
> +          case GEN_OCL_VME:
> +            setAppendPoint(call);
> +            extractFromVector(call);
> +            break;
>          }
>        }
>      }
> --
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list