[Beignet] [PATCH v6 1/4] Add built-in function __gen_ocl_vme.

Yang, Rong R rong.r.yang at intel.com
Mon Nov 9 23:10:10 PST 2015


Pushed, thanks.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Song, Ruiling
> Sent: Tuesday, November 10, 2015 11:44
> To: Weng, Chuanbo; beignet at lists.freedesktop.org
> Cc: Weng, Chuanbo
> Subject: Re: [Beignet] [PATCH v6 1/4] Add built-in function __gen_ocl_vme.
> 
> This version patchset LGTM.
> 
> Thanks!
> Ruiling
> 
> > -----Original Message-----
> > From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf
> > Of Chuanbo Weng
> > Sent: Friday, November 6, 2015 11:28 AM
> > To: beignet at lists.freedesktop.org
> > Cc: Weng, Chuanbo
> > Subject: [Beignet] [PATCH v6 1/4] Add built-in function __gen_ocl_vme.
> >
> > __gen_ocl_vme is used for hardware-accelerated video motion estimation.
> > It takes the payload values as parameters and uses MOV to copy these
> > values into the payload grfs of the VME SEND message. The int8 return
> > value is used to store the SEND message writeback.
> >
> > v2:
> > Remove 5 unnecessary parameters (src_grf*) of the built-in function (we
> > just need to allocate the related registers in the gen_insn_selection step).
> >
> > v3:
> > Remove redundant code and change MAX_SRC_NUM to 40.
> >
> > v4:
> > Choose message response length by message type instead of hard-coding it.
> >
> > v5:
> > Choose message response length by message type in the whole backend
> > pipeline.
> >
> > v6:
> > Treat simd8 and simd16 differently when moving payload values to
> > consecutive payload grfs.
> >
> > Signed-off-by: Chuanbo Weng <chuanbo.weng at intel.com>
> > ---
> >  backend/src/backend/gen/gen_mesa_disasm.c          | 14 ++++
> >  backend/src/backend/gen7_instruction.hpp           | 15 ++++
> >  backend/src/backend/gen_context.cpp                | 98
> ++++++++++++++++++++++
> >  backend/src/backend/gen_context.hpp                |  1 +
> >  backend/src/backend/gen_defs.hpp                   | 15 ++++
> >  backend/src/backend/gen_encoder.cpp                | 44 ++++++++++
> >  backend/src/backend/gen_encoder.hpp                | 13 +++
> >  .../src/backend/gen_insn_gen7_schedule_info.hxx    |  1 +
> >  backend/src/backend/gen_insn_selection.cpp         | 73
> ++++++++++++++++
> >  backend/src/backend/gen_insn_selection.hpp         | 14 +++-
> >  backend/src/backend/gen_insn_selection.hxx         |  1 +
> >  backend/src/ir/instruction.cpp                     | 66 +++++++++++++++
> >  backend/src/ir/instruction.hpp                     | 17 +++-
> >  backend/src/ir/instruction.hxx                     |  1 +
> >  backend/src/libocl/include/ocl_misc.h              | 15 ++++
> >  backend/src/llvm/llvm_gen_backend.cpp              | 47 +++++++++++
> >  backend/src/llvm/llvm_gen_ocl_function.hxx         |  2 +
> >  backend/src/llvm/llvm_scalarize.cpp                |  4 +
> >  18 files changed, 436 insertions(+), 5 deletions(-)
> >
> > diff --git a/backend/src/backend/gen/gen_mesa_disasm.c
> > b/backend/src/backend/gen/gen_mesa_disasm.c
> > index 5b71cfa..3198da7 100644
> > --- a/backend/src/backend/gen/gen_mesa_disasm.c
> > +++ b/backend/src/backend/gen/gen_mesa_disasm.c
> > @@ -476,6 +476,13 @@ static int column;
> >
> >  static int gen_version;
> >
> > +#define GEN7_BITS_FIELD(inst, gen7) \
> > +  ({                                                            \
> > +    int bits;                                                   \
> > +      bits = ((const union Gen7NativeInstruction *)inst)->gen7; \
> > +    bits;                                                       \
> > +  })
> > +
> >  #define GEN_BITS_FIELD(inst, gen)                               \
> >    ({                                                            \
> >      int bits;                                                   \
> > @@ -530,6 +537,8 @@ static int gen_version;
> >  #define EXECUTION_SIZE(inst)       GEN_BITS_FIELD(inst,
> header.execution_size)
> >  #define BRANCH_JIP(inst)           GEN_BITS_FIELD2(inst,
> bits3.gen7_branch.jip,
> > bits3.gen8_branch.jip/8)
> >  #define BRANCH_UIP(inst)           GEN_BITS_FIELD2(inst,
> bits3.gen7_branch.uip,
> > bits2.gen8_branch.uip/8)
> > +#define VME_BTI(inst)              GEN7_BITS_FIELD(inst, bits3.vme_gen7.bti)
> > +#define VME_MSG_TYPE(inst)         GEN7_BITS_FIELD(inst,
> > bits3.vme_gen7.msg_type)
> >  #define SAMPLE_BTI(inst)           GEN_BITS_FIELD(inst,
> bits3.sampler_gen7.bti)
> >  #define SAMPLER(inst)              GEN_BITS_FIELD(inst,
> > bits3.sampler_gen7.sampler)
> >  #define SAMPLER_MSG_TYPE(inst)     GEN_BITS_FIELD(inst,
> > bits3.sampler_gen7.msg_type)
> > @@ -1431,6 +1440,11 @@ int gen_disasm (FILE *file, const void *inst,
> > uint32_t deviceID, uint32_t compac
> >
> >      if (GEN_BITS_FIELD2(inst, bits1.da1.src1_reg_file,
> > bits2.da1.src1_reg_file) ==
> > GEN_IMMEDIATE_VALUE) {
> >        switch (target) {
> > +        case GEN_SFID_VIDEO_MOTION_EST:
> > +          format(file, " (bti: %d, msg_type: %d)",
> > +                 VME_BTI(inst),
> > +                 VME_MSG_TYPE(inst));
> > +          break;
> >          case GEN_SFID_SAMPLER:
> >            format(file, " (%d, %d, %d, %d)",
> >                   SAMPLE_BTI(inst),
> > diff --git a/backend/src/backend/gen7_instruction.hpp
> > b/backend/src/backend/gen7_instruction.hpp
> > index 51f342b..258dd24 100644
> > --- a/backend/src/backend/gen7_instruction.hpp
> > +++ b/backend/src/backend/gen7_instruction.hpp
> > @@ -350,6 +350,21 @@ union Gen7NativeInstruction
> >          uint32_t end_of_thread:1;
> >        } sampler_gen7;
> >
> > +      struct {
> > +        uint32_t bti:8;
> > +        uint32_t vme_search_path_lut:3;
> > +        uint32_t lut_sub:2;
> > +        uint32_t msg_type:2;
> > +        uint32_t stream_in:1;
> > +        uint32_t stream_out:1;
> > +        uint32_t reserved_mbz:2;
> > +        uint32_t header_present:1;
> > +        uint32_t response_length:5;
> > +        uint32_t msg_length:4;
> > +        uint32_t pad1:2;
> > +        uint32_t end_of_thread:1;
> > +      } vme_gen7;
> > +
> >        /**
> >         * Message for the Sandybridge Sampler Cache or Constant Cache Data
> Port.
> >         *
> > diff --git a/backend/src/backend/gen_context.cpp
> > b/backend/src/backend/gen_context.cpp
> > index 4e2ebfb..ccc9f17 100644
> > --- a/backend/src/backend/gen_context.cpp
> > +++ b/backend/src/backend/gen_context.cpp
> > @@ -2266,6 +2266,104 @@ namespace gbe
> >      p->SAMPLE(dst, msgPayload, msgLen, false, bti, sampler,
> > simdWidth, -1, 0, insn.extra.isLD, insn.extra.isUniform);
> >    }
> >
> > +  void GenContext::emitVmeInstruction(const SelectionInstruction &insn)
> {
> > +    const GenRegister dst = ra->genReg(insn.dst(0));
> > +    const unsigned int msg_type = insn.extra.msg_type;
> > +
> > +    GBE_ASSERT(msg_type == 1);
> > +    int rsp_len;
> > +    if(msg_type == 1)
> > +      rsp_len = 6;
> > +    uint32_t execWidth_org = p->curr.execWidth;
> > +    p->push();
> > +    p->curr.predicate = GEN_PREDICATE_NONE;
> > +    p->curr.noMask = 1;
> > +    p->curr.execWidth = 1;
> > +    /* Use MOV to set up the bits of the payload: mov the payload values stored in insn.src(x) to
> > +     * 5 consecutive payload grfs.
> > +     * In simd8 mode, one virtual grf register maps to one physical grf register. But
> > +     * in simd16 mode, one virtual grf register maps to two physical grf registers.
> > +     * So we should treat them differently.
> > +     * */
> > +    if(execWidth_org == 8){
> > +      for(int i=0; i < 5; i++){
> > +        GenRegister payload_grf = ra->genReg(insn.dst(rsp_len+i));
> > +        payload_grf.vstride = GEN_VERTICAL_STRIDE_0;
> > +        payload_grf.width = GEN_WIDTH_1;
> > +        payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0;
> > +        payload_grf.subphysical = 1;
> > +        for(int j=0; j < 8; j++){
> > +          payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD);
> > +          GenRegister payload_val = ra->genReg(insn.src(i*8+j));
> > +          payload_val.vstride = GEN_VERTICAL_STRIDE_0;
> > +          payload_val.width = GEN_WIDTH_1;
> > +          payload_val.hstride = GEN_HORIZONTAL_STRIDE_0;
> > +
> > +          p->MOV(payload_grf, payload_val);
> > +        }
> > +      }
> > +    }
> > +    else if(execWidth_org == 16){
> > +      for(int i=0; i < 2; i++){
> > +        for(int k = 0; k < 2; k++){
> > +          GenRegister payload_grf = ra->genReg(insn.dst(rsp_len+i));
> > +          payload_grf.nr += k;
> > +          payload_grf.vstride = GEN_VERTICAL_STRIDE_0;
> > +          payload_grf.width = GEN_WIDTH_1;
> > +          payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0;
> > +          payload_grf.subphysical = 1;
> > +          for(int j=0; j < 8; j++){
> > +            payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD);
> > +            GenRegister payload_val = ra->genReg(insn.src(i*16+k*8+j));
> > +            payload_val.vstride = GEN_VERTICAL_STRIDE_0;
> > +            payload_val.width = GEN_WIDTH_1;
> > +            payload_val.hstride = GEN_HORIZONTAL_STRIDE_0;
> > +
> > +            p->MOV(payload_grf, payload_val);
> > +          }
> > +        }
> > +      }
> > +      {
> > +        int i = 2;
> > +        GenRegister payload_grf = ra->genReg(insn.dst(rsp_len+i));
> > +        payload_grf.vstride = GEN_VERTICAL_STRIDE_0;
> > +        payload_grf.width = GEN_WIDTH_1;
> > +        payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0;
> > +        payload_grf.subphysical = 1;
> > +        for(int j=0; j < 8; j++){
> > +          payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD);
> > +          GenRegister payload_val = ra->genReg(insn.src(i*16+j));
> > +          payload_val.vstride = GEN_VERTICAL_STRIDE_0;
> > +          payload_val.width = GEN_WIDTH_1;
> > +          payload_val.hstride = GEN_HORIZONTAL_STRIDE_0;
> > +
> > +          p->MOV(payload_grf, payload_val);
> > +        }
> > +      }
> > +    }
> > +    p->pop();
> > +
> > +    p->push();
> > +    p->curr.predicate = GEN_PREDICATE_NONE;
> > +    p->curr.noMask = 1;
> > +    p->curr.execWidth = 1;
> > +    GenRegister payload_did = GenRegister::retype(ra-
> > >genReg(insn.dst(rsp_len)), GEN_TYPE_UB);
> > +    payload_did.vstride = GEN_VERTICAL_STRIDE_0;
> > +    payload_did.width = GEN_WIDTH_1;
> > +    payload_did.hstride = GEN_HORIZONTAL_STRIDE_0;
> > +    payload_did.subphysical = 1;
> > +    payload_did.subnr = 20 * typeSize(GEN_TYPE_UB);
> > +    GenRegister grf0 = GenRegister::ub1grf(0, 20);
> > +    p->MOV(payload_did, grf0);
> > +    p->pop();
> > +
> > +    const GenRegister msgPayload = ra->genReg(insn.dst(rsp_len));
> > +    const unsigned char bti = insn.getbti();
> > +    const unsigned int vme_search_path_lut =
> insn.extra.vme_search_path_lut;
> > +    const unsigned int lut_sub = insn.extra.lut_sub;
> > +    p->VME(bti, dst, msgPayload, msg_type, vme_search_path_lut,
> > + lut_sub);  }
> > +
> >    void GenContext::scratchWrite(const GenRegister header, uint32_t
> > offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode) {
> >      p->push();
> >      uint32_t simdWidth = p->curr.execWidth; diff --git
> > a/backend/src/backend/gen_context.hpp
> > b/backend/src/backend/gen_context.hpp
> > index 4044694..870266c 100644
> > --- a/backend/src/backend/gen_context.hpp
> > +++ b/backend/src/backend/gen_context.hpp
> > @@ -167,6 +167,7 @@ namespace gbe
> >      virtual void emitUnpackLongInstruction(const SelectionInstruction
> &insn);
> >      void emitDWordGatherInstruction(const SelectionInstruction &insn);
> >      void emitSampleInstruction(const SelectionInstruction &insn);
> > +    void emitVmeInstruction(const SelectionInstruction &insn);
> >      void emitTypedWriteInstruction(const SelectionInstruction &insn);
> >      void emitSpillRegInstruction(const SelectionInstruction &insn);
> >      void emitUnSpillRegInstruction(const SelectionInstruction &insn);
> > diff --git a/backend/src/backend/gen_defs.hpp
> > b/backend/src/backend/gen_defs.hpp
> > index 1b550ac..09cb2ba 100644
> > --- a/backend/src/backend/gen_defs.hpp
> > +++ b/backend/src/backend/gen_defs.hpp
> > @@ -615,6 +615,21 @@ union GenNativeInstruction
> >          uint32_t end_of_thread:1;
> >        } sampler_gen7;
> >
> > +      struct {
> > +        uint32_t bti:8;
> > +        uint32_t vme_search_path_lut:3;
> > +        uint32_t lut_sub:2;
> > +        uint32_t msg_type:2;
> > +        uint32_t stream_in:1;
> > +        uint32_t stream_out:1;
> > +        uint32_t reserved_mbz:2;
> > +        uint32_t header_present:1;
> > +        uint32_t response_length:5;
> > +        uint32_t msg_length:4;
> > +        uint32_t pad1:2;
> > +        uint32_t end_of_thread:1;
> > +      } vme_gen7;
> > +
> >        /**
> >         * Message for the Sandybridge Sampler Cache or Constant Cache Data
> Port.
> >         *
> > diff --git a/backend/src/backend/gen_encoder.cpp
> > b/backend/src/backend/gen_encoder.cpp
> > index 2cc51cc..be38cef 100644
> > --- a/backend/src/backend/gen_encoder.cpp
> > +++ b/backend/src/backend/gen_encoder.cpp
> > @@ -1191,6 +1191,50 @@ namespace gbe
> >                         simd_mode, return_format);
> >    }
> >
> > +  void GenEncoder::setVmeMessage(GenNativeInstruction *insn,
> > +                                unsigned char bti,
> > +                                uint32_t response_length,
> > +                                uint32_t msg_length,
> > +                                uint32_t msg_type,
> > +                                unsigned char vme_search_path_lut,
> > +                                unsigned char lut_sub)  {
> > +     const GenMessageTarget sfid = GEN_SFID_VIDEO_MOTION_EST;
> > +     setMessageDescriptor(insn, sfid, msg_length, response_length, true);
> > +     insn->bits3.vme_gen7.bti = bti;
> > +     insn->bits3.vme_gen7.vme_search_path_lut = vme_search_path_lut;
> > +     insn->bits3.vme_gen7.lut_sub = lut_sub;
> > +     insn->bits3.vme_gen7.msg_type = msg_type;
> > +     insn->bits3.vme_gen7.stream_in = 0;
> > +     insn->bits3.vme_gen7.stream_out = 0;
> > +     insn->bits3.vme_gen7.reserved_mbz = 0;
> > +
> > +  }
> > +
> > +  void GenEncoder::VME(unsigned char bti,
> > +                       GenRegister dest,
> > +                       GenRegister msg,
> > +                       uint32_t msg_type,
> > +                       uint32_t vme_search_path_lut,
> > +                       uint32_t lut_sub)  {
> > +    /* Currently we support inter search only; other modes will be
> > +     * supported in the future.
> > +     */
> > +    GBE_ASSERT(msg_type == 1);
> > +    uint32_t msg_length, response_length;
> > +    if(msg_type == 1){
> > +      msg_length = 5;
> > +      response_length = 6;
> > +    }
> > +    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> > +    this->setHeader(insn);
> > +    this->setDst(insn, dest);
> > +    this->setSrc0(insn, msg);
> > +    setVmeMessage(insn, bti, response_length, msg_length,
> > +                  msg_type, vme_search_path_lut, lut_sub);  }
> > +
> >    void GenEncoder::TYPED_WRITE(GenRegister msg, bool header_present,
> > unsigned char bti)
> >    {
> >       GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); diff
> > --git a/backend/src/backend/gen_encoder.hpp
> > b/backend/src/backend/gen_encoder.hpp
> > index f2bb5ab..6df7087 100644
> > --- a/backend/src/backend/gen_encoder.hpp
> > +++ b/backend/src/backend/gen_encoder.hpp
> > @@ -203,6 +203,19 @@ namespace gbe
> >                             bool header_present,
> >                             uint32_t simd_mode,
> >                             uint32_t return_format);
> > +    virtual void VME(unsigned char bti,
> > +                         GenRegister dest,
> > +                         GenRegister msg,
> > +                         uint32_t msg_type,
> > +                         uint32_t vme_search_path_lut,
> > +                         uint32_t lut_sub);
> > +    void setVmeMessage(GenNativeInstruction *insn,
> > +                          unsigned char bti,
> > +                          uint32_t response_length,
> > +                          uint32_t msg_length,
> > +                          uint32_t msg_type,
> > +                          unsigned char vme_search_path_lut,
> > +                          unsigned char lut_sub);
> >
> >      /*! TypedWrite instruction for texture */
> >      virtual void TYPED_WRITE(GenRegister header, diff --git
> > a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> > b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> > index 9b60c17..878e0e7 100644
> > --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> > +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> > @@ -36,6 +36,7 @@ DECL_GEN7_SCHEDULE(UnpackByte,      40,        1,        1)
> >  DECL_GEN7_SCHEDULE(PackLong,        40,        1,        1)
> >  DECL_GEN7_SCHEDULE(UnpackLong,      40,        1,        1)
> >  DECL_GEN7_SCHEDULE(Sample,          160,       1,        1)
> > +DECL_GEN7_SCHEDULE(Vme,             320,       1,        1)
> >  DECL_GEN7_SCHEDULE(TypedWrite,      80,        1,        1)
> >  DECL_GEN7_SCHEDULE(SpillReg,        20,        1,        1)
> >  DECL_GEN7_SCHEDULE(UnSpillReg,      160,       1,        1)
> > diff --git a/backend/src/backend/gen_insn_selection.cpp
> > b/backend/src/backend/gen_insn_selection.cpp
> > index 2452aea..cfaa792 100644
> > --- a/backend/src/backend/gen_insn_selection.cpp
> > +++ b/backend/src/backend/gen_insn_selection.cpp
> > @@ -187,6 +187,7 @@ namespace gbe
> >             this->opcode == SEL_OP_ATOMIC       ||
> >             this->opcode == SEL_OP_BYTE_GATHER  ||
> >             this->opcode == SEL_OP_SAMPLE ||
> > +           this->opcode == SEL_OP_VME ||
> >             this->opcode == SEL_OP_DWORD_GATHER;
> >    }
> >
> > @@ -661,6 +662,8 @@ namespace gbe
> >      void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg
> src2);
> >      /*! Encode sample instructions */
> >      void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister
> > *msgPayloads, uint32_t msgNum, uint32_t bti, uint32_t sampler, bool
> > isLD, bool isUniform);
> > +    /*! Encode vme instructions */
> > +    void VME(uint32_t bti, GenRegister *dst, GenRegister *payloadVal,
> > + uint32_t
> > dstNum, uint32_t srcNum, uint32_t msg_type, uint32_t
> > vme_search_path_lut, uint32_t lut_sub);
> >      /*! Encode typed write instructions */
> >      void TYPED_WRITE(GenRegister *msgs, uint32_t msgNum, uint32_t
> > bti, bool is3D);
> >      /*! Get image information */
> > @@ -2120,6 +2123,34 @@ namespace gbe
> >      insn->extra.isUniform = isUniform;
> >    }
> >
> > +  void Selection::Opaque::VME(uint32_t bti, GenRegister *dst,
> > + GenRegister
> > *payloadVal,
> > +                              uint32_t dstNum, uint32_t srcNum, uint32_t msg_type,
> > +                              uint32_t vme_search_path_lut, uint32_t lut_sub) {
> > +    SelectionInstruction *insn = this->appendInsn(SEL_OP_VME, dstNum,
> > srcNum);
> > +    SelectionVector *dstVector = this->appendVector();
> > +    SelectionVector *msgVector = this->appendVector();
> > +
> > +    for (uint32_t elemID = 0; elemID < dstNum; ++elemID)
> > +      insn->dst(elemID) = dst[elemID];
> > +    for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
> > +      insn->src(elemID) = payloadVal[elemID];
> > +
> > +    dstVector->regNum = dstNum;
> > +    dstVector->isSrc = 0;
> > +    dstVector->offsetID = 0;
> > +    dstVector->reg = &insn->dst(0);
> > +
> > +    msgVector->regNum = srcNum;
> > +    msgVector->isSrc = 1;
> > +    msgVector->offsetID = 0;
> > +    msgVector->reg = &insn->src(0);
> > +
> > +    insn->setbti(bti);
> > +    insn->extra.msg_type = msg_type;
> > +    insn->extra.vme_search_path_lut = vme_search_path_lut;
> > +    insn->extra.lut_sub = lut_sub;
> > +  }
> > +
> >    ///////////////////////////////////////////////////////////////////////////
> >    // Code selection public implementation
> >
> > //////////////////////////////////////////////////////////////////////
> > /////
> > @@ -5126,6 +5157,47 @@ namespace gbe
> >      DECL_CTOR(SampleInstruction, 1, 1);
> >    };
> >
> > +  DECL_PATTERN(VmeInstruction)
> > +  {
> > +    INLINE bool emitOne(Selection::Opaque &sel, const
> > + ir::VmeInstruction &insn,
> > bool &markChildren) const
> > +    {
> > +      using namespace ir;
> > +      uint32_t msg_type, vme_search_path_lut, lut_sub;
> > +      msg_type = insn.getMsgType();
> > +      vme_search_path_lut = 0;
> > +      lut_sub = 0;
> > +      GBE_ASSERT(msg_type == 1);
> > +      uint32_t payloadLen = 0;
> > +      //We allocate 5 virtual payload grfs to selection dst register.
> > +      if(msg_type == 1){
> > +        payloadLen = 5;
> > +      }
> > +      uint32_t selDstNum = insn.getDstNum() + payloadLen;
> > +      uint32_t srcNum = insn.getSrcNum();
> > +      vector<GenRegister> dst(selDstNum);
> > +      vector<GenRegister> payloadVal(srcNum);
> > +      uint32_t valueID = 0;
> > +      for (valueID = 0; valueID < insn.getDstNum(); ++valueID)
> > +        dst[valueID] = sel.selReg(insn.getDst(valueID), insn.getDstType());
> > +      for (valueID = insn.getDstNum(); valueID < selDstNum; ++valueID)
> > +        dst[valueID] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> > +
> > +      for (valueID = 0; valueID < srcNum; ++valueID)
> > +        payloadVal[valueID] = sel.selReg(insn.getSrc(valueID),
> > + insn.getSrcType());
> > +
> > +      uint32_t bti = insn.getImageIndex();
> > +      if (bti > BTI_MAX_ID) {
> > +        std::cerr << "Too large bti " << bti;
> > +        return false;
> > +      }
> > +
> > +      sel.VME(bti, dst.data(), payloadVal.data(), selDstNum, srcNum,
> > + msg_type,
> > vme_search_path_lut, lut_sub);
> > +
> > +      return true;
> > +    }
> > +    DECL_CTOR(VmeInstruction, 1, 1);
> > +  };
> > +
> >    /*! Typed write instruction pattern. */
> >    DECL_PATTERN(TypedWriteInstruction)
> >    {
> > @@ -5591,6 +5663,7 @@ namespace gbe
> >      this->insert<MulAddInstructionPattern>();
> >      this->insert<SelectModifierInstructionPattern>();
> >      this->insert<SampleInstructionPattern>();
> > +    this->insert<VmeInstructionPattern>();
> >      this->insert<GetImageInfoInstructionPattern>();
> >      this->insert<ReadARFInstructionPattern>();
> >      this->insert<RegionInstructionPattern>();
> > diff --git a/backend/src/backend/gen_insn_selection.hpp
> > b/backend/src/backend/gen_insn_selection.hpp
> > index f51c905..578db41 100644
> > --- a/backend/src/backend/gen_insn_selection.hpp
> > +++ b/backend/src/backend/gen_insn_selection.hpp
> > @@ -90,8 +90,8 @@ namespace gbe
> >      const GenRegister &dst(uint32_t dstID) const { return regs[dstID]; }
> >      /*! Damn C++ */
> >      const GenRegister &src(uint32_t srcID) const { return
> regs[dstNum+srcID]; }
> > -    /*! No more than 9 sources (used by typed writes on simd8 mode.) */
> > -    enum { MAX_SRC_NUM = 9 };
> > +    /*! No more than 40 sources (40 sources are used by vme for payload passing and setting) */
> > +    enum { MAX_SRC_NUM = 40 };
> >      /*! No more than 16 destinations (15 used by I64DIV/I64REM) */
> >      enum { MAX_DST_NUM = 16 };
> >      /*! State of the instruction (extra fields neeed for the
> > encoding) */ @@ -129,6 +129,12 @@ namespace gbe
> >          bool     isLD;  // is this a ld message?
> >          bool     isUniform;
> >        };
> > +      struct {
> > +        uint16_t vme_bti:8;
> > +        uint16_t msg_type:2;
> > +        uint16_t vme_search_path_lut:3;
> > +        uint16_t lut_sub:2;
> > +      };
> >        uint32_t barrierType;
> >        bool longjmp;
> >        uint32_t indirect_offset;
> > @@ -138,7 +144,7 @@ namespace gbe
> >      /*! Number of destinations */
> >      uint8_t dstNum:5;
> >      /*! Number of sources */
> > -    uint8_t srcNum:4;
> > +    uint8_t srcNum:6;
> >      /*! To store various indices */
> >      uint32_t index;
> >      /*! For BRC/IF to store the UIP */ @@ -152,6 +158,7 @@ namespace
> > gbe
> >        switch (opcode) {
> >          case SEL_OP_DWORD_GATHER: return extra.function;
> >          case SEL_OP_SAMPLE: return extra.rdbti;
> > +        case SEL_OP_VME: return extra.vme_bti;
> >          case SEL_OP_TYPED_WRITE: return extra.bti;
> >          default:
> >            GBE_ASSERT(0);
> > @@ -164,6 +171,7 @@ namespace gbe
> >        switch (opcode) {
> >          case SEL_OP_DWORD_GATHER: extra.function = bti; return;
> >          case SEL_OP_SAMPLE: extra.rdbti = bti; return;
> > +        case SEL_OP_VME: extra.vme_bti = bti; return;
> >          case SEL_OP_TYPED_WRITE: extra.bti = bti; return;
> >          default:
> >            GBE_ASSERT(0);
> > diff --git a/backend/src/backend/gen_insn_selection.hxx
> > b/backend/src/backend/gen_insn_selection.hxx
> > index 479398b..4d3e921 100644
> > --- a/backend/src/backend/gen_insn_selection.hxx
> > +++ b/backend/src/backend/gen_insn_selection.hxx
> > @@ -65,6 +65,7 @@ DECL_SELECTION_IR(UNPACK_BYTE,
> > UnpackByteInstruction)
> >  DECL_SELECTION_IR(PACK_LONG, PackLongInstruction)
> > DECL_SELECTION_IR(UNPACK_LONG, UnpackLongInstruction)
> > DECL_SELECTION_IR(SAMPLE, SampleInstruction)
> > +DECL_SELECTION_IR(VME, VmeInstruction)
> >  DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
> > DECL_SELECTION_IR(SPILL_REG, SpillRegInstruction)
> > DECL_SELECTION_IR(UNSPILL_REG, UnSpillRegInstruction) diff --git
> > a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> > index f93c528..7bf787e 100644
> > --- a/backend/src/ir/instruction.cpp
> > +++ b/backend/src/ir/instruction.cpp
> > @@ -595,6 +595,58 @@ namespace ir {
> >        static const uint32_t dstNum = 4;
> >      };
> >
> > +    class ALIGNED_INSTRUCTION VmeInstruction :
> > +      public BasePolicy,
> > +      public TupleSrcPolicy<VmeInstruction>,
> > +      public TupleDstPolicy<VmeInstruction>
> > +    {
> > +    public:
> > +      VmeInstruction(uint8_t imageIdx, Tuple dstTuple, Tuple srcTuple,
> > +                     uint32_t dstNum, uint32_t srcNum, int msg_type,
> > +                     int vme_search_path_lut, int lut_sub) {
> > +        this->opcode = OP_VME;
> > +        this->dst = dstTuple;
> > +        this->src = srcTuple;
> > +        this->dstNum = dstNum;
> > +        this->srcNum = srcNum;
> > +        this->imageIdx = imageIdx;
> > +        this->msg_type = msg_type;
> > +        this->vme_search_path_lut = vme_search_path_lut;
> > +        this->lut_sub = lut_sub;
> > +      }
> > +      INLINE bool wellFormed(const Function &fn, std::string &why) const;
> > +      INLINE void out(std::ostream &out, const Function &fn) const {
> > +        this->outOpcode(out);
> > +        out << " src_surface id " << (int)this->getImageIndex()
> > +            << " ref_surface id " << (int)this->getImageIndex() + 1;
> > +        for(uint32_t i = 0; i < dstNum; i++){
> > +          out<< " %" << this->getDst(fn, i);
> > +        }
> > +        for(uint32_t i = 0; i < srcNum; i++){
> > +          out<< " %" << this->getSrc(fn, i);
> > +        }
> > +        out
> > +            << " msg_type " << (int)this->getMsgType()
> > +            << " vme_search_path_lut " << (int)this->vme_search_path_lut
> > +            << " lut_sub " << (int)this->lut_sub;
> > +      }
> > +      Tuple src;
> > +      Tuple dst;
> > +
> > +      INLINE uint8_t getImageIndex(void) const { return this->imageIdx; }
> > +      INLINE uint8_t getMsgType(void) const { return this->msg_type;
> > + }
> > +
> > +      INLINE Type getSrcType(void) const { return TYPE_U32; }
> > +      INLINE Type getDstType(void) const { return TYPE_U32; }
> > +      uint8_t imageIdx;
> > +      uint8_t msg_type;
> > +      uint8_t vme_search_path_lut;
> > +      uint8_t lut_sub;
> > +      uint32_t srcNum;
> > +      uint32_t dstNum;
> > +    };
> > +
> > +
> >      class ALIGNED_INSTRUCTION TypedWriteInstruction : // TODO
> >        public BasePolicy,
> >        public TupleSrcPolicy<TypedWriteInstruction>,
> > @@ -1111,6 +1163,8 @@ namespace ir {
> >      // TODO
> >      INLINE bool SampleInstruction::wellFormed(const Function &fn,
> > std::string
> > &why) const
> >      { return true; }
> > +    INLINE bool VmeInstruction::wellFormed(const Function &fn,
> > + std::string
> > &why) const
> > +    { return true; }
> >      INLINE bool TypedWriteInstruction::wellFormed(const Function &fn,
> > std::string &why) const
> >      { return true; }
> >      INLINE bool GetImageInfoInstruction::wellFormed(const Function
> > &fn, std::string &why) const @@ -1502,6 +1556,10 @@
> > START_INTROSPECTION(LabelInstruction)
> >  #include "ir/instruction.hxx"
> >  END_INTROSPECTION(LabelInstruction)
> >
> > +START_INTROSPECTION(VmeInstruction)
> > +#include "ir/instruction.hxx"
> > +END_INTROSPECTION(VmeInstruction)
> > +
> >  #undef END_INTROSPECTION
> >  #undef START_INTROSPECTION
> >  #undef DECL_INSN
> > @@ -1694,6 +1752,10 @@ DECL_MEM_FN(SampleInstruction, Type,
> > getDstType(void), getDstType())  DECL_MEM_FN(SampleInstruction,
> > uint8_t, getSamplerIndex(void),
> > getSamplerIndex())
> >  DECL_MEM_FN(SampleInstruction, uint8_t, getSamplerOffset(void),
> > getSamplerOffset())
> >  DECL_MEM_FN(SampleInstruction, uint8_t, getImageIndex(void),
> > getImageIndex())
> > +DECL_MEM_FN(VmeInstruction, Type, getSrcType(void), getSrcType())
> > +DECL_MEM_FN(VmeInstruction, Type, getDstType(void), getDstType())
> > +DECL_MEM_FN(VmeInstruction, uint8_t, getImageIndex(void),
> > getImageIndex())
> > +DECL_MEM_FN(VmeInstruction, uint8_t, getMsgType(void),
> getMsgType())
> >  DECL_MEM_FN(TypedWriteInstruction, Type, getSrcType(void),
> > getSrcType())  DECL_MEM_FN(TypedWriteInstruction, Type,
> > getCoordType(void),
> > getCoordType())
> >  DECL_MEM_FN(TypedWriteInstruction, uint8_t, getImageIndex(void),
> > getImageIndex())
> > @@ -1932,6 +1994,10 @@ DECL_MEM_FN(GetImageInfoInstruction,
> uint8_t,
> > getImageIndex(void), getImageIndex
> >      return internal::SampleInstruction(imageIndex, dst, src, srcNum,
> > dstIsFloat, srcIsFloat, sampler, samplerOffset).convert();
> >    }
> >
> > +  Instruction VME(uint8_t imageIndex, Tuple dst, Tuple src, uint32_t
> > + dstNum,
> > uint32_t srcNum, int msg_type, int vme_search_path_lut, int lut_sub) {
> > +    return internal::VmeInstruction(imageIndex, dst, src, dstNum,
> > + srcNum,
> > msg_type, vme_search_path_lut, lut_sub).convert();
> > +  }
> > +
> >    Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, uint8_t
> > srcNum, Type srcType, Type coordType) {
> >      return internal::TypedWriteInstruction(imageIndex, src, srcNum,
> > srcType, coordType).convert();
> >    }
> > diff --git a/backend/src/ir/instruction.hpp
> > b/backend/src/ir/instruction.hpp index 3f3c655..c8da416 100644
> > --- a/backend/src/ir/instruction.hpp
> > +++ b/backend/src/ir/instruction.hpp
> > @@ -191,8 +191,8 @@ namespace ir {
> >      template <typename T> INLINE bool isMemberOf(void) const {
> >        return T::isClassOf(*this);
> >      }
> > -    /*! max_src for store instruction (vec16 + addr) */
> > -    static const uint32_t MAX_SRC_NUM = 32;
> > +    /*! max_src used by vme for payload passing and setting */
> > +    static const uint32_t MAX_SRC_NUM = 40;
> >      static const uint32_t MAX_DST_NUM = 32;
> >    protected:
> >      BasicBlock *parent;      //!< The basic block containing the instruction
> > @@ -399,6 +399,17 @@ namespace ir {
> >      static bool isClassOf(const Instruction &insn);
> >    };
> >
> > +  /*! Video motion estimation */
> > +  class VmeInstruction : public Instruction {
> > +  public:
> > +    uint8_t getImageIndex() const;
> > +    uint8_t getMsgType() const;
> > +    Type getSrcType(void) const;
> > +    Type getDstType(void) const;
> > +    /*! Return true if the given instruction is an instance of this class */
> > +    static bool isClassOf(const Instruction &insn);  };
> > +
> >    typedef union _ImageInfoKey{
> >      _ImageInfoKey(uint8_t i, uint8_t t) : index(i), type(t) {};
> >      _ImageInfoKey(int key) : data(key) {}; @@ -756,6 +767,8 @@
> > namespace ir {
> >    Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, uint8_t
> > srcNum, Type srcType, Type coordType);
> >    /*! sample textures */
> >    Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src,
> > uint8_t srcNum, bool dstIsFloat, bool srcIsFloat, uint8_t sampler,
> > uint8_t samplerOffset);
> > +  /*! video motion estimation */
> > +  Instruction VME(uint8_t imageIndex, Tuple dst, Tuple src, uint32_t
> > + dstNum,
> > uint32_t srcNum, int msg_type, int vme_search_path_lut, int lut_sub);
> >    /*! get image information , such as width/height/depth/... */
> >    Instruction GET_IMAGE_INFO(int infoType, Register dst, uint8_t
> > imageIndex, Register infoReg);
> >    /*! label labelIndex */
> > diff --git a/backend/src/ir/instruction.hxx
> > b/backend/src/ir/instruction.hxx index 81548c9..27d59a9 100644
> > --- a/backend/src/ir/instruction.hxx
> > +++ b/backend/src/ir/instruction.hxx
> > @@ -85,6 +85,7 @@ DECL_INSN(SYNC, SyncInstruction)  DECL_INSN(LABEL,
> > LabelInstruction)  DECL_INSN(READ_ARF, ReadARFInstruction)
> > DECL_INSN(REGION, RegionInstruction)
> > +DECL_INSN(VME, VmeInstruction)
> >  DECL_INSN(INDIRECT_MOV, IndirectMovInstruction)
> > DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction)
> DECL_INSN(MUL_HI,
> > BinaryInstruction) diff --git a/backend/src/libocl/include/ocl_misc.h
> > b/backend/src/libocl/include/ocl_misc.h
> > index 359025b..7d4abab 100644
> > --- a/backend/src/libocl/include/ocl_misc.h
> > +++ b/backend/src/libocl/include/ocl_misc.h
> > @@ -136,5 +136,20 @@ struct time_stamp {
> >    uint event;
> >  };
> >
> > +uint __gen_ocl_region(ushort offset, uint data);
> > +
> >  struct time_stamp __gen_ocl_get_timestamp(void);
> > +
> > +uint8 __gen_ocl_vme(image2d_t, image2d_t,
> > +                   uint, uint, uint, uint,
> > +                   uint, uint, uint, uint,
> > +                   uint, uint, uint, uint,
> > +                   uint, uint, uint, uint,
> > +                   uint, uint, uint, uint,
> > +                   uint, uint, uint, uint,
> > +                   uint, uint, uint, uint,
> > +                   uint, uint, uint, uint,
> > +                   uint, uint, uint, uint,
> > +                   uint, uint, uint, uint,
> > +                   int, int, int);
> >  #endif
> > diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> > b/backend/src/llvm/llvm_gen_backend.cpp
> > index 7299d53..19927ba 100644
> > --- a/backend/src/llvm/llvm_gen_backend.cpp
> > +++ b/backend/src/llvm/llvm_gen_backend.cpp
> > @@ -3541,6 +3541,7 @@ namespace gbe
> >        case GEN_OCL_REGION:
> >        case GEN_OCL_SIMD_ID:
> >        case GEN_OCL_SIMD_SHUFFLE:
> > +      case GEN_OCL_VME:
> >          this->newRegister(&I);
> >          break;
> >        case GEN_OCL_PRINTF:
> > @@ -3839,6 +3840,52 @@ namespace gbe
> >              ctx.READ_ARF(ir::TYPE_U32, dst, ir::ARF_TM);
> >              break;
> >            }
> > +          case GEN_OCL_VME:
> > +          {
> > +
> > +            const uint8_t imageID = getImageID(I);
> > +
> > +            AI++;
> > +            AI++;
> > +
> > +            uint32_t src_length = 40;
> > +
> > +            vector<ir::Register> dstTupleData, srcTupleData;
> > +            for (uint32_t i = 0; i < src_length; i++, AI++){
> > +              srcTupleData.push_back(this->getRegister(*AI));
> > +            }
> > +
> > +            const ir::Tuple srcTuple =
> > + ctx.arrayTuple(&srcTupleData[0], src_length);
> > +
> > +            Constant *msg_type_cpv = dyn_cast<Constant>(*AI);
> > +            assert(msg_type_cpv);
> > +            const ir::Immediate &msg_type_x =
> > processConstantImm(msg_type_cpv);
> > +            int msg_type = msg_type_x.getIntegerValue();
> > +            uint32_t dst_length;
> > +            //msg_type = 1 indicates inter search only of gen vme shared
> function
> > +            GBE_ASSERT(msg_type == 1);
> > +            if(msg_type == 1)
> > +              dst_length = 6;
> > +            for (uint32_t elemID = 0; elemID < dst_length; ++elemID) {
> > +              const ir::Register reg = this->getRegister(&I, elemID);
> > +              dstTupleData.push_back(reg);
> > +            }
> > +            const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0],
> dst_length);
> > +            ++AI;
> > +            Constant *vme_search_path_lut_cpv = dyn_cast<Constant>(*AI);
> > +            assert(vme_search_path_lut_cpv);
> > +            const ir::Immediate &vme_search_path_lut_x =
> > processConstantImm(vme_search_path_lut_cpv);
> > +            ++AI;
> > +            Constant *lut_sub_cpv = dyn_cast<Constant>(*AI);
> > +            assert(lut_sub_cpv);
> > +            const ir::Immediate &lut_sub_x =
> > + processConstantImm(lut_sub_cpv);
> > +
> > +            ctx.VME(imageID, dstTuple, srcTuple, dst_length, src_length,
> > +                    msg_type, vme_search_path_lut_x.getIntegerValue(),
> > +                    lut_sub_x.getIntegerValue());
> > +
> > +            break;
> > +          }
> >            case GEN_OCL_REGION:
> >            {
> >              const ir::Register dst = this->getRegister(&I); diff
> > --git a/backend/src/llvm/llvm_gen_ocl_function.hxx
> > b/backend/src/llvm/llvm_gen_ocl_function.hxx
> > index cabb225..3fbf847 100644
> > --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> > +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> > @@ -168,5 +168,7 @@ DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE,
> > intel_sub_group_shuffle)
> >  DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm)
> > DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region)
> >
> > +DECL_LLVM_GEN_FUNCTION(VME, __gen_ocl_vme)
> > +
> >  // printf function
> >  DECL_LLVM_GEN_FUNCTION(PRINTF, __gen_ocl_printf) diff --git
> > a/backend/src/llvm/llvm_scalarize.cpp
> > b/backend/src/llvm/llvm_scalarize.cpp
> > index 7ee5259..dc1d8ab 100644
> > --- a/backend/src/llvm/llvm_scalarize.cpp
> > +++ b/backend/src/llvm/llvm_scalarize.cpp
> > @@ -671,6 +671,10 @@ namespace gbe {
> >              *CI = InsertToVector(call, *CI);
> >              break;
> >            }
> > +          case GEN_OCL_VME:
> > +            setAppendPoint(call);
> > +            extractFromVector(call);
> > +            break;
> >          }
> >        }
> >      }
> > --
> > 1.9.1
> >
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list