[Beignet] [PATCH v3 1/4] Add built-in function __gen_ocl_vme.

Song, Ruiling ruiling.song at intel.com
Sun Sep 13 19:07:54 PDT 2015


Some inline comments

> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 25fdf08..d9945a7 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -2146,6 +2146,79 @@ namespace gbe
>      p->SAMPLE(dst, msgPayload, msgLen, false, bti, sampler, simdWidth, -1, 0,
> insn.extra.isLD, insn.extra.isUniform);
>    }
> 
> +  void GenContext::emitVmeInstruction(const SelectionInstruction &insn) {
> +    const GenRegister dst = ra->genReg(insn.dst(0));
> +
> +    /* Use MOV to Setup bits of payload */
> +    p->push();
> +    p->curr.predicate = GEN_PREDICATE_NONE;
> +    p->curr.noMask = 1;
> +    p->curr.execWidth = 1;
> +    for(int i=0; i < 2; i++){
> +      for(int k = 0; k < 2; k++){
> +        GenRegister payload_grf = ra->genReg(insn.dst(8+i));
> +        payload_grf.nr += k;
> +        payload_grf.vstride = GEN_VERTICAL_STRIDE_0;
> +        payload_grf.width = GEN_WIDTH_1;
> +        payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0;
> +        payload_grf.subphysical = 1;
> +        for(int j=0; j < 8; j++){
> +          payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD);
> +          GenRegister payload_val = ra->genReg(insn.src(i*16+k*8+j));
> +          payload_val.vstride = GEN_VERTICAL_STRIDE_0;
> +          payload_val.width = GEN_WIDTH_1;
> +          payload_val.hstride = GEN_HORIZONTAL_STRIDE_0;

You don't need to set subphysical and subnr here anymore.
> +          payload_val.subphysical = 1;
> +          payload_val.subnr = 0;
> +
> +          p->MOV(payload_grf, payload_val);
> +        }
> +      }
> +    }
> +    {
> +      int i = 2;
> +      GenRegister payload_grf = ra->genReg(insn.dst(8+i));
> +      payload_grf.vstride = GEN_VERTICAL_STRIDE_0;
> +      payload_grf.width = GEN_WIDTH_1;
> +      payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0;
> +      payload_grf.subphysical = 1;
> +      for(int j=0; j < 8; j++){
> +        payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD);
> +        GenRegister payload_val = ra->genReg(insn.src(i*16+j));
> +        payload_val.vstride = GEN_VERTICAL_STRIDE_0;
> +        payload_val.width = GEN_WIDTH_1;
> +        payload_val.hstride = GEN_HORIZONTAL_STRIDE_0;
Same as above: no need to set subphysical and subnr here.
> +        payload_val.subphysical = 1;
> +        payload_val.subnr = 0;
> +
> +        p->MOV(payload_grf, payload_val);
> +      }
> +    }
> +    p->pop();
> +
> +    p->push();
> +    p->curr.predicate = GEN_PREDICATE_NONE;
> +    p->curr.noMask = 1;
> +    p->curr.execWidth = 1;
> +    GenRegister payload_did = GenRegister::retype(ra->genReg(insn.dst(8)),
> GEN_TYPE_UB);
> +    payload_did.vstride = GEN_VERTICAL_STRIDE_0;
> +    payload_did.width = GEN_WIDTH_1;
> +    payload_did.hstride = GEN_HORIZONTAL_STRIDE_0;
> +    payload_did.subphysical = 1;
> +    payload_did.subnr = 20 * typeSize(GEN_TYPE_UB);
> +    GenRegister grf0 = GenRegister::ub1grf(0, 20);
No need to set subnr here; ub1grf() has already done that.

> +    grf0.subnr = 20 * typeSize(GEN_TYPE_UB);
> +    p->MOV(payload_did, grf0);
> +    p->pop();
> +


> --- a/backend/src/backend/gen_insn_selection.hpp
> +++ b/backend/src/backend/gen_insn_selection.hpp
> @@ -90,8 +90,8 @@ namespace gbe
>      const GenRegister &dst(uint32_t dstID) const { return regs[dstID]; }
>      /*! Damn C++ */
>      const GenRegister &src(uint32_t srcID) const { return regs[dstNum+srcID]; }
> -    /*! No more than 9 sources (used by typed writes on simd8 mode.) */
> -    enum { MAX_SRC_NUM = 9 };
> +    /*! No more than 45 sources (used by vme for payload passing and setting)
> */
> +    enum { MAX_SRC_NUM = 45 };
It seems that you only need to set MAX_SRC_NUM to 40, right?

> index cf8d839..8cbafec 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -191,8 +191,8 @@ namespace ir {
>      template <typename T> INLINE bool isMemberOf(void) const {
>        return T::isClassOf(*this);
>      }
> -    /*! max_src for store instruction (vec16 + addr) */
> -    static const uint32_t MAX_SRC_NUM = 32;
> +    /*! max_src used by vme for payload passing and setting */
> +    static const uint32_t MAX_SRC_NUM = 45;
Seems 40 is enough, right?




More information about the Beignet mailing list