[Beignet] [PATCH v3 1/4] Add built-in function __gen_ocl_vme.
Song, Ruiling
ruiling.song at intel.com
Sun Sep 13 19:07:54 PDT 2015
Some inline comments
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 25fdf08..d9945a7 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -2146,6 +2146,79 @@ namespace gbe
> p->SAMPLE(dst, msgPayload, msgLen, false, bti, sampler, simdWidth, -1, 0,
> insn.extra.isLD, insn.extra.isUniform);
> }
>
> + void GenContext::emitVmeInstruction(const SelectionInstruction &insn) {
> + const GenRegister dst = ra->genReg(insn.dst(0));
> +
> + /* Use MOV to Setup bits of payload */
> + p->push();
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.noMask = 1;
> + p->curr.execWidth = 1;
> + for(int i=0; i < 2; i++){
> + for(int k = 0; k < 2; k++){
> + GenRegister payload_grf = ra->genReg(insn.dst(8+i));
> + payload_grf.nr += k;
> + payload_grf.vstride = GEN_VERTICAL_STRIDE_0;
> + payload_grf.width = GEN_WIDTH_1;
> + payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0;
> + payload_grf.subphysical = 1;
> + for(int j=0; j < 8; j++){
> + payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD);
> + GenRegister payload_val = ra->genReg(insn.src(i*16+k*8+j));
> + payload_val.vstride = GEN_VERTICAL_STRIDE_0;
> + payload_val.width = GEN_WIDTH_1;
> + payload_val.hstride = GEN_HORIZONTAL_STRIDE_0;
You don't need to set subphysical and subnr here anymore.
> + payload_val.subphysical = 1;
> + payload_val.subnr = 0;
> +
> + p->MOV(payload_grf, payload_val);
> + }
> + }
> + }
> + {
> + int i = 2;
> + GenRegister payload_grf = ra->genReg(insn.dst(8+i));
> + payload_grf.vstride = GEN_VERTICAL_STRIDE_0;
> + payload_grf.width = GEN_WIDTH_1;
> + payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0;
> + payload_grf.subphysical = 1;
> + for(int j=0; j < 8; j++){
> + payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD);
> + GenRegister payload_val = ra->genReg(insn.src(i*16+j));
> + payload_val.vstride = GEN_VERTICAL_STRIDE_0;
> + payload_val.width = GEN_WIDTH_1;
> + payload_val.hstride = GEN_HORIZONTAL_STRIDE_0;
Same as above.
> + payload_val.subphysical = 1;
> + payload_val.subnr = 0;
> +
> + p->MOV(payload_grf, payload_val);
> + }
> + }
> + p->pop();
> +
> + p->push();
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.noMask = 1;
> + p->curr.execWidth = 1;
> + GenRegister payload_did = GenRegister::retype(ra->genReg(insn.dst(8)),
> GEN_TYPE_UB);
> + payload_did.vstride = GEN_VERTICAL_STRIDE_0;
> + payload_did.width = GEN_WIDTH_1;
> + payload_did.hstride = GEN_HORIZONTAL_STRIDE_0;
> + payload_did.subphysical = 1;
> + payload_did.subnr = 20 * typeSize(GEN_TYPE_UB);
> + GenRegister grf0 = GenRegister::ub1grf(0, 20);
No need to set subnr here; ub1grf() has already done that.
> + grf0.subnr = 20 * typeSize(GEN_TYPE_UB);
> + p->MOV(payload_did, grf0);
> + p->pop();
> +
> --- a/backend/src/backend/gen_insn_selection.hpp
> +++ b/backend/src/backend/gen_insn_selection.hpp
> @@ -90,8 +90,8 @@ namespace gbe
> const GenRegister &dst(uint32_t dstID) const { return regs[dstID]; }
> /*! Damn C++ */
> const GenRegister &src(uint32_t srcID) const { return regs[dstNum+srcID]; }
> - /*! No more than 9 sources (used by typed writes on simd8 mode.) */
> - enum { MAX_SRC_NUM = 9 };
> + /*! No more than 45 sources (used by vme for payload passing and setting)
> */
> + enum { MAX_SRC_NUM = 45 };
It seems that you only need to set MAX_SRC_NUM to 40, right?
> index cf8d839..8cbafec 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -191,8 +191,8 @@ namespace ir {
> template <typename T> INLINE bool isMemberOf(void) const {
> return T::isClassOf(*this);
> }
> - /*! max_src for store instruction (vec16 + addr) */
> - static const uint32_t MAX_SRC_NUM = 32;
> + /*! max_src used by vme for payload passing and setting */
> + static const uint32_t MAX_SRC_NUM = 45;
Seems 40 is enough, right?
More information about the Beignet
mailing list