[Beignet] [PATCH] enable sends to write SLM for workgroup op

Yang, Rong R rong.r.yang at intel.com
Wed Dec 28 08:06:10 UTC 2016


Pushed, thanks.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Pan, Xiuli
> Sent: Wednesday, December 28, 2016 15:55
> To: Guo, Yejun <yejun.guo at intel.com>; beignet at lists.freedesktop.org
> Cc: Guo, Yejun <yejun.guo at intel.com>
> Subject: Re: [Beignet] [PATCH] enable sends to write SLM for workgroup op
> 
> LGTM.
> 
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Guo, Yejun
> Sent: Friday, December 23, 2016 5:43 PM
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun <yejun.guo at intel.com>
> Subject: [Beignet] [PATCH] enable sends to write SLM for workgroup op
> 
> Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
> ---
>  backend/src/backend/gen8_context.cpp       | 12 +++----
>  backend/src/backend/gen_context.cpp        |  8 ++---
>  backend/src/backend/gen_insn_selection.cpp | 50 +++++++++++++++++++++---------
>  backend/src/backend/gen_insn_selection.hpp |  5 ++-
>  4 files changed, 49 insertions(+), 26 deletions(-)
> 
> diff --git a/backend/src/backend/gen8_context.cpp
> b/backend/src/backend/gen8_context.cpp
> index a3045ce..eede52c 100644
> --- a/backend/src/backend/gen8_context.cpp
> +++ b/backend/src/backend/gen8_context.cpp
> @@ -1738,7 +1738,7 @@ namespace gbe
>      GenRegister barrierId = ra->genReg(GenRegister::ud1grf(ir::ocl::barrierid));
>      GenRegister localBarrier = ra->genReg(insn.src(5));
> 
> -    uint32_t wg_op = insn.extra.workgroupOp;
> +    uint32_t wg_op = insn.extra.wgop.workgroupOp;
>      uint32_t simd = p->curr.execWidth;
>      int32_t jip0, jip1;
> 
> @@ -1757,8 +1757,8 @@ namespace gbe
>      /* use of continuous GRF allocation from insn selection */
>      GenRegister msg = GenRegister::retype(ra->genReg(insn.dst(2)),
> dst.type);
>      GenRegister msgSlmOff = GenRegister::retype(ra->genReg(insn.src(4)),
> GEN_TYPE_UD);
> -    GenRegister msgAddr = GenRegister::retype(GenRegister::offset(msg, 0),
> GEN_TYPE_UD);
> -    GenRegister msgData = GenRegister::retype(GenRegister::offset(msg, 1),
> dst.type);
> +    GenRegister msgAddr = GenRegister::retype(msg, GEN_TYPE_UD);
> +    GenRegister msgData = GenRegister::retype(ra->genReg(insn.dst(3)),
> + dst.type);
> 
>      /* do some calculation within each thread */
>      wgOpPerformThread(dst, theVal, threadData, tmp, simd, wg_op, p);
> @@ -1799,7 +1799,7 @@ namespace gbe
>        p->curr.execWidth = 8;
>        p->MUL(msgAddr, threadId, GenRegister::immd(0x8));
>        p->ADD(msgAddr, msgAddr, msgSlmOff);
> -      p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2, false);
> +      p->UNTYPED_WRITE(msgAddr, msgData, GenRegister::immw(0xFE), 2,
> + insn.extra.wgop.splitSend);
>      }
>      else
>      {
> @@ -1807,7 +1807,7 @@ namespace gbe
>        p->MOV(msgData, threadData);
>        p->MUL(msgAddr, threadId, GenRegister::immd(0x4));
>        p->ADD(msgAddr, msgAddr, msgSlmOff);
> -      p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1, false);
> +      p->UNTYPED_WRITE(msgAddr, msgData, GenRegister::immw(0xFE), 1,
> + insn.extra.wgop.splitSend);
>      }
> 
>      /* init partialData register, it will hold the final result */
> @@ -1945,7 +1945,7 @@ namespace gbe
>      const GenRegister theVal = GenRegister::retype(ra->genReg(insn.src(0)),
> dst.type);
>      GenRegister threadData = ra->genReg(insn.src(1));
> 
> -    uint32_t wg_op = insn.extra.workgroupOp;
> +    uint32_t wg_op = insn.extra.wgop.workgroupOp;
>      uint32_t simd = p->curr.execWidth;
> 
>      /* masked elements should be properly set to init value */
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index c8019e3..5d8861b 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -3252,7 +3252,7 @@ namespace gbe
>      GenRegister barrierId = ra->genReg(GenRegister::ud1grf(ir::ocl::barrierid));
>      GenRegister localBarrier = ra->genReg(insn.src(5));
> 
> -    uint32_t wg_op = insn.extra.workgroupOp;
> +    uint32_t wg_op = insn.extra.wgop.workgroupOp;
>      uint32_t simd = p->curr.execWidth;
>      int32_t jip0, jip1;
> 
> @@ -3271,8 +3271,8 @@ namespace gbe
>      /* use of continuous GRF allocation from insn selection */
>      GenRegister msg = GenRegister::retype(ra->genReg(insn.dst(2)),
> dst.type);
>      GenRegister msgSlmOff = GenRegister::retype(ra->genReg(insn.src(4)),
> GEN_TYPE_UD);
> -    GenRegister msgAddr = GenRegister::retype(GenRegister::offset(msg, 0),
> GEN_TYPE_UD);
> -    GenRegister msgData = GenRegister::retype(GenRegister::offset(msg, 1),
> dst.type);
> +    GenRegister msgAddr = GenRegister::retype(msg, GEN_TYPE_UD);
> +    GenRegister msgData = GenRegister::retype(ra->genReg(insn.dst(3)),
> + dst.type);
> 
>      /* do some calculation within each thread */
>      wgOpPerformThread(dst, theVal, threadData, tmp, simd, wg_op, p);
> @@ -3459,7 +3459,7 @@ namespace gbe
>      const GenRegister theVal = GenRegister::retype(ra->genReg(insn.src(0)),
> dst.type);
>      GenRegister threadData = ra->genReg(insn.src(1));
> 
> -    uint32_t wg_op = insn.extra.workgroupOp;
> +    uint32_t wg_op = insn.extra.wgop.workgroupOp;
>      uint32_t simd = p->curr.execWidth;
> 
>      /* masked elements should be properly set to init value */
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 128c2bc..bcdba12 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -753,7 +753,7 @@ namespace gbe
>                        GenRegister tmpData1,
>                        GenRegister localThreadID, GenRegister localThreadNUM,
>                        GenRegister tmpData2, GenRegister slmOff,
> -                      vector<GenRegister> msg, uint32_t msgSizeReq,
> +                      vector<GenRegister> msg,
>                        GenRegister localBarrier);
>      /*! Sub Group Operations */
>      void SUBGROUP_OP(uint32_t wg_op, Reg dst, GenRegister src,
> @@ -2255,19 +2255,11 @@ namespace gbe
>                                         GenRegister tmpData2,
>                                         GenRegister slmOff,
>                                         vector<GenRegister> msg,
> -                                       uint32_t msgSizeReq,
>                                         GenRegister localBarrier)
>    {
>      SelectionInstruction *insn = this->appendInsn(SEL_OP_WORKGROUP_OP,
> 2 + msg.size(), 6);
> -    SelectionVector *vector = this->appendVector();
> 
> -    /* allocate continuous GRF registers for READ/WRITE to SLM */
> -    GBE_ASSERT(msg.size() >= msgSizeReq);
> -    vector->regNum = msg.size();
> -    vector->offsetID = 0;
> -    vector->reg = &insn->dst(2);
> -    vector->isSrc = 0;
> -    insn->extra.workgroupOp = wg_op;
> +    insn->extra.wgop.workgroupOp = wg_op;
> 
>      insn->dst(0) = dst;
>      insn->dst(1) = tmpData1;
> @@ -2280,6 +2272,29 @@ namespace gbe
>      insn->src(3) = tmpData2;
>      insn->src(4) = slmOff;
>      insn->src(5) = localBarrier;
> +
> +    if (hasSends()) {
> +      insn->extra.wgop.splitSend = 1;
> +      SelectionVector *vector = this->appendVector();
> +
> +      vector->regNum = 1;
> +      vector->offsetID = 2;
> +      vector->reg = &insn->dst(2);
> +      vector->isSrc = 0;
> +
> +      vector = this->appendVector();
> +      vector->regNum = msg.size() - 1;
> +      vector->offsetID = 3;
> +      vector->reg = &insn->dst(3);
> +      vector->isSrc = 0;
> +    } else {
> +      /* allocate continuous GRF registers for READ/WRITE to SLM */
> +      SelectionVector *vector = this->appendVector();
> +      vector->regNum = msg.size();
> +      vector->offsetID = 2;
> +      vector->reg = &insn->dst(2);
> +      vector->isSrc = 0;
> +    }
>    }
> 
>    void Selection::Opaque::SUBGROUP_OP(uint32_t wg_op,
> @@ -2290,7 +2305,7 @@ namespace gbe
>    {
>      SelectionInstruction *insn = this->appendInsn(SEL_OP_SUBGROUP_OP, 2,
> 2);
> 
> -    insn->extra.workgroupOp = wg_op;
> +    insn->extra.wgop.workgroupOp = wg_op;
> 
>      insn->dst(0) = dst;
>      insn->dst(1) = tmpData1;
> @@ -7451,10 +7466,15 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
>        GenRegister localBarrier = GenRegister::ud8grf(sel.reg(FAMILY_DWORD));
> 
>        /* Allocate registers for message sending
> -       * (read/write to shared local memory) */
> +       * (read/write to shared local memory),
> +       * only one data (ud/ul) is needed for thread communication,
> +       * we will always use SIMD8 to do the read/write
> +       */
>        vector<GenRegister> msg;
> -      for(uint32_t i = 0; i < 6; i++)
> -        msg.push_back(sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32));
> +      msg.push_back(GenRegister::ud8grf(sel.reg(ir::FAMILY_REG)));
> //address
> +      msg.push_back(GenRegister::ud8grf(sel.reg(ir::FAMILY_REG)));  //data
> +      if(dst.type == GEN_TYPE_UL || dst.type == GEN_TYPE_L)
> +        msg.push_back(GenRegister::ud8grf(sel.reg(ir::FAMILY_REG)));
> + //data
> 
>        /* Insert a barrier to make sure all the var we are interested in
>          have been assigned the final value. */
> @@ -7466,7 +7486,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
> 
>        /* Perform workgroup op */
>        sel.WORKGROUP_OP(workGroupOp, dst, src, tmpData1,
> -                       localThreadID, localThreadNUM, tmpData2, slmOff, msg, 6,
> +                       localThreadID, localThreadNUM, tmpData2, slmOff,
> + msg,
>                         localBarrier);
> 
>        return true;
> diff --git a/backend/src/backend/gen_insn_selection.hpp
> b/backend/src/backend/gen_insn_selection.hpp
> index 01999a2..8846372 100644
> --- a/backend/src/backend/gen_insn_selection.hpp
> +++ b/backend/src/backend/gen_insn_selection.hpp
> @@ -159,7 +159,10 @@ namespace gbe
>          uint32_t continueFlag:8;
>          uint16_t printfSize;
>        };
> -      uint32_t workgroupOp;
> +      struct {
> +        uint16_t workgroupOp;
> +        uint16_t splitSend:1;
> +      }wgop;
>      } extra;
>      /*! Gen opcode */
>      uint8_t opcode;
> --
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list