[Beignet] [PATCH 1/2] GBE: fix insntruction scheduling related bugs in read64/write64.

Zhigang Gong zhigang.gong at gmail.com
Fri Aug 9 00:55:06 PDT 2013


Sure. Because those temporary registers are to be modified in that
instruction which means a write to those registers. But if we put
them into the source register array. Then the post scheduler doesn't
think there is a write to those registers, thus may schedule a latter
instruction who write to those temporary register before this instruction
which is definitely wrong. Is that clear for you?

On Fri, Aug 09, 2013 at 07:43:12AM +0000, Song, Ruiling wrote:
> Patch 2 LGTM.
> For patch 1, could you explain a little bit more? Why the post instruction scheduler behave incorrect when temporary registers in src array but correct in dst array?
> 
> -----Original Message-----
> From: beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org [mailto:beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org] On Behalf Of Zhigang Gong
> Sent: Thursday, August 08, 2013 3:16 PM
> To: beignet at lists.freedesktop.org
> Cc: Zhigang Gong
> Subject: [Beignet] [PATCH 1/2] GBE: fix insntruction scheduling related bugs in read64/write64.
> 
> In read64 and write64, we allocate some temporary registers and we should put all of those temporary registers may be modified to the instruction's dst array. Otherwise, the latter post instruction scheduling may rearrange the instruction incorrectly.
> 
> Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
> ---
>  backend/src/backend/gen_context.cpp        |   13 ++++---
>  backend/src/backend/gen_encoder.cpp        |    6 ---
>  backend/src/backend/gen_insn_selection.cpp |   57 ++++++++++++++++------------
>  3 files changed, 39 insertions(+), 37 deletions(-)
> 
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index 621e7be..6de6cf1 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -601,10 +601,10 @@ namespace gbe
>    void GenContext::emitRead64Instruction(const SelectionInstruction &insn) {
>      const uint32_t elemNum = insn.extra.elem;
>      const uint32_t tmpRegSize = (p->curr.execWidth == 8) ? elemNum * 2 : elemNum;
> -    const GenRegister dst = ra->genReg(insn.dst(tmpRegSize));
> -    const GenRegister tmp = ra->genReg(insn.dst(0));
> +    const GenRegister tempAddr = ra->genReg(insn.dst(0));
> +    const GenRegister dst = ra->genReg(insn.dst(tmpRegSize + 1));
> +    const GenRegister tmp = ra->genReg(insn.dst(1));
>      const GenRegister src = ra->genReg(insn.src(0));
> -    const GenRegister tempAddr = ra->genReg(insn.src(1));
>      const uint32_t bti = insn.extra.function;
>      p->READ64(dst, tmp, tempAddr, src, bti, elemNum);
>    }
> @@ -621,11 +621,12 @@ namespace gbe
>    //  then follow the real destination registers.
>    //  For SIMD16, we allocate elemNum temporary registers from dst(0).
>    void GenContext::emitWrite64Instruction(const SelectionInstruction &insn) {
> -    const GenRegister src = ra->genReg(insn.src(0));
> +    const GenRegister src = ra->genReg(insn.dst(0));
>      const uint32_t elemNum = insn.extra.elem;
> -    const uint32_t tmpRegSize = (p->curr.execWidth == 8) ? elemNum * 2 : elemNum;
> -    const GenRegister data = ra->genReg(insn.src(tmpRegSize + 1));
> +    const GenRegister addr = ra->genReg(insn.src(0)); //tmpRegSize + 1));
> +    const GenRegister data = ra->genReg(insn.src(1));
>      const uint32_t bti = insn.extra.function;
> +    p->MOV(src, addr);
>      p->WRITE64(src, data, bti, elemNum);
>    }
>  
> diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
> index 4d6aa34..1a459e1 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -412,12 +412,6 @@ namespace gbe
>        curr.noMask = originMask;
>        this->UNTYPED_WRITE(msg, bti, elemNum);
>      }
> -    /* Let's restore the original message(addr) register. */
> -    /* XXX could be optimized if we don't allocate the address to the header
> -       position of the message. */
> -    curr.predicate = GEN_PREDICATE_NONE;
> -    curr.noMask = GEN_MASK_DISABLE;
> -    ADD(msg, GenRegister::retype(msg, GEN_TYPE_UD), GenRegister::immd(-4));
>      pop();
>    }
>  
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 1a3af68..a361ab3 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -479,7 +479,7 @@ namespace gbe
>      /*! Read 64 bits float/int array */
>      void READ64(Reg addr, Reg tempAddr, const GenRegister *dst, uint32_t elemNum, uint32_t valueNum, uint32_t bti);
>      /*! Write 64 bits float/int array */
> -    void WRITE64(Reg addr, const GenRegister *src, uint32_t elemNum, uint32_t valueNum, uint32_t bti);
> +    void WRITE64(Reg addr, const GenRegister *src, uint32_t srcNum, 
> + const GenRegister *dst, uint32_t dstNum, uint32_t bti);
>      /*! Untyped read (up to 4 elements) */
>      void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti);
>      /*! Untyped write (up to 4 elements) */ @@ -825,28 +825,29 @@ namespace gbe
>    /* elemNum contains all the temporary register and the
>       real destination registers.*/
>    void Selection::Opaque::READ64(Reg addr,
> -                                       Reg tempAddr,
> -                                       const GenRegister *dst,
> -                                       uint32_t elemNum,
> -                                       uint32_t valueNum,
> -                                       uint32_t bti)
> +                                 Reg tempAddr,
> +                                 const GenRegister *dst,
> +                                 uint32_t elemNum,
> +                                 uint32_t valueNum,
> +                                 uint32_t bti)
>    {
> -    SelectionInstruction *insn = this->appendInsn(SEL_OP_READ64, elemNum, 2);
> +    SelectionInstruction *insn = this->appendInsn(SEL_OP_READ64, 
> + elemNum + 1, 1);
>      SelectionVector *srcVector = this->appendVector();
>      SelectionVector *dstVector = this->appendVector();
>  
> +    /* temporary addr register is to be modified, set it to dst registers.*/
> +    insn->dst(0) = tempAddr;
>      // Regular instruction to encode
>      for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
> -      insn->dst(elemID) = dst[elemID];
> +      insn->dst(elemID + 1) = dst[elemID];
>      insn->src(0) = addr;
> -    insn->src(1) = tempAddr;
>      insn->extra.function = bti;
>      insn->extra.elem = valueNum;
>  
>      // Only the temporary registers need contiguous allocation
>      dstVector->regNum = elemNum - valueNum;
>      dstVector->isSrc = 0;
> -    dstVector->reg = &insn->dst(0);
> +    dstVector->reg = &insn->dst(1);
>  
>      // Source cannot be scalar (yet)
>      srcVector->regNum = 1;
> @@ -883,24 +884,27 @@ namespace gbe
>    /* elemNum contains all the temporary register and the
>       real data registers.*/
>    void Selection::Opaque::WRITE64(Reg addr,
> -                                        const GenRegister *src,
> -                                        uint32_t elemNum,
> -                                        uint32_t valueNum,
> -                                        uint32_t bti)
> +                                  const GenRegister *src,
> +                                  uint32_t srcNum,
> +                                  const GenRegister *dst,
> +                                  uint32_t dstNum,
> +                                  uint32_t bti)
>    {
> -    SelectionInstruction *insn = this->appendInsn(SEL_OP_WRITE64, 0, elemNum+1);
> +    SelectionInstruction *insn = this->appendInsn(SEL_OP_WRITE64, 
> + dstNum, srcNum + 1);
>      SelectionVector *vector = this->appendVector();
>  
>      // Regular instruction to encode
>      insn->src(0) = addr;
> -    for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
> -      insn->src(elemID+1) = src[elemID];
> +    for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
> +      insn->src(elemID + 1) = src[elemID];
> +    for (uint32_t elemID = 0; elemID < dstNum; ++elemID)
> +      insn->dst(elemID) = dst[elemID];
>      insn->extra.function = bti;
> -    insn->extra.elem = valueNum;
> +    insn->extra.elem = srcNum;
>  
>      // Only the addr + temporary registers need to be contiguous.
> -    vector->regNum = (elemNum - valueNum) + 1;
> -    vector->reg = &insn->src(0);
> +    vector->regNum = dstNum;
> +    vector->reg = &insn->dst(0);
>      vector->isSrc = 1;
>    }
>  
> @@ -2085,13 +2089,16 @@ namespace gbe
>        addr = GenRegister::retype(sel.selReg(insn.getSrc(addrID)), GEN_TYPE_F);
>        // The first 16 DWORD register space is for temporary usage at encode stage.
>        uint32_t tmpRegNum = (sel.ctx.getSimdWidth() == 8) ? valueNum * 2 : valueNum;
> -      GenRegister src[valueNum + tmpRegNum];
> +      GenRegister src[valueNum];
> +      GenRegister dst[tmpRegNum + 1];
> +      /* dst 0 is for the temporary address register. */
> +      dst[0] = sel.selReg(sel.reg(FAMILY_DWORD));
>        for (srcID = 0; srcID < tmpRegNum; ++srcID)
> -        src[srcID] = sel.selReg(sel.reg(FAMILY_DWORD));
> +        dst[srcID + 1] = sel.selReg(sel.reg(FAMILY_DWORD));
>  
> -      for (uint32_t valueID = 0; valueID < valueNum; ++srcID, ++valueID)
> -        src[srcID] = sel.selReg(insn.getValue(valueID));
> -      sel.WRITE64(addr, src, valueNum + tmpRegNum, valueNum, bti);
> +      for (uint32_t valueID = 0; valueID < valueNum; ++valueID)
> +        src[valueID] = sel.selReg(insn.getValue(valueID));
> +      sel.WRITE64(addr, src, valueNum, dst, tmpRegNum + 1, bti);
>      }
>  
>      void emitByteScatter(Selection::Opaque &sel,
> --
> 1.7.9.5
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list