[Beignet] [PATCH 1/2] GBE: relax the batch byte/short load vector size restriction.

Zhigang Gong zhigang.gong at linux.intel.com
Mon Sep 1 21:35:46 PDT 2014


Ping for review of these 2 patches as well as the previous 2 patches.
Thanks.

On Thu, Aug 28, 2014 at 10:46:03AM +0800, Zhigang Gong wrote:
> The previous restriction was that the vector size must be a multiple
> of DWORD. This restriction prevents the vload2/3 of char or
> vload3 of ushort from being optimized. This patch relaxes this restriction
> on the vload path.
> 
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
>  backend/src/backend/gen_context.cpp              |  6 ++--
>  backend/src/backend/gen_insn_selection.cpp       | 39 +++++++++++-------------
>  backend/src/llvm/llvm_gen_backend.cpp            |  3 +-
>  backend/src/llvm/llvm_loadstore_optimization.cpp |  3 +-
>  4 files changed, 24 insertions(+), 27 deletions(-)
> 
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index ba4a8f8..883fa39 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -1693,7 +1693,7 @@ namespace gbe
>    void GenContext::emitUnpackByteInstruction(const SelectionInstruction &insn) {
>      const GenRegister src = ra->genReg(insn.src(0));
>      for(uint32_t i = 0; i < insn.dstNum; i++) {
> -      p->MOV(ra->genReg(insn.dst(i)), GenRegister::splitReg(src, insn.dstNum, i));
> +      p->MOV(ra->genReg(insn.dst(i)), GenRegister::splitReg(src, insn.extra.elem, i));
>      }
>    }
>  
> @@ -1702,12 +1702,12 @@ namespace gbe
>      p->push();
>      if(simdWidth == 8) {
>        for(uint32_t i = 0; i < insn.srcNum; i++)
> -        p->MOV(GenRegister::splitReg(dst, insn.srcNum, i), ra->genReg(insn.src(i)));
> +        p->MOV(GenRegister::splitReg(dst, insn.extra.elem, i), ra->genReg(insn.src(i)));
>      } else {
>        // when destination expands two registers, the source must span two registers.
>        p->curr.execWidth = 8;
>        for(uint32_t i = 0; i < insn.srcNum; i++) {
> -        GenRegister dsti = GenRegister::splitReg(dst, insn.srcNum, i);
> +        GenRegister dsti = GenRegister::splitReg(dst, insn.extra.elem, i);
>          GenRegister src = ra->genReg(insn.src(i));
>  
>          p->curr.quarterControl = 0;
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 8478616..1258e54 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -575,10 +575,10 @@ namespace gbe
>      void BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, uint32_t bti);
>      /*! DWord scatter (for constant cache read) */
>      void DWORD_GATHER(Reg dst, Reg addr, uint32_t bti);
> -    /*! Unpack the uint to char4 */
> -    void UNPACK_BYTE(const GenRegister *dst, const GenRegister src, uint32_t elemNum);
> -    /*! pack the char4 to uint */
> -    void PACK_BYTE(const GenRegister dst, const GenRegister *src, uint32_t elemNum);
> +    /*! Unpack the uint to charN */
> +    void UNPACK_BYTE(const GenRegister *dst, const GenRegister src, uint32_t elemSize, uint32_t elemNum);
> +    /*! pack the charN to uint */
> +    void PACK_BYTE(const GenRegister dst, const GenRegister *src, uint32_t elemSize, uint32_t elemNum);
>      /*! Extended math function (2 arguments) */
>      void MATH(Reg dst, uint32_t function, Reg src0, Reg src1);
>      /*! Extended math function (1 argument) */
> @@ -1255,16 +1255,18 @@ namespace gbe
>      srcVector->reg = &insn->src(0);
>    }
>  
> -  void Selection::Opaque::UNPACK_BYTE(const GenRegister *dst, const GenRegister src, uint32_t elemNum) {
> +  void Selection::Opaque::UNPACK_BYTE(const GenRegister *dst, const GenRegister src, uint32_t elemSize, uint32_t elemNum) {
>      SelectionInstruction *insn = this->appendInsn(SEL_OP_UNPACK_BYTE, elemNum, 1);
>      insn->src(0) = src;
> +    insn->extra.elem = 4 / elemSize;
>      for(uint32_t i = 0; i < elemNum; i++)
>        insn->dst(i) = dst[i];
>    }
> -  void Selection::Opaque::PACK_BYTE(const GenRegister dst, const GenRegister *src, uint32_t elemNum) {
> +  void Selection::Opaque::PACK_BYTE(const GenRegister dst, const GenRegister *src, uint32_t elemSize, uint32_t elemNum) {
>      SelectionInstruction *insn = this->appendInsn(SEL_OP_PACK_BYTE, 1, elemNum);
>      for(uint32_t i = 0; i < elemNum; i++)
>        insn->src(i) = src[i];
> +    insn->extra.elem = 4 / elemSize;
>      insn->dst(0) = dst;
>    }
>  
> @@ -2862,9 +2864,7 @@ namespace gbe
>        for(uint32_t i = 0; i < valueNum; i++)
>          dst[i] = sel.selReg(insn.getValue(i), getType(family));
>  
> -      uint32_t tmpRegNum = typeSize*valueNum / 4;
> -      if (tmpRegNum == 0)
> -        tmpRegNum = 1;
> +      uint32_t tmpRegNum = (typeSize*valueNum + 3) / 4;
>        vector<GenRegister> tmp(tmpRegNum);
>        vector<GenRegister> tmp2(tmpRegNum);
>        vector<Register> tmpReg(tmpRegNum);
> @@ -2875,15 +2875,10 @@ namespace gbe
>  
>        readDWord(sel, tmp, tmp2, address, tmpRegNum, insn.getAddressSpace(), bti);
>  
> -      if (valueNum > 1) {
> -        for(uint32_t i = 0; i < tmpRegNum; i++)
> -          sel.UNPACK_BYTE(dst.data() + i * 4/typeSize, tmp[i], 4/typeSize);
> -      }
> -      else {
> -        if (elemSize == GEN_BYTE_SCATTER_WORD)
> -          sel.MOV(GenRegister::retype(dst[0], GEN_TYPE_UW), sel.unpacked_uw(tmpReg[0]));
> -        else if (elemSize == GEN_BYTE_SCATTER_BYTE)
> -          sel.MOV(GenRegister::retype(dst[0], GEN_TYPE_UB), sel.unpacked_ub(tmpReg[0]));
> +      for(uint32_t i = 0; i < tmpRegNum; i++) {
> +        unsigned int elemNum = (valueNum - i * (4 / typeSize)) > 4/typeSize ?
> +                               4/typeSize : (valueNum - i * (4 / typeSize));
> +        sel.UNPACK_BYTE(dst.data() + i * 4/typeSize, tmp[i], typeSize, elemNum);
>        }
>      }
>  
> @@ -2948,7 +2943,7 @@ namespace gbe
>          for(uint32_t i = 0; i < valueNum; i++)
>            dst[i] = sel.selReg(insn.getValue(i), getType(family));
>  
> -        uint32_t effectDataNum = typeSize*valueNum / 4;
> +        uint32_t effectDataNum = (typeSize*valueNum + 3) / 4;
>          vector<GenRegister> tmp(effectDataNum + 1);
>          vector<GenRegister> tmp2(effectDataNum + 1);
>          vector<GenRegister> effectData(effectDataNum);
> @@ -2986,7 +2981,9 @@ namespace gbe
>          getEffectByteData(sel, effectData, tmp, effectDataNum, address, simdWidth);
>  
>          for(uint32_t i = 0; i < effectDataNum; i++) {
> -          sel.UNPACK_BYTE(dst.data() + i * 4/typeSize, effectData[i], 4/typeSize);
> +          unsigned int elemNum = (valueNum - i * (4 / typeSize)) > 4/typeSize ?
> +                                 4/typeSize : (valueNum - i * (4 / typeSize));
> +          sel.UNPACK_BYTE(dst.data() + i * 4/typeSize, effectData[i], typeSize, elemNum);
>          }
>        } else {
>          GBE_ASSERT(insn.getValueNum() == 1);
> @@ -3148,7 +3145,7 @@ namespace gbe
>          vector<GenRegister> tmp(tmpRegNum);
>          for(uint32_t i = 0; i < tmpRegNum; i++) {
>            tmp[i] = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
> -          sel.PACK_BYTE(tmp[i], value.data() + i * 4/typeSize, 4/typeSize);
> +          sel.PACK_BYTE(tmp[i], value.data() + i * 4/typeSize, typeSize, 4/typeSize);
>          }
>  
>          sel.UNTYPED_WRITE(addr, tmp.data(), tmpRegNum, bti);
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index b956bc6..8f0d5c2 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -3515,7 +3515,8 @@ handle_write_image:
>            emitBatchLoadOrStore(type, elemNum, llvmValues, ptr, addrSpace, elemType, isLoad, binding, dwAligned);
>          }
>        }
> -      else if((dataFamily==ir::FAMILY_WORD && elemNum%2==0) || (dataFamily == ir::FAMILY_BYTE && elemNum%4 == 0)) {
> +      else if((dataFamily == ir::FAMILY_WORD && (isLoad || elemNum % 2 == 0)) ||
> +              (dataFamily == ir::FAMILY_BYTE && (isLoad || elemNum % 4 == 0))) {
>            emitBatchLoadOrStore(type, elemNum, llvmValues, ptr, addrSpace, elemType, isLoad, binding, dwAligned);
>        } else {
>          for (uint32_t elemID = 0; elemID < elemNum; elemID++) {
> diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp b/backend/src/llvm/llvm_loadstore_optimization.cpp
> index 19726b0..ae91af7 100644
> --- a/backend/src/llvm/llvm_loadstore_optimization.cpp
> +++ b/backend/src/llvm/llvm_loadstore_optimization.cpp
> @@ -259,8 +259,7 @@ namespace gbe {
>          while(size > 1) {
>            unsigned vecSize = (size >= 16) ? 16 :
>                               (size >= 8 ? 8 :
> -                             (size >= 4 ? 4 :
> -                             (size >= 2 ? 2 : size)));
> +                             (size >= 4 ? 4 : size));
>            SmallVector<Instruction*, 16> mergedVec(merged.begin() + pos, merged.begin() + pos + vecSize);
>            if(isLoad)
>              mergeLoad(BB, mergedVec);
> -- 
> 1.8.3.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list