[Beignet] [PATCH 1/2] GBE: relax the batch byte/short load vector size restriction.
Zhigang Gong
zhigang.gong at linux.intel.com
Mon Sep 1 21:35:46 PDT 2014
Ping for review of these 2 patches and the previous 2 patches.
Thanks.
On Thu, Aug 28, 2014 at 10:46:03AM +0800, Zhigang Gong wrote:
> The previous restriction was that the vector size must be a multiple
> of DWORD. This restriction prevents the vload2/3 of char or
> vload3 of ushort from being optimized. This patch relaxes this restriction
> on the vload path.
>
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
> backend/src/backend/gen_context.cpp | 6 ++--
> backend/src/backend/gen_insn_selection.cpp | 39 +++++++++++-------------
> backend/src/llvm/llvm_gen_backend.cpp | 3 +-
> backend/src/llvm/llvm_loadstore_optimization.cpp | 3 +-
> 4 files changed, 24 insertions(+), 27 deletions(-)
>
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index ba4a8f8..883fa39 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -1693,7 +1693,7 @@ namespace gbe
> void GenContext::emitUnpackByteInstruction(const SelectionInstruction &insn) {
> const GenRegister src = ra->genReg(insn.src(0));
> for(uint32_t i = 0; i < insn.dstNum; i++) {
> - p->MOV(ra->genReg(insn.dst(i)), GenRegister::splitReg(src, insn.dstNum, i));
> + p->MOV(ra->genReg(insn.dst(i)), GenRegister::splitReg(src, insn.extra.elem, i));
> }
> }
>
> @@ -1702,12 +1702,12 @@ namespace gbe
> p->push();
> if(simdWidth == 8) {
> for(uint32_t i = 0; i < insn.srcNum; i++)
> - p->MOV(GenRegister::splitReg(dst, insn.srcNum, i), ra->genReg(insn.src(i)));
> + p->MOV(GenRegister::splitReg(dst, insn.extra.elem, i), ra->genReg(insn.src(i)));
> } else {
> // when destination expands two registers, the source must span two registers.
> p->curr.execWidth = 8;
> for(uint32_t i = 0; i < insn.srcNum; i++) {
> - GenRegister dsti = GenRegister::splitReg(dst, insn.srcNum, i);
> + GenRegister dsti = GenRegister::splitReg(dst, insn.extra.elem, i);
> GenRegister src = ra->genReg(insn.src(i));
>
> p->curr.quarterControl = 0;
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 8478616..1258e54 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -575,10 +575,10 @@ namespace gbe
> void BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, uint32_t bti);
> /*! DWord scatter (for constant cache read) */
> void DWORD_GATHER(Reg dst, Reg addr, uint32_t bti);
> - /*! Unpack the uint to char4 */
> - void UNPACK_BYTE(const GenRegister *dst, const GenRegister src, uint32_t elemNum);
> - /*! pack the char4 to uint */
> - void PACK_BYTE(const GenRegister dst, const GenRegister *src, uint32_t elemNum);
> + /*! Unpack the uint to charN */
> + void UNPACK_BYTE(const GenRegister *dst, const GenRegister src, uint32_t elemSize, uint32_t elemNum);
> + /*! pack the charN to uint */
> + void PACK_BYTE(const GenRegister dst, const GenRegister *src, uint32_t elemSize, uint32_t elemNum);
> /*! Extended math function (2 arguments) */
> void MATH(Reg dst, uint32_t function, Reg src0, Reg src1);
> /*! Extended math function (1 argument) */
> @@ -1255,16 +1255,18 @@ namespace gbe
> srcVector->reg = &insn->src(0);
> }
>
> - void Selection::Opaque::UNPACK_BYTE(const GenRegister *dst, const GenRegister src, uint32_t elemNum) {
> + void Selection::Opaque::UNPACK_BYTE(const GenRegister *dst, const GenRegister src, uint32_t elemSize, uint32_t elemNum) {
> SelectionInstruction *insn = this->appendInsn(SEL_OP_UNPACK_BYTE, elemNum, 1);
> insn->src(0) = src;
> + insn->extra.elem = 4 / elemSize;
> for(uint32_t i = 0; i < elemNum; i++)
> insn->dst(i) = dst[i];
> }
> - void Selection::Opaque::PACK_BYTE(const GenRegister dst, const GenRegister *src, uint32_t elemNum) {
> + void Selection::Opaque::PACK_BYTE(const GenRegister dst, const GenRegister *src, uint32_t elemSize, uint32_t elemNum) {
> SelectionInstruction *insn = this->appendInsn(SEL_OP_PACK_BYTE, 1, elemNum);
> for(uint32_t i = 0; i < elemNum; i++)
> insn->src(i) = src[i];
> + insn->extra.elem = 4 / elemSize;
> insn->dst(0) = dst;
> }
>
> @@ -2862,9 +2864,7 @@ namespace gbe
> for(uint32_t i = 0; i < valueNum; i++)
> dst[i] = sel.selReg(insn.getValue(i), getType(family));
>
> - uint32_t tmpRegNum = typeSize*valueNum / 4;
> - if (tmpRegNum == 0)
> - tmpRegNum = 1;
> + uint32_t tmpRegNum = (typeSize*valueNum + 3) / 4;
> vector<GenRegister> tmp(tmpRegNum);
> vector<GenRegister> tmp2(tmpRegNum);
> vector<Register> tmpReg(tmpRegNum);
> @@ -2875,15 +2875,10 @@ namespace gbe
>
> readDWord(sel, tmp, tmp2, address, tmpRegNum, insn.getAddressSpace(), bti);
>
> - if (valueNum > 1) {
> - for(uint32_t i = 0; i < tmpRegNum; i++)
> - sel.UNPACK_BYTE(dst.data() + i * 4/typeSize, tmp[i], 4/typeSize);
> - }
> - else {
> - if (elemSize == GEN_BYTE_SCATTER_WORD)
> - sel.MOV(GenRegister::retype(dst[0], GEN_TYPE_UW), sel.unpacked_uw(tmpReg[0]));
> - else if (elemSize == GEN_BYTE_SCATTER_BYTE)
> - sel.MOV(GenRegister::retype(dst[0], GEN_TYPE_UB), sel.unpacked_ub(tmpReg[0]));
> + for(uint32_t i = 0; i < tmpRegNum; i++) {
> + unsigned int elemNum = (valueNum - i * (4 / typeSize)) > 4/typeSize ?
> + 4/typeSize : (valueNum - i * (4 / typeSize));
> + sel.UNPACK_BYTE(dst.data() + i * 4/typeSize, tmp[i], typeSize, elemNum);
> }
> }
>
> @@ -2948,7 +2943,7 @@ namespace gbe
> for(uint32_t i = 0; i < valueNum; i++)
> dst[i] = sel.selReg(insn.getValue(i), getType(family));
>
> - uint32_t effectDataNum = typeSize*valueNum / 4;
> + uint32_t effectDataNum = (typeSize*valueNum + 3) / 4;
> vector<GenRegister> tmp(effectDataNum + 1);
> vector<GenRegister> tmp2(effectDataNum + 1);
> vector<GenRegister> effectData(effectDataNum);
> @@ -2986,7 +2981,9 @@ namespace gbe
> getEffectByteData(sel, effectData, tmp, effectDataNum, address, simdWidth);
>
> for(uint32_t i = 0; i < effectDataNum; i++) {
> - sel.UNPACK_BYTE(dst.data() + i * 4/typeSize, effectData[i], 4/typeSize);
> + unsigned int elemNum = (valueNum - i * (4 / typeSize)) > 4/typeSize ?
> + 4/typeSize : (valueNum - i * (4 / typeSize));
> + sel.UNPACK_BYTE(dst.data() + i * 4/typeSize, effectData[i], typeSize, elemNum);
> }
> } else {
> GBE_ASSERT(insn.getValueNum() == 1);
> @@ -3148,7 +3145,7 @@ namespace gbe
> vector<GenRegister> tmp(tmpRegNum);
> for(uint32_t i = 0; i < tmpRegNum; i++) {
> tmp[i] = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
> - sel.PACK_BYTE(tmp[i], value.data() + i * 4/typeSize, 4/typeSize);
> + sel.PACK_BYTE(tmp[i], value.data() + i * 4/typeSize, typeSize, 4/typeSize);
> }
>
> sel.UNTYPED_WRITE(addr, tmp.data(), tmpRegNum, bti);
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index b956bc6..8f0d5c2 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -3515,7 +3515,8 @@ handle_write_image:
> emitBatchLoadOrStore(type, elemNum, llvmValues, ptr, addrSpace, elemType, isLoad, binding, dwAligned);
> }
> }
> - else if((dataFamily==ir::FAMILY_WORD && elemNum%2==0) || (dataFamily == ir::FAMILY_BYTE && elemNum%4 == 0)) {
> + else if((dataFamily == ir::FAMILY_WORD && (isLoad || elemNum % 2 == 0)) ||
> + (dataFamily == ir::FAMILY_BYTE && (isLoad || elemNum % 4 == 0))) {
> emitBatchLoadOrStore(type, elemNum, llvmValues, ptr, addrSpace, elemType, isLoad, binding, dwAligned);
> } else {
> for (uint32_t elemID = 0; elemID < elemNum; elemID++) {
> diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp b/backend/src/llvm/llvm_loadstore_optimization.cpp
> index 19726b0..ae91af7 100644
> --- a/backend/src/llvm/llvm_loadstore_optimization.cpp
> +++ b/backend/src/llvm/llvm_loadstore_optimization.cpp
> @@ -259,8 +259,7 @@ namespace gbe {
> while(size > 1) {
> unsigned vecSize = (size >= 16) ? 16 :
> (size >= 8 ? 8 :
> - (size >= 4 ? 4 :
> - (size >= 2 ? 2 : size)));
> + (size >= 4 ? 4 : size));
> SmallVector<Instruction*, 16> mergedVec(merged.begin() + pos, merged.begin() + pos + vecSize);
> if(isLoad)
> mergeLoad(BB, mergedVec);
> --
> 1.8.3.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list