[Beignet] [PATCH 1/2] GBE: Refine uniform load logic.

Zhigang Gong zhigang.gong at linux.intel.com
Sun Jul 13 22:01:17 PDT 2014


Ruiling,

One major comment is about the usage of replaceUniformSource(). I just
take the following code snippet as example,

> -      if(srcNum > 1) src1 = sel.selReg(insn.getSrc(1), TYPE_U32);
> -      if(srcNum > 2) src2 = sel.selReg(insn.getSrc(2), TYPE_U32);
> +      if(srcNum > 1) src1 = replaceUniformSource(sel, insn.getSrc(1), TYPE_U32);
> +      if(srcNum > 2) src2 = replaceUniformSource(sel, insn.getSrc(2), TYPE_U32);

Some of the sel.selReg() have been replaced to replaceUniformSource() and some are not.
The internal policy should be that those instructions which don't support uniform source
need to use replaceUniformSource() and the other instructions which support uniform source
don't need to use the replaceUniformSource(). IMO, to handle this difference into one function
should be clearer than use it in each instructions individually.

For example, we can introduce another function similar as below:

GenRegister
getEffectSrc(sel, insn, src_id, TYPE_32)
{
}

Then in this function, we could check the insn type and determine whether we should use
replaceUniformSource or normal sel.selReg(). IMO, it will be a little bit clearer.

On Fri, Jul 11, 2014 at 04:23:07PM +0800, Ruiling Song wrote:
> Currently many dataport messages cannot support uniform
> load/store. So we need to add a move instruction before
> or after a dataport message to move data between uniform
> and varying registers. The idea behind the patch is that
> if a dataport message could not handle uniform register,
> then explicitly handle uniform register before the message.
> 
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
>  backend/src/backend/gen_insn_selection.cpp |  174 ++++++++++++++++++++--------
>  backend/src/backend/gen_reg_allocation.cpp |    2 +
>  2 files changed, 127 insertions(+), 49 deletions(-)
> 
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index fb041de..565f203 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -904,7 +904,7 @@ namespace gbe
>  
>    ir::Register Selection::Opaque::replaceSrc(SelectionInstruction *insn, uint32_t regID, ir::Type type, bool needMov) {
>      SelectionBlock *block = insn->parent;
> -    const uint32_t simdWidth = insn->state.execWidth;
> +    const uint32_t simdWidth =this->isScalarReg(insn->src(regID).reg()) ? 1 : insn->state.execWidth;
>      ir::Register tmp;
>      GenRegister gr;
>  
> @@ -939,7 +939,7 @@ namespace gbe
>      ir::Register tmp;
>      GenRegister gr;
>      this->block = block;
> -    tmp = this->reg(ir::getFamily(type));
> +    tmp = this->reg(ir::getFamily(type),simdWidth == 1);
>      gr = this->selReg(tmp, type);
>      if (needMov) {
>      // Generate the MOV instruction and replace the register in the instruction
> @@ -2693,6 +2693,29 @@ namespace gbe
>      }
>    }
>  
> +  GenRegister replaceUniformSource(Selection::Opaque &sel, ir::Register reg, ir::Type type) {
> +    const bool isUniform = sel.isScalarReg(reg);
> +    ir::RegisterFamily family = ir::getFamily(type);
> +    const GenRegister uniform = sel.selReg(reg, type);
> +    if(isUniform) {
> +      const GenRegister varying = sel.selReg(sel.reg(family), type);
> +      sel.push();
> +        sel.curr.noMask = 1;
> +        sel.MOV(varying, uniform);
> +      sel.pop();
> +      return varying;
> +    }
> +    return uniform;
> +  }
> +
> +  void moveVarying2Uniform(Selection::Opaque &sel, GenRegister uniform, GenRegister varying) {
> +    sel.push();
> +      sel.curr.noMask = 1;
> +      sel.curr.execWidth = 1;
> +      sel.MOV(uniform, varying);
> +    sel.pop();
> +  }
> +
>    /*! Load instruction pattern */
>    DECL_PATTERN(LoadInstruction)
>    {
> @@ -2704,9 +2727,31 @@ namespace gbe
>        using namespace ir;
>        const uint32_t valueNum = insn.getValueNum();
>        vector<GenRegister> dst(valueNum);
> -      for (uint32_t dstID = 0; dstID < valueNum; ++dstID)
> -        dst[dstID] = GenRegister::retype(sel.selReg(insn.getValue(dstID)), GEN_TYPE_F);
> -      sel.UNTYPED_READ(addr, dst.data(), valueNum, bti);
> +      bool isUniformDst = sel.isScalarReg(insn.getValue(0));
> +      GenRegister tmpAddr = replaceUniformSource(sel, addr.reg(), ir::TYPE_FLOAT);
> +
> +      if(isUniformDst) {
> +        for (uint32_t dstID = 0; dstID < valueNum; ++dstID)
> +          dst[dstID] = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_FLOAT);
> +      } else {
> +        for (uint32_t dstID = 0; dstID < valueNum; ++dstID)
> +          dst[dstID] = sel.selReg(insn.getValue(dstID), ir::TYPE_FLOAT);
> +      }
> +
> +      sel.push();
> +        if(isUniformDst)
> +          sel.curr.noMask = 1;
> +        sel.UNTYPED_READ(tmpAddr, dst.data(), valueNum, bti);
> +      sel.pop();
> +
> +      if(isUniformDst) {
> +        sel.push();
> +          sel.curr.noMask = 1;
> +          sel.curr.execWidth = 1;
> +          for (uint32_t dstID = 0; dstID < valueNum; ++dstID)
> +            sel.MOV(sel.selReg(insn.getValue(dstID), ir::TYPE_FLOAT), GenRegister::vec1(dst[dstID]));
> +        sel.pop();
> +      }
>      }
>  
>      void emitDWordGather(Selection::Opaque &sel,
> @@ -2715,21 +2760,32 @@ namespace gbe
>                           uint32_t bti) const
>      {
>        using namespace ir;
> -      const uint32_t simdWidth = sel.isScalarReg(insn.getValue(0)) ? 1 : sel.ctx.getSimdWidth();
> +      const bool isUniformDst = sel.isScalarReg(insn.getValue(0));
> +      const bool isUniformSrc = sel.isScalarReg(addr.reg());
> +
>        GBE_ASSERT(insn.getValueNum() == 1);
>        GenRegister dst = GenRegister::retype(sel.selReg(insn.getValue(0)), GEN_TYPE_F);
> -      // get dword based address
> -      GenRegister addrDW = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD, simdWidth == 1));
> +      // Get dword based address
> +      GenRegister addrDW = GenRegister::udxgrf(sel.ctx.getSimdWidth(), sel.reg(FAMILY_DWORD));
>  
>        sel.push();
> -        if (simdWidth == 1) {
> +        if (isUniformSrc) {
>            sel.curr.noMask = 1;
> -          sel.curr.execWidth = 1;
>          }
>          sel.SHR(addrDW, GenRegister::retype(addr, GEN_TYPE_UD), GenRegister::immud(2));
>        sel.pop();
>  
> -      sel.DWORD_GATHER(dst, addrDW, bti);
> +      if(isUniformDst) {
> +        GenRegister tmp = GenRegister::fxgrf(sel.ctx.getSimdWidth(), sel.reg(FAMILY_DWORD));
> +        sel.push();
> +          sel.curr.noMask = 1;
> +          sel.DWORD_GATHER(tmp, addrDW, bti);
> +          sel.curr.execWidth = 1;
> +          sel.MOV(dst, GenRegister::vec1(tmp));
> +        sel.pop();
> +      } else
> +        sel.DWORD_GATHER(dst, addrDW, bti);
> +
>      }
>  
>      void emitRead64(Selection::Opaque &sel,
> @@ -2741,24 +2797,29 @@ namespace gbe
>        const uint32_t valueNum = insn.getValueNum();
>        /* XXX support scalar only right now. */
>        GBE_ASSERT(valueNum == 1);
> +      GenRegister tmpAddr = replaceUniformSource(sel, addr.reg(), ir::TYPE_FLOAT);
>  
> +      /// TODO support uniform load for int64
> +      GBE_ASSERT(sel.isScalarReg(insn.getValue(0)) == false);
>        GenRegister dst[valueNum];
>        for ( uint32_t dstID = 0; dstID < valueNum; ++dstID)
>          dst[dstID] = sel.selReg(insn.getValue(dstID), ir::TYPE_U64);
> -      sel.READ64(addr, dst, valueNum, bti);
> +      sel.READ64(tmpAddr, dst, valueNum, bti);
>      }
>  
>      void emitByteGather(Selection::Opaque &sel,
>                          const ir::LoadInstruction &insn,
>                          const uint32_t elemSize,
> -                        GenRegister address,
> +                        GenRegister addr,
>                          uint32_t bti) const
>      {
>        using namespace ir;
>        const uint32_t valueNum = insn.getValueNum();
> -      const uint32_t simdWidth = sel.isScalarReg(insn.getValue(0)) ?
> -                                 1 : sel.ctx.getSimdWidth();
> +      const bool isUniformDst = sel.isScalarReg(insn.getValue(0));
> +
>        if(valueNum > 1) {
> +        GenRegister tmpAddr = replaceUniformSource(sel, addr.reg(), ir::TYPE_FLOAT);
> +
>          vector<GenRegister> dst(valueNum);
>          const uint32_t typeSize = getFamilySize(getFamily(insn.getValueType()));
>  
> @@ -2773,10 +2834,13 @@ namespace gbe
>          uint32_t tmpRegNum = typeSize*valueNum / 4;
>          vector<GenRegister> tmp(tmpRegNum);
>          for(uint32_t i = 0; i < tmpRegNum; i++) {
> -          tmp[i] = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
> +          tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
>          }
> -
> -        sel.UNTYPED_READ(address, tmp.data(), tmpRegNum, bti);
> +        sel.push();
> +          if(isUniformDst)
> +            sel.curr.noMask = 1;
> +          sel.UNTYPED_READ(tmpAddr, tmp.data(), tmpRegNum, bti);
> +        sel.pop();
>  
>          for(uint32_t i = 0; i < tmpRegNum; i++) {
>            sel.UNPACK_BYTE(dst.data() + i * 4/typeSize, tmp[i], 4/typeSize);
> @@ -2784,35 +2848,39 @@ namespace gbe
>       } else {
>          GBE_ASSERT(insn.getValueNum() == 1);
>          const GenRegister value = sel.selReg(insn.getValue(0));
> +        const bool isUniformAddr = sel.isScalarReg(addr.reg());
>          GBE_ASSERT(elemSize == GEN_BYTE_SCATTER_WORD || elemSize == GEN_BYTE_SCATTER_BYTE);
>  
> -        Register tmpReg = sel.reg(FAMILY_DWORD, simdWidth == 1);
> -        GenRegister tmpAddr = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD, simdWidth == 1));
> -        GenRegister tmpData = GenRegister::udxgrf(simdWidth, tmpReg);
> +        Register tmpReg = sel.reg(FAMILY_DWORD);
> +        GenRegister tmpAddr = GenRegister::udxgrf(sel.ctx.getSimdWidth(), sel.reg(FAMILY_DWORD));
> +        GenRegister tmpData = GenRegister::udxgrf(sel.ctx.getSimdWidth(), tmpReg);
>          // Get dword aligned addr
>          sel.push();
> -          if (simdWidth == 1) {
> -            sel.curr.execWidth = 1;
> +          if (isUniformAddr)
>              sel.curr.noMask = 1;
> -          }
> -          sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0xfffffffc));
> +          sel.AND(tmpAddr, addr, GenRegister::immud(0xfffffffc));
>          sel.pop();
> +
>          sel.push();
> -          if (simdWidth == 1)
> +          if (isUniformDst)
>              sel.curr.noMask = 1;
>            sel.UNTYPED_READ(tmpAddr, &tmpData, 1, bti);
>  
> -          if (simdWidth == 1)
> +          if (isUniformDst) {
>              sel.curr.execWidth = 1;
> +            addr = GenRegister::vec1(addr);
> +            tmpAddr = GenRegister::vec1(tmpAddr);
> +            tmpData = GenRegister::vec1(tmpData);
> +          }
>            // Get the remaining offset from aligned addr
> -          sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0x3));
> +          sel.AND(tmpAddr, addr, GenRegister::immud(0x3));
>            sel.SHL(tmpAddr, tmpAddr, GenRegister::immud(0x3));
>            sel.SHR(tmpData, tmpData, tmpAddr);
>  
>            if (elemSize == GEN_BYTE_SCATTER_WORD)
> -            sel.MOV(GenRegister::retype(value, GEN_TYPE_UW), sel.unpacked_uw(tmpReg));
> +            sel.MOV(GenRegister::retype(value, GEN_TYPE_UW), GenRegister::unpacked_uw(tmpReg, isUniformDst));
>            else if (elemSize == GEN_BYTE_SCATTER_BYTE)
> -            sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), sel.unpacked_ub(tmpReg));
> +            sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(tmpReg, isUniformDst));
>          sel.pop();
>        }
>      }
> @@ -2837,7 +2905,7 @@ namespace gbe
>                   insn.getAddressSpace() == MEM_CONSTANT ||
>                   insn.getAddressSpace() == MEM_PRIVATE ||
>                   insn.getAddressSpace() == MEM_LOCAL);
> -      //GBE_ASSERT(sel.isScalarReg(insn.getValue(0)) == false);
> +
>        const Type type = insn.getValueType();
>        const uint32_t elemSize = getByteScatterGatherSize(type);
>        if(space == MEM_LOCAL && sel.needPatchSLMAddr()) {
> @@ -2880,11 +2948,12 @@ namespace gbe
>        using namespace ir;
>        const uint32_t valueNum = insn.getValueNum();
>        vector<GenRegister> value(valueNum);
> +      GenRegister tmpAddr = replaceUniformSource(sel, addr.reg(), ir::TYPE_FLOAT);
>  
> -      addr = GenRegister::retype(addr, GEN_TYPE_F);
>        for (uint32_t valueID = 0; valueID < valueNum; ++valueID)
> -        value[valueID] = GenRegister::retype(sel.selReg(insn.getValue(valueID)), GEN_TYPE_F);
> -      sel.UNTYPED_WRITE(addr, value.data(), valueNum, bti);
> +        value[valueID] = replaceUniformSource(sel, insn.getValue(valueID), ir::TYPE_FLOAT);
> +
> +      sel.UNTYPED_WRITE(tmpAddr, value.data(), valueNum, bti);
>      }
>  
>      void emitWrite64(Selection::Opaque &sel,
> @@ -2896,12 +2965,12 @@ namespace gbe
>        const uint32_t valueNum = insn.getValueNum();
>        /* XXX support scalar only right now. */
>        GBE_ASSERT(valueNum == 1);
> -      addr = GenRegister::retype(addr, GEN_TYPE_UD);
>        GenRegister src[valueNum];
> +      GenRegister tmpAddr = replaceUniformSource(sel, addr.reg(), TYPE_U32);
>  
>        for (uint32_t valueID = 0; valueID < valueNum; ++valueID)
> -        src[valueID] = sel.selReg(insn.getValue(valueID), ir::TYPE_U64);
> -      sel.WRITE64(addr, src, valueNum, bti);
> +        src[valueID] = replaceUniformSource(sel, insn.getValue(valueID), ir::TYPE_U64);
> +      sel.WRITE64(tmpAddr, src, valueNum, bti);
>      }
>  
>      void emitByteScatter(Selection::Opaque &sel,
> @@ -2914,6 +2983,8 @@ namespace gbe
>        const uint32_t simdWidth = sel.ctx.getSimdWidth();
>        uint32_t valueNum = insn.getValueNum();
>  
> +      GenRegister tmpAddr = replaceUniformSource(sel, addr.reg(), ir::TYPE_FLOAT);
> +
>        if(valueNum > 1) {
>          const uint32_t typeSize = getFamilySize(getFamily(insn.getValueType()));
>          vector<GenRegister> value(valueNum);
> @@ -2933,7 +3004,7 @@ namespace gbe
>            sel.PACK_BYTE(tmp[i], value.data() + i * 4/typeSize, 4/typeSize);
>          }
>  
> -        sel.UNTYPED_WRITE(addr, tmp.data(), tmpRegNum, bti);
> +        sel.UNTYPED_WRITE(tmpAddr, tmp.data(), tmpRegNum, bti);
>        } else {
>          const GenRegister value = sel.selReg(insn.getValue(0));
>          GBE_ASSERT(insn.getValueNum() == 1);
> @@ -2943,7 +3014,7 @@ namespace gbe
>          } else if (elemSize == GEN_BYTE_SCATTER_BYTE) {
>            sel.MOV(tmp, GenRegister::retype(value, GEN_TYPE_UB));
>          }
> -        sel.BYTE_SCATTER(addr, tmp, elemSize, bti);
> +        sel.BYTE_SCATTER(tmpAddr, tmp, elemSize, bti);
>        }
>      }
>  
> @@ -3367,18 +3438,23 @@ namespace gbe
>        const AddressSpace space = insn.getAddressSpace();
>        const uint32_t bti = space == MEM_LOCAL ? 0xfe : 0x01;
>        const uint32_t srcNum = insn.getSrcNum();
> -      GenRegister src0 = sel.selReg(insn.getSrc(0), TYPE_U32);   //address
> +      GenRegister src0 = replaceUniformSource(sel, insn.getSrc(0), TYPE_U32); //address
>        GenRegister src1 = src0, src2 = src0;
> -      if(srcNum > 1) src1 = sel.selReg(insn.getSrc(1), TYPE_U32);
> -      if(srcNum > 2) src2 = sel.selReg(insn.getSrc(2), TYPE_U32);
> +      if(srcNum > 1) src1 = replaceUniformSource(sel, insn.getSrc(1), TYPE_U32);
> +      if(srcNum > 2) src2 = replaceUniformSource(sel, insn.getSrc(2), TYPE_U32);
> +
>        GenRegister dst  = sel.selReg(insn.getDst(0), TYPE_U32);
> +      const bool isUniformDst = sel.isScalarReg(insn.getDst(0));
> +      GenRegister tmpDst = isUniformDst ? sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32) : dst;
> +
>        GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp;
>        if(space == MEM_LOCAL && sel.needPatchSLMAddr()){
>          GenRegister temp = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
>          sel.ADD(temp, src0, sel.selReg(ocl::slmoffset, ir::TYPE_U32));
>          src0 = temp;
>        }
> -      sel.ATOMIC(dst, genAtomicOp, srcNum, src0, src1, src2, bti);
> +      sel.ATOMIC(tmpDst, genAtomicOp, srcNum, src0, src1, src2, bti);
> +      if(isUniformDst) moveVarying2Uniform(sel, dst, tmpDst);
>        return true;
>      }
>      DECL_CTOR(AtomicInstruction, 1, 1);
> @@ -3589,12 +3665,12 @@ namespace gbe
>        if (insn.getSamplerOffset() != 0) {
>          // U, lod, [V], [W]
>          GBE_ASSERT(insn.getSrcType() != TYPE_FLOAT);
> -        msgPayloads[0] = sel.selReg(insn.getSrc(0), insn.getSrcType());
> +        msgPayloads[0] = replaceUniformSource(sel, insn.getSrc(0), insn.getSrcType());
>          msgPayloads[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
>          if (srcNum > 1)
> -          msgPayloads[2] = sel.selReg(insn.getSrc(1), insn.getSrcType());
> +          msgPayloads[2] = replaceUniformSource(sel, insn.getSrc(1), insn.getSrcType());
>          if (srcNum > 2)
> -          msgPayloads[3] = sel.selReg(insn.getSrc(2), insn.getSrcType());
> +          msgPayloads[3] = replaceUniformSource(sel, insn.getSrc(2), insn.getSrcType());
>          // Clear the lod to zero.
>          sel.MOV(msgPayloads[1], GenRegister::immud(0));
>          msgLen = srcNum + 1;
> @@ -3602,7 +3678,7 @@ namespace gbe
>          // U, V, [W]
>          GBE_ASSERT(insn.getSrcType() == TYPE_FLOAT);
>          for (valueID = 0; valueID < srcNum; ++valueID)
> -          msgPayloads[valueID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType());
> +          msgPayloads[valueID] = replaceUniformSource(sel, insn.getSrc(valueID), insn.getSrcType());
>          msgLen = srcNum;
>        }
>        // We switch to a fixup bti for linear filter on a image1d array sampling.
> @@ -3637,7 +3713,7 @@ namespace gbe
>          uint32_t valueID = 0;
>          msgs[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
>          for(uint32_t msgID = 1; msgID < 1 + coordNum; msgID++, valueID++)
> -          msgs[msgID] = sel.selReg(insn.getSrc(msgID - 1), insn.getCoordType());
> +          msgs[msgID] = replaceUniformSource(sel, insn.getSrc(msgID - 1), insn.getCoordType());
>  
>          // fake u.
>          if (insn.getSrc(1) == ir::ocl::invalid)
> @@ -3648,7 +3724,7 @@ namespace gbe
>          // LOD.
>          msgs[4] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
>          for(uint32_t msgID = 5; valueID < insn.getSrcNum(); msgID++, valueID++)
> -          msgs[msgID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType());
> +          msgs[msgID] = replaceUniformSource(sel, insn.getSrc(valueID), insn.getSrcType());
>        }
>  
>        sel.push();
> diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
> index b7fbc93..e1e5b93 100644
> --- a/backend/src/backend/gen_reg_allocation.cpp
> +++ b/backend/src/backend/gen_reg_allocation.cpp
> @@ -315,6 +315,8 @@ namespace gbe
>        else {
>          ir::Register tmp;
>          ir::Type type = getIRType(vector->reg[regID].type);
> +        // We currently don't have scalar register in SelectionVector
> +        GBE_ASSERT(ctx.sel->isScalarReg(reg) == false);
>          tmp = this->replaceReg(selection, vector->insn, regID, vector->isSrc, type);
>          const VectorLocation location = std::make_pair(vector, regID);
>          this->vectorMap.insert(std::make_pair(tmp, location));
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list