[Beignet] [PATCH] GBE: avoid to use the GenRegister::xxxgrf(simdWidth, xxx).

Zhigang Gong zhigang.gong at linux.intel.com
Fri Apr 10 00:48:31 PDT 2015


Just as we discussed offline, the vector load(dst number larger than 1)
part of getEffectByteData() and emitUnalignedByteGather() will not have uniform
values due to current uniform analysis method.
And the emitDWordGather() will handle uniform correctly in the sel.SAMPLE code path.

I will add some assert code and comments in the above functions to describe
why we don't check uniform for those cases.

Thanks for the careful review comments.

On Fri, Apr 10, 2015 at 08:17:38AM +0000, Yang, Rong R wrote:
> After use the uniform register, the execWidth must set to 1 in function emitDWordGather, getEffectByteData and emitUnalignedByteGather.
> 
> > -----Original Message-----
> > From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> > Zhigang Gong
> > Sent: Friday, April 10, 2015 12:47
> > To: Gong, Zhigang
> > Cc: beignet at lists.freedesktop.org
> > Subject: Re: [Beignet] [PATCH] GBE: avoid to use the
> > GenRegister::xxxgrf(simdWidth, xxx).
> > 
> > This patch is based on the previous "GBE: correct some temporary virtual
> > register's simdWidth."
> > 
> > On Fri, Apr 10, 2015 at 12:43:35PM +0800, Zhigang Gong wrote:
> > > All the gen registers should get the uniform information from the
> > > corresponding virtual registers. The use of GenRegister::xxxgrf on a
> > > virtual register is very dangerous which may cause inconsistency.
> > > This patch eliminate all the use of it in gen_insn_selection stage.
> > >
> > > Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> > > ---
> > >  backend/src/backend/gen_insn_selection.cpp | 92
> > > +++++++++++++++---------------
> > >  1 file changed, 45 insertions(+), 47 deletions(-)
> > >
> > > diff --git a/backend/src/backend/gen_insn_selection.cpp
> > > b/backend/src/backend/gen_insn_selection.cpp
> > > index 0f6054a..a4c5987 100644
> > > --- a/backend/src/backend/gen_insn_selection.cpp
> > > +++ b/backend/src/backend/gen_insn_selection.cpp
> > > @@ -2218,6 +2218,7 @@ namespace gbe
> > >        GenRegister src0 = sel.selReg(insn.getSrc(0), type);
> > >        GenRegister src1 = sel.selReg(insn.getSrc(1), type);
> > >        const uint32_t simdWidth = sel.curr.execWidth;
> > > +      const bool isUniform = simdWidth == 1;
> > >        const RegisterFamily family = getFamily(type);
> > >        uint32_t function = (op == OP_DIV)?
> > >                            GEN_MATH_FUNCTION_INT_DIV_QUOTIENT :
> > > @@ -2226,16 +2227,11 @@ namespace gbe
> > >        //bytes and shorts must be converted to int for DIV and REM per GEN
> > restriction
> > >        if((family == FAMILY_WORD || family == FAMILY_BYTE)) {
> > >          GenRegister tmp0, tmp1;
> > > -        ir::Register reg = sel.reg(FAMILY_DWORD, simdWidth == 1);
> > > -
> > > -        tmp0 = GenRegister::udxgrf(simdWidth, reg);
> > > -        tmp0 = GenRegister::retype(tmp0, GEN_TYPE_D);
> > > +        ir::Register reg = sel.reg(FAMILY_DWORD, isUniform);
> > > +        tmp0 = sel.selReg(reg, ir::TYPE_S32);
> > >          sel.MOV(tmp0, src0);
> > > -
> > > -        tmp1 = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
> > > -        tmp1 = GenRegister::retype(tmp1, GEN_TYPE_D);
> > > +        tmp1 = sel.selReg(sel.reg(FAMILY_DWORD, isUniform),
> > > + ir::TYPE_S32);
> > >          sel.MOV(tmp1, src1);
> > > -
> > >          sel.MATH(tmp0, function, tmp0, tmp1);
> > >          GenRegister unpacked;
> > >          if(family == FAMILY_WORD) {
> > > @@ -3055,10 +3051,10 @@ namespace gbe
> > >      {
> > >        using namespace ir;
> > >        GBE_ASSERT(bti.count == 1);
> > > -      const uint32_t simdWidth = sel.isScalarReg(insn.getValue(0)) ? 1 :
> > sel.ctx.getSimdWidth();
> > > +      const uint32_t isUniform = sel.isScalarReg(insn.getValue(0));
> > >        GBE_ASSERT(insn.getValueNum() == 1);
> > >
> > > -      if(simdWidth == 1) {
> > > +      if(isUniform) {
> > >          GenRegister dst = sel.selReg(insn.getValue(0), ir::TYPE_U32);
> > >          sel.push();
> > >            sel.curr.noMask = 1;
> > > @@ -3069,7 +3065,7 @@ namespace gbe
> > >
> > >        GenRegister dst = GenRegister::retype(sel.selReg(insn.getValue(0)),
> > GEN_TYPE_F);
> > >        // get dword based address
> > > -      GenRegister addrDW = GenRegister::udxgrf(simdWidth,
> > sel.reg(FAMILY_DWORD));
> > > +      GenRegister addrDW = sel.selReg(sel.reg(FAMILY_DWORD,
> > > + isUniform), ir::TYPE_U32);
> > >
> > >        sel.push();
> > >          if (sel.isScalarReg(addr.reg())) { @@ -3112,27 +3108,27 @@
> > > namespace gbe
> > >                          const uint32_t elemSize,
> > >                          GenRegister address,
> > >                          GenRegister dst,
> > > -                        uint32_t simdWidth,
> > > +                        bool isUniform,
> > >                          uint8_t bti) const
> > >      {
> > >        using namespace ir;
> > > -        Register tmpReg = sel.reg(FAMILY_DWORD, simdWidth == 1);
> > > -        GenRegister tmpAddr = GenRegister::udxgrf(simdWidth,
> > sel.reg(FAMILY_DWORD, simdWidth == 1));
> > > -        GenRegister tmpData = GenRegister::udxgrf(simdWidth, tmpReg);
> > > +        Register tmpReg = sel.reg(FAMILY_DWORD, isUniform);
> > > +        GenRegister tmpAddr = sel.selReg(sel.reg(FAMILY_DWORD,
> > isUniform), ir::TYPE_U32);
> > > +        GenRegister tmpData = sel.selReg(tmpReg, ir::TYPE_U32);
> > >          // Get dword aligned addr
> > >          sel.push();
> > > -          if (simdWidth == 1) {
> > > +          if (isUniform) {
> > >              sel.curr.noMask = 1;
> > >              sel.curr.execWidth = 1;
> > >            }
> > >            sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD),
> > GenRegister::immud(0xfffffffc));
> > >          sel.pop();
> > >          sel.push();
> > > -          if (simdWidth == 1)
> > > +          if (isUniform)
> > >              sel.curr.noMask = 1;
> > >            sel.UNTYPED_READ(tmpAddr, &tmpData, 1, bti);
> > >
> > > -          if (simdWidth == 1)
> > > +          if (isUniform)
> > >              sel.curr.execWidth = 1;
> > >            // Get the remaining offset from aligned addr
> > >            sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD),
> > > GenRegister::immud(0x3)); @@ -3155,8 +3151,7 @@ namespace gbe
> > >      {
> > >        using namespace ir;
> > >        const uint32_t valueNum = insn.getValueNum();
> > > -      const uint32_t simdWidth = sel.isScalarReg(insn.getValue(0)) ?
> > > -                                 1 : sel.ctx.getSimdWidth();
> > > +      const bool isUniform = sel.isScalarReg(insn.getValue(0));
> > >        RegisterFamily family = getFamily(insn.getValueType());
> > >
> > >        vector<GenRegister> dst(valueNum); @@ -3170,8 +3165,8 @@
> > > namespace gbe
> > >        vector<GenRegister> tmp2(tmpRegNum);
> > >        vector<Register> tmpReg(tmpRegNum);
> > >        for(uint32_t i = 0; i < tmpRegNum; i++) {
> > > -        tmpReg[i] = sel.reg(FAMILY_DWORD, simdWidth == 1);
> > > -        tmp2[i] = tmp[i] = GenRegister::udxgrf(simdWidth, tmpReg[i]);
> > > +        tmpReg[i] = sel.reg(FAMILY_DWORD, isUniform);
> > > +        tmp2[i] = tmp[i] = sel.selReg(tmpReg[i], ir::TYPE_U32);
> > >        }
> > >
> > >        readDWord(sel, tmp, tmp2, address, tmpRegNum, bti); @@ -3190,18
> > > +3185,18 @@ namespace gbe
> > >                             vector<GenRegister> &tmp,
> > >                             uint32_t effectDataNum,
> > >                             const GenRegister &address,
> > > -                           uint32_t simdWidth) const
> > > +                           bool isUniform) const
> > >      {
> > >        using namespace ir;
> > >        GBE_ASSERT(effectData.size() == effectDataNum);
> > >        GBE_ASSERT(tmp.size() == effectDataNum + 1);
> > >        sel.push();
> > > -        Register alignedFlag = sel.reg(FAMILY_BOOL, simdWidth == 1);
> > > -        GenRegister shiftL = GenRegister::udxgrf(simdWidth,
> > sel.reg(FAMILY_DWORD));
> > > -        Register shiftHReg = sel.reg(FAMILY_DWORD, simdWidth == 1);
> > > -        GenRegister shiftH = GenRegister::udxgrf(simdWidth, shiftHReg);
> > > +        Register alignedFlag = sel.reg(FAMILY_BOOL, isUniform);
> > > +        GenRegister shiftL = sel.selReg(sel.reg(FAMILY_DWORD, isUniform),
> > ir::TYPE_U32);
> > > +        Register shiftHReg = sel.reg(FAMILY_DWORD, isUniform);
> > > +        GenRegister shiftH = sel.selReg(shiftHReg, ir::TYPE_U32);
> > >          sel.push();
> > > -          if (simdWidth == 1)
> > > +          if (isUniform)
> > >              sel.curr.noMask = 1;
> > >            sel.AND(shiftL, GenRegister::retype(address, GEN_TYPE_UD),
> > GenRegister::immud(0x3));
> > >            sel.SHL(shiftL, shiftL, GenRegister::immud(0x3)); @@
> > > -3215,7 +3210,7 @@ namespace gbe
> > >
> > >          sel.curr.noMask = 1;
> > >          for(uint32_t i = 0; i < effectDataNum; i++) {
> > > -          GenRegister tmpH = GenRegister::udxgrf(simdWidth,
> > sel.reg(FAMILY_DWORD, simdWidth == 1));
> > > +          GenRegister tmpH = sel.selReg(sel.reg(FAMILY_DWORD,
> > > + isUniform), ir::TYPE_U32);
> > >            GenRegister tmpL = effectData[i];
> > >            sel.SHR(tmpL, tmp[i], shiftL);
> > >            sel.push();
> > > @@ -3241,6 +3236,7 @@ namespace gbe
> > >        const uint32_t valueNum = insn.getValueNum();
> > >        const uint32_t simdWidth = sel.isScalarReg(insn.getValue(0)) ?
> > >                                   1 : sel.ctx.getSimdWidth();
> > > +      const bool isUniform = simdWidth == 1;
> > >        RegisterFamily family = getFamily(insn.getValueType());
> > >
> > >        if(valueNum > 1) {
> > > @@ -3255,11 +3251,11 @@ namespace gbe
> > >          vector<GenRegister> tmp2(effectDataNum + 1);
> > >          vector<GenRegister> effectData(effectDataNum);
> > >          for(uint32_t i = 0; i < effectDataNum + 1; i++)
> > > -          tmp2[i] = tmp[i] = GenRegister::udxgrf(simdWidth,
> > sel.reg(FAMILY_DWORD, simdWidth == 1));
> > > +          tmp2[i] = tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD,
> > > + isUniform), ir::TYPE_U32);
> > >
> > > -        GenRegister alignedAddr = GenRegister::udxgrf(simdWidth,
> > sel.reg(FAMILY_DWORD, simdWidth == 1));
> > > +        GenRegister alignedAddr = sel.selReg(sel.reg(FAMILY_DWORD,
> > > + isUniform), ir::TYPE_U32);
> > >          sel.push();
> > > -          if (simdWidth == 1)
> > > +          if (isUniform)
> > >              sel.curr.noMask = 1;
> > >            sel.AND(alignedAddr, GenRegister::retype(address, GEN_TYPE_UD),
> > GenRegister::immud(~0x3));
> > >          sel.pop();
> > > @@ -3272,7 +3268,7 @@ namespace gbe
> > >            vector<GenRegister> t2(tmp2.begin() + pos, tmp2.begin() + pos +
> > width);
> > >            if (pos != 0) {
> > >              sel.push();
> > > -              if (simdWidth == 1)
> > > +              if (isUniform)
> > >                  sel.curr.noMask = 1;
> > >                sel.ADD(alignedAddr, alignedAddr, GenRegister::immud(pos * 4));
> > >              sel.pop();
> > > @@ -3283,9 +3279,9 @@ namespace gbe
> > >          } while(remainedReg);
> > >
> > >          for(uint32_t i = 0; i < effectDataNum; i++)
> > > -          effectData[i] = GenRegister::udxgrf(simdWidth,
> > sel.reg(FAMILY_DWORD, simdWidth == 1));
> > > +          effectData[i] = sel.selReg(sel.reg(FAMILY_DWORD,
> > > + isUniform), ir::TYPE_U32);
> > >
> > > -        getEffectByteData(sel, effectData, tmp, effectDataNum, address,
> > simdWidth);
> > > +        getEffectByteData(sel, effectData, tmp, effectDataNum,
> > > + address, isUniform);
> > >
> > >          for(uint32_t i = 0; i < effectDataNum; i++) {
> > >            unsigned int elemNum = (valueNum - i * (4 / typeSize)) > 4/typeSize ?
> > > @@ -3300,13 +3296,13 @@ namespace gbe
> > >
> > >          for (int x = 0; x < bti.count; x++) {
> > >            if (x > 0)
> > > -            tmp = sel.selReg(sel.reg(family, simdWidth == 1),
> > insn.getValueType());
> > > +            tmp = sel.selReg(sel.reg(family, isUniform),
> > > + insn.getValueType());
> > >
> > >            GenRegister addr = getRelativeAddress(sel, address, bti.bti[x]);
> > > -          readByteAsDWord(sel, elemSize, addr, tmp, simdWidth, bti.bti[x]);
> > > +          readByteAsDWord(sel, elemSize, addr, tmp, isUniform,
> > > + bti.bti[x]);
> > >            if (x > 0) {
> > >              sel.push();
> > > -              if (simdWidth == 1) {
> > > +              if (isUniform) {
> > >                  sel.curr.noMask = 1;
> > >                  sel.curr.execWidth = 1;
> > >                }
> > > @@ -3439,10 +3435,10 @@ namespace gbe
> > >                           const ir::StoreInstruction &insn,
> > >                           const uint32_t elemSize,
> > >                           GenRegister addr,
> > > -                         uint32_t bti) const
> > > +                         uint32_t bti,
> > > +                         bool isUniform) const
> > >      {
> > >        using namespace ir;
> > > -      const uint32_t simdWidth = sel.ctx.getSimdWidth();
> > >        uint32_t valueNum = insn.getValueNum();
> > >
> > >        if(valueNum > 1) {
> > > @@ -3460,7 +3456,7 @@ namespace gbe
> > >          uint32_t tmpRegNum = typeSize*valueNum / 4;
> > >          vector<GenRegister> tmp(tmpRegNum);
> > >          for(uint32_t i = 0; i < tmpRegNum; i++) {
> > > -          tmp[i] = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD,
> > simdWidth == 1));
> > > +          tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD, isUniform),
> > > + ir::TYPE_U32);
> > >            sel.PACK_BYTE(tmp[i], value.data() + i * 4/typeSize, typeSize,
> > 4/typeSize);
> > >          }
> > >
> > > @@ -3468,7 +3464,7 @@ namespace gbe
> > >        } else {
> > >          const GenRegister value = sel.selReg(insn.getValue(0));
> > >          GBE_ASSERT(insn.getValueNum() == 1);
> > > -        const GenRegister tmp = GenRegister::udxgrf(simdWidth,
> > sel.reg(FAMILY_DWORD, simdWidth == 1));
> > > +        const GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD,
> > > + isUniform), ir::TYPE_U32);
> > >          if (elemSize == GEN_BYTE_SCATTER_WORD) {
> > >            sel.MOV(tmp, GenRegister::retype(value, GEN_TYPE_UW));
> > >          } else if (elemSize == GEN_BYTE_SCATTER_BYTE) { @@ -3478,15
> > > +3474,15 @@ namespace gbe
> > >        }
> > >      }
> > >
> > > -    INLINE GenRegister getRelativeAddress(Selection::Opaque &sel,
> > GenRegister address, uint8_t bti) const {
> > > +    INLINE GenRegister getRelativeAddress(Selection::Opaque &sel,
> > > + GenRegister address, uint8_t bti, bool isUniform) const {
> > >        if(bti == 0xfe)
> > >          return address;
> > >
> > >        sel.push();
> > >          sel.curr.noMask = 1;
> > > -        if (GenRegister::hstride_size(address) == 0)
> > > +        if (isUniform)
> > >            sel.curr.execWidth = 1;
> > > -        GenRegister temp = sel.selReg(sel.reg(ir::FAMILY_DWORD,
> > sel.curr.execWidth == 1), ir::TYPE_U32);
> > > +        GenRegister temp = sel.selReg(sel.reg(ir::FAMILY_DWORD,
> > > + isUniform), ir::TYPE_U32);
> > >          sel.ADD(temp, address,
> > GenRegister::negate(sel.selReg(sel.ctx.getSurfaceBaseReg(bti),
> > ir::TYPE_U32)));
> > >        sel.pop();
> > >        return temp;
> > > @@ -3499,15 +3495,17 @@ namespace gbe
> > >        const uint32_t elemSize = getByteScatterGatherSize(type);
> > >        GenRegister address = sel.selReg(insn.getAddress(),
> > > ir::TYPE_U32);
> > >
> > > +      const bool isUniform = sel.isScalarReg(insn.getAddress()) &&
> > > + sel.isScalarReg(insn.getValue(0));
> > > +
> > >        BTI bti = insn.getBTI();
> > >        for (int x = 0; x < bti.count; x++) {
> > > -        GenRegister temp = getRelativeAddress(sel, address, bti.bti[x]);
> > > +        GenRegister temp = getRelativeAddress(sel, address,
> > > + bti.bti[x], isUniform);
> > >          if (insn.isAligned() == true && elemSize ==
> > GEN_BYTE_SCATTER_QWORD)
> > >            this->emitWrite64(sel, insn, temp, bti.bti[x]);
> > >          else if (insn.isAligned() == true && elemSize ==
> > GEN_BYTE_SCATTER_DWORD)
> > >            this->emitUntypedWrite(sel, insn, temp,  bti.bti[x]);
> > >          else {
> > > -          this->emitByteScatter(sel, insn, elemSize, temp, bti.bti[x]);
> > > +          this->emitByteScatter(sel, insn, elemSize, temp,
> > > + bti.bti[x], isUniform);
> > >          }
> > >        }
> > >        return true;
> > > --
> > > 1.9.1
> > >
> > > _______________________________________________
> > > Beignet mailing list
> > > Beignet at lists.freedesktop.org
> > > http://lists.freedesktop.org/mailman/listinfo/beignet
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list