[Beignet] [PATCH 2/2] Use the Byte Gather after HSW when read byte/short.

Mon Jun 15 20:34:25 PDT 2015

Yes. With the merged vector load optimization, an aligned byte/short vector load may perform about the same as split loads, while for an unaligned byte/short vector, split loads may be better than the merged load.

I will send a new patch to handle the unaligned byte/short vector loads.

> -----Original Message-----
> From: Zou, Nanhai
> Sent: Tuesday, June 16, 2015 07:01
> To: Yang, Rong R; beignet at lists.freedesktop.org
> Cc: Yang, Rong R
> Subject: RE: [Beignet] [PATCH 2/2] Use the Byte Gather after HSW when
> read byte/shor.
> 
> Should the unaligned optimization we did in vload/vstore also go away after
> HSW?
> 
> Thanks
> Zou Nanhai
> 
> > -----Original Message-----
> > From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf
> > Of Yang Rong
> > Sent: Monday, June 15, 2015 2:46 PM
> > To: beignet at lists.freedesktop.org
> > Cc: Yang, Rong R
> > Subject: [Beignet] [PATCH 2/2] Use the Byte Gather after HSW when read
> > byte/shor.
> >
> > After HSW, the byte gather's performance issue is gone, so there is no
> > need to read a dword and extract the value.
> > But for multi-dst loads, combining them reduces the address calculation
> > but still requires extracting each dst, so the performance is probably
> > about the same; those loads still use the old logic.
> >
> > Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> > ---
> >  backend/src/backend/gen_insn_selection.cpp | 36
> > ++++++++++++++++++++++++++++--
> >  1 file changed, 34 insertions(+), 2 deletions(-)
> >
> > diff --git a/backend/src/backend/gen_insn_selection.cpp
> > b/backend/src/backend/gen_insn_selection.cpp
> > index d63c7e3..d289e8e 100644
> > --- a/backend/src/backend/gen_insn_selection.cpp
> > +++ b/backend/src/backend/gen_insn_selection.cpp
> > @@ -365,6 +365,8 @@ namespace gbe
> >      void setLongRegRestrict(bool b) { bLongRegRestrict = b; }
> >      void setLdMsgOrder(uint32_t type)  { ldMsgOrder = type; }
> >      uint32_t getLdMsgOrder()  const { return ldMsgOrder; }
> > +    void setSlowByteGather(bool b) { slowByteGather = b; }
> > +    bool getSlowByteGather() { return slowByteGather; }
> >      /*! indicate whether a register is a scalar/uniform register. */
> >      INLINE bool isPartialWrite(const ir::Register &reg) const {
> >        return partialWriteRegs.find(reg.value()) !=
> > partialWriteRegs.end(); @@ -740,6 +742,7 @@ namespace gbe
> >      bool bHasLongType;
> >      bool bLongRegRestrict;
> >      uint32_t ldMsgOrder;
> > +    bool slowByteGather;
> >      INLINE ir::LabelIndex newAuxLabel()
> >      {
> >        currAuxLabel++;
> > @@ -779,7 +782,8 @@ namespace gbe
> >      curr(ctx.getSimdWidth()), file(ctx.getFunction().getRegisterFile()),
> >      maxInsnNum(ctx.getFunction().getLargestBlockSize()),
> > dagPool(maxInsnNum),
> >      stateNum(0), vectorNum(0), bwdCodeGeneration(false),
> > currAuxLabel(ctx.getFunction().labelNum()),
> > -    bHas32X32Mul(false), bHasLongType(false), bLongRegRestrict(false),
> > ldMsgOrder(LD_MSG_ORDER_IVB)
> > +    bHas32X32Mul(false), bHasLongType(false),
> > + bLongRegRestrict(false),
> > ldMsgOrder(LD_MSG_ORDER_IVB),
> > +    slowByteGather(false)
> >    {
> >      const ir::Function &fn = ctx.getFunction();
> >      this->regNum = fn.regNum();
> > @@ -2025,26 +2029,31 @@ namespace gbe
> >    Selection::Selection(GenContext &ctx) {
> >      this->blockList = NULL;
> >      this->opaque = GBE_NEW(Selection::Opaque, ctx);
> > +    this->opaque->setSlowByteGather(true);
> >    }
> >
> >    Selection75::Selection75(GenContext &ctx) : Selection(ctx) {
> > +    this->opaque->setSlowByteGather(false);
> >    }
> >
> >    Selection8::Selection8(GenContext &ctx) : Selection(ctx) {
> >      this->opaque->setHas32X32Mul(true);
> >      this->opaque->setHasLongType(true);
> > +    this->opaque->setSlowByteGather(false);
> >    }
> >
> >    SelectionChv::SelectionChv(GenContext &ctx) : Selection(ctx) {
> >      this->opaque->setHas32X32Mul(true);
> >      this->opaque->setHasLongType(true);
> >      this->opaque->setLongRegRestrict(true);
> > +    this->opaque->setSlowByteGather(false);
> >    }
> >
> >    Selection9::Selection9(GenContext &ctx) : Selection(ctx) {
> >      this->opaque->setHas32X32Mul(true);
> >      this->opaque->setHasLongType(true);
> >      this->opaque->setLdMsgOrder(LD_MSG_ORDER_SKL);
> > +    this->opaque->setSlowByteGather(false);
> >    }
> >
> >    void Selection::Opaque::TYPED_WRITE(GenRegister *msgs, uint32_t
> > msgNum, @@ -3519,8 +3528,31 @@ namespace gbe
> >          GBE_ASSERT(insn.getValueNum() == 1);
> >          const GenRegister value = sel.selReg(insn.getValue(0),
> > insn.getValueType());
> >          GBE_ASSERT(elemSize == GEN_BYTE_SCATTER_WORD || elemSize ==
> > GEN_BYTE_SCATTER_BYTE);
> > +        if(sel.getSlowByteGather())
> > +          readByteAsDWord(sel, elemSize, address, value, isUniform, bti);
> > +        else {
> > +          GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) :
> > sel.selReg(bti.reg, ir::TYPE_U32);
> > +          GenRegister tmpFlag = sel.selReg(sel.reg(ir::FAMILY_WORD,
> > + true), ir::TYPE_U16);
> > +
> > +          // We need a temporary register if we read bytes or words
> > +          Register dst = sel.reg(FAMILY_DWORD, isUniform);
> > +          sel.push();
> > +            if (isUniform)
> > +              sel.curr.noMask = 1;
> > +            sel.BYTE_GATHER(sel.selReg(dst, ir::TYPE_U32), address,
> > elemSize, b, bti.isConst ? NULL : & tmpFlag);
> > +          sel.pop();
> >
> > -        readByteAsDWord(sel, elemSize, address, value, isUniform, bti);
> > +          sel.push();
> > +            if (isUniform) {
> > +              sel.curr.noMask = 1;
> > +              sel.curr.execWidth = 1;
> > +            }
> > +            if (elemSize == GEN_BYTE_SCATTER_WORD)
> > +              sel.MOV(GenRegister::retype(value, GEN_TYPE_UW),
> > GenRegister::unpacked_uw(dst));
> > +            else if (elemSize == GEN_BYTE_SCATTER_BYTE)
> > +              sel.MOV(GenRegister::retype(value, GEN_TYPE_UB),
> > GenRegister::unpacked_ub(dst));
> > +          sel.pop();
> > +        }
> >        }
> >      }
> >
> > --
> > 1.8.3.2
> >
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/beignet