[Beignet] [PATCH 2/2] Use the Byte Gather after HSW when read byte/short.

Zou, Nanhai nanhai.zou at intel.com
Mon Jun 15 16:00:52 PDT 2015


Should the unaligned optimization we did in vload/vstore also be removed after HSW?

Thanks
Zou Nanhai

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Yang Rong
> Sent: Monday, June 15, 2015 2:46 PM
> To: beignet at lists.freedesktop.org
> Cc: Yang, Rong R
> Subject: [Beignet] [PATCH 2/2] Use the Byte Gather after HSW when read
> byte/short.
> 
> After HSW, the byte gather's performance issue is gone, so there is no need
> to read a dword and extract the value.
> For multi-dst loads, however, combining the reads reduces the address
> calculation but requires extracting each dst, so the performance is probably
> about the same; those loads therefore still use the old logic.
> 
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  backend/src/backend/gen_insn_selection.cpp | 36
> ++++++++++++++++++++++++++++--
>  1 file changed, 34 insertions(+), 2 deletions(-)
> 
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index d63c7e3..d289e8e 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -365,6 +365,8 @@ namespace gbe
>      void setLongRegRestrict(bool b) { bLongRegRestrict = b; }
>      void setLdMsgOrder(uint32_t type)  { ldMsgOrder = type; }
>      uint32_t getLdMsgOrder()  const { return ldMsgOrder; }
> +    void setSlowByteGather(bool b) { slowByteGather = b; }
> +    bool getSlowByteGather() { return slowByteGather; }
>      /*! indicate whether a register is a scalar/uniform register. */
>      INLINE bool isPartialWrite(const ir::Register &reg) const {
>        return partialWriteRegs.find(reg.value()) != partialWriteRegs.end();
> @@ -740,6 +742,7 @@ namespace gbe
>      bool bHasLongType;
>      bool bLongRegRestrict;
>      uint32_t ldMsgOrder;
> +    bool slowByteGather;
>      INLINE ir::LabelIndex newAuxLabel()
>      {
>        currAuxLabel++;
> @@ -779,7 +782,8 @@ namespace gbe
>      curr(ctx.getSimdWidth()), file(ctx.getFunction().getRegisterFile()),
>      maxInsnNum(ctx.getFunction().getLargestBlockSize()),
> dagPool(maxInsnNum),
>      stateNum(0), vectorNum(0), bwdCodeGeneration(false),
> currAuxLabel(ctx.getFunction().labelNum()),
> -    bHas32X32Mul(false), bHasLongType(false), bLongRegRestrict(false),
> ldMsgOrder(LD_MSG_ORDER_IVB)
> +    bHas32X32Mul(false), bHasLongType(false), bLongRegRestrict(false),
> ldMsgOrder(LD_MSG_ORDER_IVB),
> +    slowByteGather(false)
>    {
>      const ir::Function &fn = ctx.getFunction();
>      this->regNum = fn.regNum();
> @@ -2025,26 +2029,31 @@ namespace gbe
>    Selection::Selection(GenContext &ctx) {
>      this->blockList = NULL;
>      this->opaque = GBE_NEW(Selection::Opaque, ctx);
> +    this->opaque->setSlowByteGather(true);
>    }
> 
>    Selection75::Selection75(GenContext &ctx) : Selection(ctx) {
> +    this->opaque->setSlowByteGather(false);
>    }
> 
>    Selection8::Selection8(GenContext &ctx) : Selection(ctx) {
>      this->opaque->setHas32X32Mul(true);
>      this->opaque->setHasLongType(true);
> +    this->opaque->setSlowByteGather(false);
>    }
> 
>    SelectionChv::SelectionChv(GenContext &ctx) : Selection(ctx) {
>      this->opaque->setHas32X32Mul(true);
>      this->opaque->setHasLongType(true);
>      this->opaque->setLongRegRestrict(true);
> +    this->opaque->setSlowByteGather(false);
>    }
> 
>    Selection9::Selection9(GenContext &ctx) : Selection(ctx) {
>      this->opaque->setHas32X32Mul(true);
>      this->opaque->setHasLongType(true);
>      this->opaque->setLdMsgOrder(LD_MSG_ORDER_SKL);
> +    this->opaque->setSlowByteGather(false);
>    }
> 
>    void Selection::Opaque::TYPED_WRITE(GenRegister *msgs, uint32_t
> msgNum, @@ -3519,8 +3528,31 @@ namespace gbe
>          GBE_ASSERT(insn.getValueNum() == 1);
>          const GenRegister value = sel.selReg(insn.getValue(0),
> insn.getValueType());
>          GBE_ASSERT(elemSize == GEN_BYTE_SCATTER_WORD || elemSize
> == GEN_BYTE_SCATTER_BYTE);
> +        if(sel.getSlowByteGather())
> +          readByteAsDWord(sel, elemSize, address, value, isUniform, bti);
> +        else {
> +          GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) :
> sel.selReg(bti.reg, ir::TYPE_U32);
> +          GenRegister tmpFlag = sel.selReg(sel.reg(ir::FAMILY_WORD,
> + true), ir::TYPE_U16);
> +
> +          // We need a temporary register if we read bytes or words
> +          Register dst = sel.reg(FAMILY_DWORD, isUniform);
> +          sel.push();
> +            if (isUniform)
> +              sel.curr.noMask = 1;
> +            sel.BYTE_GATHER(sel.selReg(dst, ir::TYPE_U32), address,
> elemSize, b, bti.isConst ? NULL : & tmpFlag);
> +          sel.pop();
> 
> -        readByteAsDWord(sel, elemSize, address, value, isUniform, bti);
> +          sel.push();
> +            if (isUniform) {
> +              sel.curr.noMask = 1;
> +              sel.curr.execWidth = 1;
> +            }
> +            if (elemSize == GEN_BYTE_SCATTER_WORD)
> +              sel.MOV(GenRegister::retype(value, GEN_TYPE_UW),
> GenRegister::unpacked_uw(dst));
> +            else if (elemSize == GEN_BYTE_SCATTER_BYTE)
> +              sel.MOV(GenRegister::retype(value, GEN_TYPE_UB),
> GenRegister::unpacked_ub(dst));
> +          sel.pop();
> +        }
>        }
>      }
> 
> --
> 1.8.3.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list