[Beignet] [PATCH 16/27] Modify the convert logic in gen selection.

Zhigang Gong zhigang.gong at linux.intel.com
Sun Jan 18 15:35:50 PST 2015


On Sat, Jan 17, 2015 at 11:32:38PM +0800, He Junyan wrote:
> 
> 
> On 四, 2015-01-08 at 13:14 +0800, Zhigang Gong wrote:
> > On Tue, Jan 06, 2015 at 06:01:54PM +0800, junyan.he at inbox.com wrote:
> > > From: Junyan He <junyan.he at linux.intel.com>
> > > 
> > > The conversion logic is too complicated.
> > > We split it more clearly for each case.
> > > Notice: For I64 to I8, the conversion can not be completed
> > > within one step because of the hardware hstride restriction.
> > > So we need to convert it to i32 and than convert it to i8.
> >                                        typo here, should be then.
> > > 
> > > Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> > > ---
> > >  backend/src/backend/gen8_context.cpp       |   8 +-
> > >  backend/src/backend/gen_insn_selection.cpp | 195 ++++++++++++++++++++++++-----
> > >  2 files changed, 168 insertions(+), 35 deletions(-)
> > > 
> > > diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
> > > index cffb10d..18a3425 100644
> > > --- a/backend/src/backend/gen8_context.cpp
> > > +++ b/backend/src/backend/gen8_context.cpp
> > > @@ -55,7 +55,9 @@ namespace gbe
> > >    {
> > >      switch (insn.opcode) {
> > >        case SEL_OP_CONVI64_TO_I:
> > > -
> > > +        /* Should never come to here, just use the common OPCODE. */
> > > +        GBE_ASSERT(0);
> > > +        break;
> > >        default:
> > >          GenContext::emitUnaryInstruction(insn);
> > >      }
> > > @@ -65,7 +67,9 @@ namespace gbe
> > >    {
> > >      switch (insn.opcode) {
> > >        case SEL_OP_CONVI_TO_I64:
> > > -
> > > +        /* Should never come to here, just use the common OPCODE. */
> > > +        GBE_ASSERT(0);
> > > +        break;
> > >        default:
> > >          GenContext::emitUnaryWithTempInstruction(insn);
> > >      }
> > > diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> > > index b6a13bf..60f45f7 100644
> > > --- a/backend/src/backend/gen_insn_selection.cpp
> > > +++ b/backend/src/backend/gen_insn_selection.cpp
> > > @@ -349,9 +349,17 @@ namespace gbe
> > >        const ir::RegisterData &regData = getRegisterData(reg);
> > >        return regData.isUniform();
> > >      }
> > > +    INLINE bool isLongReg(const ir::Register &reg) const {
> > > +      const ir::RegisterData &regData = getRegisterData(reg);
> > > +      return regData.family == ir::FAMILY_QWORD;
> > > +    }
> > > +
> > > +    INLINE GenRegister unpacked_ud(const ir::Register &reg) const {
> > > +      return GenRegister::unpacked_ud(reg, isScalarReg(reg));
> > > +    }
> > >  
> > >      INLINE GenRegister unpacked_uw(const ir::Register &reg) const {
> > > -      return GenRegister::unpacked_uw(reg, isScalarReg(reg));
> > > +      return GenRegister::unpacked_uw(reg, isScalarReg(reg), isLongReg(reg));
> > >      }
> > >  
> > >      INLINE GenRegister unpacked_ub(const ir::Register &reg) const {
> > > @@ -3658,7 +3666,7 @@ namespace gbe
> > >            sel.F32TO16(unpacked, src);
> > >          sel.pop();
> > >          sel.MOV(dst, unpacked);
> > > -      } else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) {
> > > +      } else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && srcFamily == FAMILY_DWORD) {//convert i32 to small int
> > >          GenRegister unpacked;
> > >          if (dstFamily == FAMILY_WORD) {
> > >            const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
> > > @@ -3675,27 +3683,115 @@ namespace gbe
> > >            } else
> > >              unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), type);
> > >          }
> > > -        if(srcFamily == FAMILY_QWORD) {
> > > +
> > > +        sel.push();
> > > +        if (sel.isScalarReg(insn.getSrc(0))) {
> > > +          sel.curr.execWidth = 1;
> > > +          sel.curr.predicate = GEN_PREDICATE_NONE;
> > > +          sel.curr.noMask = 1;
> > > +        }
> > > +        sel.MOV(unpacked, src);
> > > +        sel.pop();
> > > +
> > > +        if (unpacked.reg() != dst.reg())
> > > +          sel.MOV(dst, unpacked);
> > > +      } else if (dstFamily == FAMILY_WORD && srcFamily == FAMILY_QWORD) { //convert i64 to i16
> > > +        const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
> > > +        GenRegister unpacked;
> > > +        if (!sel.isScalarReg(dst.reg())) {
> > > +          if (sel.hasLongType()) {
> > > +            unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> > > +          } else {
> > > +            unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> > > +          }
> > > +          unpacked = GenRegister::retype(unpacked, type);
> > > +        } else {
> > > +          unpacked = GenRegister::retype(sel.unpacked_uw(dst.reg()), type);
> > > +        }
> > > +
> > > +        if(!sel.hasLongType()) {
> > 
> > You already remove (|| srcFamily == FAMILY_QWORD at Line 3658, why still
> > do the following code which is to convert I64 source operand to I32?
> > It looks incorrect for me. The following else branch should be put here
> > unconditional.
> > 
> I think here we are converting 64bits to 16bits, we  first mov 64 bits
> to 32bits and then mov it to 16bits.
> I do not modify the origin logic here, but really we can optimize it.
Right, the logic is not changed. As there are too many if/elseif block and I missed the
newly added block "else if (dstFamily == FAMILY_WORD && srcFamily == FAMILY_QWORD)"
when I gave the first comment.

> 
> > >            GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD));
> > >            tmp.type = GEN_TYPE_D;
> > >            sel.CONVI64_TO_I(tmp, src);
> > >            sel.MOV(unpacked, tmp);
> > >          } else {
> > >            sel.push();
> > > -            if (sel.isScalarReg(insn.getSrc(0))) {
> > > -              sel.curr.execWidth = 1;
> > > -              sel.curr.predicate = GEN_PREDICATE_NONE;
> > > -              sel.curr.noMask = 1;
> > > -            }
> > > -            sel.MOV(unpacked, src);
> > > +          if (sel.isScalarReg(insn.getSrc(0))) {
> > > +            sel.curr.execWidth = 1;
> > > +            sel.curr.predicate = GEN_PREDICATE_NONE;
> > > +            sel.curr.noMask = 1;
> > > +          }
> > > +          sel.MOV(unpacked, src);
> > >            sel.pop();
> > >          }
> > > -        if (unpacked.reg() != dst.reg())
> > > +
> > > +        if (unpacked.reg() != dst.reg()) {
> > > +          sel.MOV(dst, unpacked);
> > > +        }
> > > +      } else if (dstFamily == FAMILY_BYTE && srcFamily == FAMILY_QWORD) { //convert i64 to i8
> > > +        GenRegister unpacked;
> > > +        const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B;
> > > +        if (!sel.isScalarReg(dst.reg())) {
> > > +          if (sel.hasLongType()) {
> > > +            /* When convert i64 to i8, the hstride should be 8, but the hstride do not
> > > +               support more than 4, so we need to split it to 2 steps. */
> > > +            unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> > > +            unpacked = GenRegister::retype(unpacked, dstType == TYPE_U8 ? GEN_TYPE_UW : GEN_TYPE_W);
> > > +          } else {
> > > +            unpacked = sel.unpacked_ub(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> > > +            unpacked = GenRegister::retype(unpacked, type);
> > > +          }
> > > +        } else {
> > > +          unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), type);
> > > +        }
> > > +
> >            The logic seems broken for me, as the unpacked register which is computed above but is overrided
> >            in the following code before use it.
> I think the unpacked here is used as the dst or temp result, what is the
> problem?
Right, the logic is not incorrect but is quite complicate and fragmented with all
different conditions which make it hard to review and maintained. The original
logic is already very complicate, and now it becomes even worse.

Could you slightly change the logic for this code block to seprate the code for
the platform which has native long support and the platform which doesn't have
native long support. I mean the code block between
> > > +      } else if (dstFamily == FAMILY_BYTE && srcFamily == FAMILY_QWORD) { //convert i64 to i8
and
> > > +        }
> > >        } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) &&

Anyway, may we need to refine the whole convert instruction in the future.

Thanks,
Zhigang Gong.

> 
> > > +        if(!sel.hasLongType()) {
> > > +          GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD));
> > > +          tmp.type = GEN_TYPE_D;
> > > +          sel.CONVI64_TO_I(tmp, src);
> > 
> > > +          sel.MOV(unpacked, tmp);
> > > +        } else {
> > > +          sel.push();
> > > +          if (sel.isScalarReg(insn.getSrc(0))) {
> > > +            sel.curr.execWidth = 1;
> > > +            sel.curr.predicate = GEN_PREDICATE_NONE;
> > > +            sel.curr.noMask = 1;
> > > +          }
> > > +          sel.MOV(unpacked, src);
> > > +          sel.pop();
> > > +        }
> > > +
> > > +        if (unpacked.reg() != dst.reg()) {
> > >            sel.MOV(dst, unpacked);
> > > +        }
> > >        } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) &&
> > > -                 (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64))
> > > -        sel.CONVI64_TO_I(dst, src);
> > > -      else if (dstType == ir::TYPE_FLOAT && (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) {
> > > +                 (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) {// Convert i64 to i32
> > > +        if (sel.hasLongType()) {
> > > +          GenRegister unpacked;
> > > +          const uint32_t type = dstType == TYPE_U32 ? GEN_TYPE_UD : GEN_TYPE_D;
> > > +          if (!sel.isScalarReg(dst.reg())) {
> > > +            unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> > > +            unpacked = GenRegister::retype(unpacked, dstType == TYPE_U32 ? GEN_TYPE_UD : GEN_TYPE_D);
> > > +          } else {
> > > +            unpacked = GenRegister::retype(sel.unpacked_ud(dst.reg()), type);
> > > +          }
> > > +
> > > +          sel.push();
> > > +          if (sel.isScalarReg(insn.getSrc(0))) {
> > > +            sel.curr.execWidth = 1;
> > > +            sel.curr.predicate = GEN_PREDICATE_NONE;
> > > +            sel.curr.noMask = 1;
> > > +          }
> > > +          sel.MOV(unpacked, src);
> > > +          sel.pop();
> > > +
> > > +          if (unpacked.reg() != dst.reg()) {
> > > +            sel.MOV(dst, unpacked);
> > > +          }
> > > +        } else {
> > > +          sel.CONVI64_TO_I(dst, src);
> > > +        }
> > > +      } else if (dstType == ir::TYPE_FLOAT && (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) { //i64 to float
> > >          auto dag = sel.regDAG[src.reg()];
> > >          // FIXME, in the future, we need to do a common I64 lower to I32 analysis
> > >          // at llvm IR layer which could cover more cases then just this one.
> > > @@ -3729,37 +3825,70 @@ namespace gbe
> > >              }
> > >            }
> > >          }
> > > -        GenRegister tmp[6];
> > > -        for(int i=0; i<6; i++) {
> > > -          tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> > > -        }
> > > -        sel.push();
> > > +
> > > +        if (!sel.hasLongType()) {
> > > +          GenRegister tmp[6];
> > > +          for(int i=0; i<6; i++) {
> > > +            tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> > > +          }
> > > +          sel.push();
> >              The following sel.curr.xxx between the push()/pop() pair should have 2 extra space indent
> > >            sel.curr.flag = 0;
> > >            sel.curr.subFlag = 1;
> > >            sel.CONVI64_TO_F(dst, src, tmp);
> > > -        sel.pop();
> > > +          sel.pop();
> > > +        } else {
> > > +          GenRegister unpacked;
> > > +          const uint32_t type = GEN_TYPE_F;
> > > +          if (!sel.isScalarReg(dst.reg())) {
> > > +            unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> > > +            unpacked = GenRegister::retype(unpacked, type);
> > > +          } else {
> > > +            unpacked = GenRegister::retype(sel.unpacked_ud(dst.reg()), type);
> > > +          }
> > > +
> > > +          sel.push();
> > > +          if (sel.isScalarReg(insn.getSrc(0))) {
> > > +            sel.curr.execWidth = 1;
> > > +            sel.curr.predicate = GEN_PREDICATE_NONE;
> > > +            sel.curr.noMask = 1;
> > > +          }
> > > +          sel.MOV(unpacked, src);
> > > +          sel.pop();
> > > +
> > > +          if (unpacked.reg() != dst.reg()) {
> > > +            sel.MOV(dst, unpacked);
> > > +          }
> > > +        }
> > >        } else if ((dst.isdf() && srcType == ir::TYPE_FLOAT) ||
> > > -                 (src.isdf() && dstType == ir::TYPE_FLOAT)) {
> > > +                 (src.isdf() && dstType == ir::TYPE_FLOAT)) { // float and double conversion
> > >          ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
> > >          sel.MOV_DF(dst, src, sel.selReg(r, TYPE_U64));
> > > -      } else if (dst.isint64()) {
> > > +      } else if (dst.isint64()) { // promote to i64
> > >          switch(src.type) {
> > >            case GEN_TYPE_F:
> > > -          {
> > > -            GenRegister tmp[2];
> > > -            tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> > > -            tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_FLOAT);
> > > -            sel.push();
> > > -              sel.curr.flag = 0;
> > > -              sel.curr.subFlag = 1;
> > > -              sel.CONVF_TO_I64(dst, src, tmp);
> > > -            sel.pop();
> > > -            break;
> > > -          }
> > > +            {
> > > +              if (!sel.hasLongType()) {
> > > +                GenRegister tmp[2];
> > > +                tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> > > +                tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_FLOAT);
> > > +                sel.push();
> >                    need fix sel.xxx indent between push()/pop() pair.
> > > +                sel.curr.flag = 0;
> > > +                sel.curr.subFlag = 1;
> > > +                sel.CONVF_TO_I64(dst, src, tmp);
> > > +                sel.pop();
> > > +              } else {
> > > +                sel.MOV(dst, src);
> > > +              }
> > > +              break;
> > > +            }
> > >            case GEN_TYPE_DF:
> > >              NOT_IMPLEMENTED;
> > >            default:
> > > -            sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD)));
> > > +            if (sel.hasLongType()) {
> > > +              sel.MOV(dst, src);
> > > +            } else {
> > > +              sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD)));
> > > +            }
> > >          }
> > >        } else
> > >          sel.MOV(dst, src);
> > > -- 
> > > 1.9.1
> > > 
> > > _______________________________________________
> > > Beignet mailing list
> > > Beignet at lists.freedesktop.org
> > > http://lists.freedesktop.org/mailman/listinfo/beignet
> 
> 
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list