[Beignet] [PATCH 16/27] Modify the convert logic in gen selection.
He Junyan
junyan.he at inbox.com
Sat Jan 17 07:32:38 PST 2015
On 四, 2015-01-08 at 13:14 +0800, Zhigang Gong wrote:
> On Tue, Jan 06, 2015 at 06:01:54PM +0800, junyan.he at inbox.com wrote:
> > From: Junyan He <junyan.he at linux.intel.com>
> >
> > The conversion logic is too complicated.
> > We split it more clearly for each case.
> > Notice: For I64 to I8, the conversion can not be completed
> > within one step because of the hardware hstride restriction.
> > So we need to convert it to i32 and than convert it to i8.
> typo here, should be then.
> >
> > Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> > ---
> > backend/src/backend/gen8_context.cpp | 8 +-
> > backend/src/backend/gen_insn_selection.cpp | 195 ++++++++++++++++++++++++-----
> > 2 files changed, 168 insertions(+), 35 deletions(-)
> >
> > diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
> > index cffb10d..18a3425 100644
> > --- a/backend/src/backend/gen8_context.cpp
> > +++ b/backend/src/backend/gen8_context.cpp
> > @@ -55,7 +55,9 @@ namespace gbe
> > {
> > switch (insn.opcode) {
> > case SEL_OP_CONVI64_TO_I:
> > -
> > + /* Should never come to here, just use the common OPCODE. */
> > + GBE_ASSERT(0);
> > + break;
> > default:
> > GenContext::emitUnaryInstruction(insn);
> > }
> > @@ -65,7 +67,9 @@ namespace gbe
> > {
> > switch (insn.opcode) {
> > case SEL_OP_CONVI_TO_I64:
> > -
> > + /* Should never come to here, just use the common OPCODE. */
> > + GBE_ASSERT(0);
> > + break;
> > default:
> > GenContext::emitUnaryWithTempInstruction(insn);
> > }
> > diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> > index b6a13bf..60f45f7 100644
> > --- a/backend/src/backend/gen_insn_selection.cpp
> > +++ b/backend/src/backend/gen_insn_selection.cpp
> > @@ -349,9 +349,17 @@ namespace gbe
> > const ir::RegisterData ®Data = getRegisterData(reg);
> > return regData.isUniform();
> > }
> > + INLINE bool isLongReg(const ir::Register ®) const {
> > + const ir::RegisterData ®Data = getRegisterData(reg);
> > + return regData.family == ir::FAMILY_QWORD;
> > + }
> > +
> > + INLINE GenRegister unpacked_ud(const ir::Register ®) const {
> > + return GenRegister::unpacked_ud(reg, isScalarReg(reg));
> > + }
> >
> > INLINE GenRegister unpacked_uw(const ir::Register ®) const {
> > - return GenRegister::unpacked_uw(reg, isScalarReg(reg));
> > + return GenRegister::unpacked_uw(reg, isScalarReg(reg), isLongReg(reg));
> > }
> >
> > INLINE GenRegister unpacked_ub(const ir::Register ®) const {
> > @@ -3658,7 +3666,7 @@ namespace gbe
> > sel.F32TO16(unpacked, src);
> > sel.pop();
> > sel.MOV(dst, unpacked);
> > - } else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) {
> > + } else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && srcFamily == FAMILY_DWORD) {//convert i32 to small int
> > GenRegister unpacked;
> > if (dstFamily == FAMILY_WORD) {
> > const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
> > @@ -3675,27 +3683,115 @@ namespace gbe
> > } else
> > unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), type);
> > }
> > - if(srcFamily == FAMILY_QWORD) {
> > +
> > + sel.push();
> > + if (sel.isScalarReg(insn.getSrc(0))) {
> > + sel.curr.execWidth = 1;
> > + sel.curr.predicate = GEN_PREDICATE_NONE;
> > + sel.curr.noMask = 1;
> > + }
> > + sel.MOV(unpacked, src);
> > + sel.pop();
> > +
> > + if (unpacked.reg() != dst.reg())
> > + sel.MOV(dst, unpacked);
> > + } else if (dstFamily == FAMILY_WORD && srcFamily == FAMILY_QWORD) { //convert i64 to i16
> > + const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
> > + GenRegister unpacked;
> > + if (!sel.isScalarReg(dst.reg())) {
> > + if (sel.hasLongType()) {
> > + unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> > + } else {
> > + unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> > + }
> > + unpacked = GenRegister::retype(unpacked, type);
> > + } else {
> > + unpacked = GenRegister::retype(sel.unpacked_uw(dst.reg()), type);
> > + }
> > +
> > + if(!sel.hasLongType()) {
>
> You already remove (|| srcFamily == FAMILY_QWORD at Line 3658, why still
> do the following code which is to convert I64 source operand to I32?
> It looks incorrect for me. The following else branch should be put here
> unconditional.
>
I think here we are converting 64bits to 16bits, we first mov 64 bits
to 32bits and then mov it to 16bits.
I do not modify the origin logic here, but really we can optimize it.
> > GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD));
> > tmp.type = GEN_TYPE_D;
> > sel.CONVI64_TO_I(tmp, src);
> > sel.MOV(unpacked, tmp);
> > } else {
> > sel.push();
> > - if (sel.isScalarReg(insn.getSrc(0))) {
> > - sel.curr.execWidth = 1;
> > - sel.curr.predicate = GEN_PREDICATE_NONE;
> > - sel.curr.noMask = 1;
> > - }
> > - sel.MOV(unpacked, src);
> > + if (sel.isScalarReg(insn.getSrc(0))) {
> > + sel.curr.execWidth = 1;
> > + sel.curr.predicate = GEN_PREDICATE_NONE;
> > + sel.curr.noMask = 1;
> > + }
> > + sel.MOV(unpacked, src);
> > sel.pop();
> > }
> > - if (unpacked.reg() != dst.reg())
> > +
> > + if (unpacked.reg() != dst.reg()) {
> > + sel.MOV(dst, unpacked);
> > + }
> > + } else if (dstFamily == FAMILY_BYTE && srcFamily == FAMILY_QWORD) { //convert i64 to i8
> > + GenRegister unpacked;
> > + const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B;
> > + if (!sel.isScalarReg(dst.reg())) {
> > + if (sel.hasLongType()) {
> > + /* When convert i64 to i8, the hstride should be 8, but the hstride do not
> > + support more than 4, so we need to split it to 2 steps. */
> > + unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> > + unpacked = GenRegister::retype(unpacked, dstType == TYPE_U8 ? GEN_TYPE_UW : GEN_TYPE_W);
> > + } else {
> > + unpacked = sel.unpacked_ub(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> > + unpacked = GenRegister::retype(unpacked, type);
> > + }
> > + } else {
> > + unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), type);
> > + }
> > +
> The logic seems broken for me, as the unpacked register which is computed above but is overrided
> in the following code before use it.
I think the unpacked here is used as the dst or temp result, what is the
problem?
> > + if(!sel.hasLongType()) {
> > + GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD));
> > + tmp.type = GEN_TYPE_D;
> > + sel.CONVI64_TO_I(tmp, src);
>
> > + sel.MOV(unpacked, tmp);
> > + } else {
> > + sel.push();
> > + if (sel.isScalarReg(insn.getSrc(0))) {
> > + sel.curr.execWidth = 1;
> > + sel.curr.predicate = GEN_PREDICATE_NONE;
> > + sel.curr.noMask = 1;
> > + }
> > + sel.MOV(unpacked, src);
> > + sel.pop();
> > + }
> > +
> > + if (unpacked.reg() != dst.reg()) {
> > sel.MOV(dst, unpacked);
> > + }
> > } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) &&
> > - (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64))
> > - sel.CONVI64_TO_I(dst, src);
> > - else if (dstType == ir::TYPE_FLOAT && (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) {
> > + (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) {// Convert i64 to i32
> > + if (sel.hasLongType()) {
> > + GenRegister unpacked;
> > + const uint32_t type = dstType == TYPE_U32 ? GEN_TYPE_UD : GEN_TYPE_D;
> > + if (!sel.isScalarReg(dst.reg())) {
> > + unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> > + unpacked = GenRegister::retype(unpacked, dstType == TYPE_U32 ? GEN_TYPE_UD : GEN_TYPE_D);
> > + } else {
> > + unpacked = GenRegister::retype(sel.unpacked_ud(dst.reg()), type);
> > + }
> > +
> > + sel.push();
> > + if (sel.isScalarReg(insn.getSrc(0))) {
> > + sel.curr.execWidth = 1;
> > + sel.curr.predicate = GEN_PREDICATE_NONE;
> > + sel.curr.noMask = 1;
> > + }
> > + sel.MOV(unpacked, src);
> > + sel.pop();
> > +
> > + if (unpacked.reg() != dst.reg()) {
> > + sel.MOV(dst, unpacked);
> > + }
> > + } else {
> > + sel.CONVI64_TO_I(dst, src);
> > + }
> > + } else if (dstType == ir::TYPE_FLOAT && (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) { //i64 to float
> > auto dag = sel.regDAG[src.reg()];
> > // FIXME, in the future, we need to do a common I64 lower to I32 analysis
> > // at llvm IR layer which could cover more cases then just this one.
> > @@ -3729,37 +3825,70 @@ namespace gbe
> > }
> > }
> > }
> > - GenRegister tmp[6];
> > - for(int i=0; i<6; i++) {
> > - tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> > - }
> > - sel.push();
> > +
> > + if (!sel.hasLongType()) {
> > + GenRegister tmp[6];
> > + for(int i=0; i<6; i++) {
> > + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> > + }
> > + sel.push();
> The following sel.curr.xxx between the push()/pop() pair should have 2 extra space indent
> > sel.curr.flag = 0;
> > sel.curr.subFlag = 1;
> > sel.CONVI64_TO_F(dst, src, tmp);
> > - sel.pop();
> > + sel.pop();
> > + } else {
> > + GenRegister unpacked;
> > + const uint32_t type = GEN_TYPE_F;
> > + if (!sel.isScalarReg(dst.reg())) {
> > + unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> > + unpacked = GenRegister::retype(unpacked, type);
> > + } else {
> > + unpacked = GenRegister::retype(sel.unpacked_ud(dst.reg()), type);
> > + }
> > +
> > + sel.push();
> > + if (sel.isScalarReg(insn.getSrc(0))) {
> > + sel.curr.execWidth = 1;
> > + sel.curr.predicate = GEN_PREDICATE_NONE;
> > + sel.curr.noMask = 1;
> > + }
> > + sel.MOV(unpacked, src);
> > + sel.pop();
> > +
> > + if (unpacked.reg() != dst.reg()) {
> > + sel.MOV(dst, unpacked);
> > + }
> > + }
> > } else if ((dst.isdf() && srcType == ir::TYPE_FLOAT) ||
> > - (src.isdf() && dstType == ir::TYPE_FLOAT)) {
> > + (src.isdf() && dstType == ir::TYPE_FLOAT)) { // float and double conversion
> > ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
> > sel.MOV_DF(dst, src, sel.selReg(r, TYPE_U64));
> > - } else if (dst.isint64()) {
> > + } else if (dst.isint64()) { // promote to i64
> > switch(src.type) {
> > case GEN_TYPE_F:
> > - {
> > - GenRegister tmp[2];
> > - tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> > - tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_FLOAT);
> > - sel.push();
> > - sel.curr.flag = 0;
> > - sel.curr.subFlag = 1;
> > - sel.CONVF_TO_I64(dst, src, tmp);
> > - sel.pop();
> > - break;
> > - }
> > + {
> > + if (!sel.hasLongType()) {
> > + GenRegister tmp[2];
> > + tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> > + tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_FLOAT);
> > + sel.push();
> need fix sel.xxx indent between push()/pop() pair.
> > + sel.curr.flag = 0;
> > + sel.curr.subFlag = 1;
> > + sel.CONVF_TO_I64(dst, src, tmp);
> > + sel.pop();
> > + } else {
> > + sel.MOV(dst, src);
> > + }
> > + break;
> > + }
> > case GEN_TYPE_DF:
> > NOT_IMPLEMENTED;
> > default:
> > - sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD)));
> > + if (sel.hasLongType()) {
> > + sel.MOV(dst, src);
> > + } else {
> > + sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD)));
> > + }
> > }
> > } else
> > sel.MOV(dst, src);
> > --
> > 1.9.1
> >
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list