[Beignet] [PATCH 16/27] Modify the convert logic in gen selection.
Zhigang Gong
zhigang.gong at linux.intel.com
Wed Jan 7 21:14:27 PST 2015
On Tue, Jan 06, 2015 at 06:01:54PM +0800, junyan.he at inbox.com wrote:
> From: Junyan He <junyan.he at linux.intel.com>
>
> The conversion logic is too complicated.
> We split it more clearly for each case.
> Notice: For I64 to I8, the conversion can not be completed
> within one step because of the hardware hstride restriction.
> So we need to convert it to i32 and than convert it to i8.
typo here, should be then.
>
> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> ---
> backend/src/backend/gen8_context.cpp | 8 +-
> backend/src/backend/gen_insn_selection.cpp | 195 ++++++++++++++++++++++++-----
> 2 files changed, 168 insertions(+), 35 deletions(-)
>
> diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
> index cffb10d..18a3425 100644
> --- a/backend/src/backend/gen8_context.cpp
> +++ b/backend/src/backend/gen8_context.cpp
> @@ -55,7 +55,9 @@ namespace gbe
> {
> switch (insn.opcode) {
> case SEL_OP_CONVI64_TO_I:
> -
> + /* Should never come to here, just use the common OPCODE. */
> + GBE_ASSERT(0);
> + break;
> default:
> GenContext::emitUnaryInstruction(insn);
> }
> @@ -65,7 +67,9 @@ namespace gbe
> {
> switch (insn.opcode) {
> case SEL_OP_CONVI_TO_I64:
> -
> + /* Should never come to here, just use the common OPCODE. */
> + GBE_ASSERT(0);
> + break;
> default:
> GenContext::emitUnaryWithTempInstruction(insn);
> }
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index b6a13bf..60f45f7 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -349,9 +349,17 @@ namespace gbe
> const ir::RegisterData ®Data = getRegisterData(reg);
> return regData.isUniform();
> }
> + INLINE bool isLongReg(const ir::Register ®) const {
> + const ir::RegisterData ®Data = getRegisterData(reg);
> + return regData.family == ir::FAMILY_QWORD;
> + }
> +
> + INLINE GenRegister unpacked_ud(const ir::Register ®) const {
> + return GenRegister::unpacked_ud(reg, isScalarReg(reg));
> + }
>
> INLINE GenRegister unpacked_uw(const ir::Register ®) const {
> - return GenRegister::unpacked_uw(reg, isScalarReg(reg));
> + return GenRegister::unpacked_uw(reg, isScalarReg(reg), isLongReg(reg));
> }
>
> INLINE GenRegister unpacked_ub(const ir::Register ®) const {
> @@ -3658,7 +3666,7 @@ namespace gbe
> sel.F32TO16(unpacked, src);
> sel.pop();
> sel.MOV(dst, unpacked);
> - } else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) {
> + } else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && srcFamily == FAMILY_DWORD) {//convert i32 to small int
> GenRegister unpacked;
> if (dstFamily == FAMILY_WORD) {
> const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
> @@ -3675,27 +3683,115 @@ namespace gbe
> } else
> unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), type);
> }
> - if(srcFamily == FAMILY_QWORD) {
> +
> + sel.push();
> + if (sel.isScalarReg(insn.getSrc(0))) {
> + sel.curr.execWidth = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> + }
> + sel.MOV(unpacked, src);
> + sel.pop();
> +
> + if (unpacked.reg() != dst.reg())
> + sel.MOV(dst, unpacked);
> + } else if (dstFamily == FAMILY_WORD && srcFamily == FAMILY_QWORD) { //convert i64 to i16
> + const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
> + GenRegister unpacked;
> + if (!sel.isScalarReg(dst.reg())) {
> + if (sel.hasLongType()) {
> + unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> + } else {
> + unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> + }
> + unpacked = GenRegister::retype(unpacked, type);
> + } else {
> + unpacked = GenRegister::retype(sel.unpacked_uw(dst.reg()), type);
> + }
> +
> + if(!sel.hasLongType()) {
You already remove (|| srcFamily == FAMILY_QWORD at Line 3658, why still
do the following code which is to convert I64 source operand to I32?
It looks incorrect for me. The following else branch should be put here
unconditional.
> GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD));
> tmp.type = GEN_TYPE_D;
> sel.CONVI64_TO_I(tmp, src);
> sel.MOV(unpacked, tmp);
> } else {
> sel.push();
> - if (sel.isScalarReg(insn.getSrc(0))) {
> - sel.curr.execWidth = 1;
> - sel.curr.predicate = GEN_PREDICATE_NONE;
> - sel.curr.noMask = 1;
> - }
> - sel.MOV(unpacked, src);
> + if (sel.isScalarReg(insn.getSrc(0))) {
> + sel.curr.execWidth = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> + }
> + sel.MOV(unpacked, src);
> sel.pop();
> }
> - if (unpacked.reg() != dst.reg())
> +
> + if (unpacked.reg() != dst.reg()) {
> + sel.MOV(dst, unpacked);
> + }
> + } else if (dstFamily == FAMILY_BYTE && srcFamily == FAMILY_QWORD) { //convert i64 to i8
> + GenRegister unpacked;
> + const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B;
> + if (!sel.isScalarReg(dst.reg())) {
> + if (sel.hasLongType()) {
> + /* When convert i64 to i8, the hstride should be 8, but the hstride do not
> + support more than 4, so we need to split it to 2 steps. */
> + unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> + unpacked = GenRegister::retype(unpacked, dstType == TYPE_U8 ? GEN_TYPE_UW : GEN_TYPE_W);
> + } else {
> + unpacked = sel.unpacked_ub(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> + unpacked = GenRegister::retype(unpacked, type);
> + }
> + } else {
> + unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), type);
> + }
> +
The logic seems broken for me, as the unpacked register which is computed above but is overrided
in the following code before use it.
> + if(!sel.hasLongType()) {
> + GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD));
> + tmp.type = GEN_TYPE_D;
> + sel.CONVI64_TO_I(tmp, src);
> + sel.MOV(unpacked, tmp);
> + } else {
> + sel.push();
> + if (sel.isScalarReg(insn.getSrc(0))) {
> + sel.curr.execWidth = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> + }
> + sel.MOV(unpacked, src);
> + sel.pop();
> + }
> +
> + if (unpacked.reg() != dst.reg()) {
> sel.MOV(dst, unpacked);
> + }
> } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) &&
> - (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64))
> - sel.CONVI64_TO_I(dst, src);
> - else if (dstType == ir::TYPE_FLOAT && (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) {
> + (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) {// Convert i64 to i32
> + if (sel.hasLongType()) {
> + GenRegister unpacked;
> + const uint32_t type = dstType == TYPE_U32 ? GEN_TYPE_UD : GEN_TYPE_D;
> + if (!sel.isScalarReg(dst.reg())) {
> + unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> + unpacked = GenRegister::retype(unpacked, dstType == TYPE_U32 ? GEN_TYPE_UD : GEN_TYPE_D);
> + } else {
> + unpacked = GenRegister::retype(sel.unpacked_ud(dst.reg()), type);
> + }
> +
> + sel.push();
> + if (sel.isScalarReg(insn.getSrc(0))) {
> + sel.curr.execWidth = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> + }
> + sel.MOV(unpacked, src);
> + sel.pop();
> +
> + if (unpacked.reg() != dst.reg()) {
> + sel.MOV(dst, unpacked);
> + }
> + } else {
> + sel.CONVI64_TO_I(dst, src);
> + }
> + } else if (dstType == ir::TYPE_FLOAT && (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) { //i64 to float
> auto dag = sel.regDAG[src.reg()];
> // FIXME, in the future, we need to do a common I64 lower to I32 analysis
> // at llvm IR layer which could cover more cases then just this one.
> @@ -3729,37 +3825,70 @@ namespace gbe
> }
> }
> }
> - GenRegister tmp[6];
> - for(int i=0; i<6; i++) {
> - tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> - }
> - sel.push();
> +
> + if (!sel.hasLongType()) {
> + GenRegister tmp[6];
> + for(int i=0; i<6; i++) {
> + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> + }
> + sel.push();
The following sel.curr.xxx between the push()/pop() pair should have 2 extra space indent
> sel.curr.flag = 0;
> sel.curr.subFlag = 1;
> sel.CONVI64_TO_F(dst, src, tmp);
> - sel.pop();
> + sel.pop();
> + } else {
> + GenRegister unpacked;
> + const uint32_t type = GEN_TYPE_F;
> + if (!sel.isScalarReg(dst.reg())) {
> + unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> + unpacked = GenRegister::retype(unpacked, type);
> + } else {
> + unpacked = GenRegister::retype(sel.unpacked_ud(dst.reg()), type);
> + }
> +
> + sel.push();
> + if (sel.isScalarReg(insn.getSrc(0))) {
> + sel.curr.execWidth = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> + }
> + sel.MOV(unpacked, src);
> + sel.pop();
> +
> + if (unpacked.reg() != dst.reg()) {
> + sel.MOV(dst, unpacked);
> + }
> + }
> } else if ((dst.isdf() && srcType == ir::TYPE_FLOAT) ||
> - (src.isdf() && dstType == ir::TYPE_FLOAT)) {
> + (src.isdf() && dstType == ir::TYPE_FLOAT)) { // float and double conversion
> ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
> sel.MOV_DF(dst, src, sel.selReg(r, TYPE_U64));
> - } else if (dst.isint64()) {
> + } else if (dst.isint64()) { // promote to i64
> switch(src.type) {
> case GEN_TYPE_F:
> - {
> - GenRegister tmp[2];
> - tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> - tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_FLOAT);
> - sel.push();
> - sel.curr.flag = 0;
> - sel.curr.subFlag = 1;
> - sel.CONVF_TO_I64(dst, src, tmp);
> - sel.pop();
> - break;
> - }
> + {
> + if (!sel.hasLongType()) {
> + GenRegister tmp[2];
> + tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> + tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_FLOAT);
> + sel.push();
need fix sel.xxx indent between push()/pop() pair.
> + sel.curr.flag = 0;
> + sel.curr.subFlag = 1;
> + sel.CONVF_TO_I64(dst, src, tmp);
> + sel.pop();
> + } else {
> + sel.MOV(dst, src);
> + }
> + break;
> + }
> case GEN_TYPE_DF:
> NOT_IMPLEMENTED;
> default:
> - sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD)));
> + if (sel.hasLongType()) {
> + sel.MOV(dst, src);
> + } else {
> + sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD)));
> + }
> }
> } else
> sel.MOV(dst, src);
> --
> 1.9.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list