[Beignet] [PATCH 16/27] Modify the convert logic in gen selection.
junyan.he at inbox.com
junyan.he at inbox.com
Tue Jan 6 02:01:54 PST 2015
From: Junyan He <junyan.he at linux.intel.com>
The conversion logic is too complicated.
We split it more clearly for each case.
Notice: For I64 to I8, the conversion can not be completed
within one step because of the hardware hstride restriction.
So we need to convert it to i32 and than convert it to i8.
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/backend/gen8_context.cpp | 8 +-
backend/src/backend/gen_insn_selection.cpp | 195 ++++++++++++++++++++++++-----
2 files changed, 168 insertions(+), 35 deletions(-)
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index cffb10d..18a3425 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -55,7 +55,9 @@ namespace gbe
{
switch (insn.opcode) {
case SEL_OP_CONVI64_TO_I:
-
+ /* Should never come to here, just use the common OPCODE. */
+ GBE_ASSERT(0);
+ break;
default:
GenContext::emitUnaryInstruction(insn);
}
@@ -65,7 +67,9 @@ namespace gbe
{
switch (insn.opcode) {
case SEL_OP_CONVI_TO_I64:
-
+ /* Should never come to here, just use the common OPCODE. */
+ GBE_ASSERT(0);
+ break;
default:
GenContext::emitUnaryWithTempInstruction(insn);
}
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index b6a13bf..60f45f7 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -349,9 +349,17 @@ namespace gbe
const ir::RegisterData ®Data = getRegisterData(reg);
return regData.isUniform();
}
+ INLINE bool isLongReg(const ir::Register ®) const {
+ const ir::RegisterData ®Data = getRegisterData(reg);
+ return regData.family == ir::FAMILY_QWORD;
+ }
+
+ INLINE GenRegister unpacked_ud(const ir::Register ®) const {
+ return GenRegister::unpacked_ud(reg, isScalarReg(reg));
+ }
INLINE GenRegister unpacked_uw(const ir::Register ®) const {
- return GenRegister::unpacked_uw(reg, isScalarReg(reg));
+ return GenRegister::unpacked_uw(reg, isScalarReg(reg), isLongReg(reg));
}
INLINE GenRegister unpacked_ub(const ir::Register ®) const {
@@ -3658,7 +3666,7 @@ namespace gbe
sel.F32TO16(unpacked, src);
sel.pop();
sel.MOV(dst, unpacked);
- } else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) {
+ } else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && srcFamily == FAMILY_DWORD) {//convert i32 to small int
GenRegister unpacked;
if (dstFamily == FAMILY_WORD) {
const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
@@ -3675,27 +3683,115 @@ namespace gbe
} else
unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), type);
}
- if(srcFamily == FAMILY_QWORD) {
+
+ sel.push();
+ if (sel.isScalarReg(insn.getSrc(0))) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
+ sel.MOV(unpacked, src);
+ sel.pop();
+
+ if (unpacked.reg() != dst.reg())
+ sel.MOV(dst, unpacked);
+ } else if (dstFamily == FAMILY_WORD && srcFamily == FAMILY_QWORD) { //convert i64 to i16
+ const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
+ GenRegister unpacked;
+ if (!sel.isScalarReg(dst.reg())) {
+ if (sel.hasLongType()) {
+ unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
+ } else {
+ unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
+ }
+ unpacked = GenRegister::retype(unpacked, type);
+ } else {
+ unpacked = GenRegister::retype(sel.unpacked_uw(dst.reg()), type);
+ }
+
+ if(!sel.hasLongType()) {
GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD));
tmp.type = GEN_TYPE_D;
sel.CONVI64_TO_I(tmp, src);
sel.MOV(unpacked, tmp);
} else {
sel.push();
- if (sel.isScalarReg(insn.getSrc(0))) {
- sel.curr.execWidth = 1;
- sel.curr.predicate = GEN_PREDICATE_NONE;
- sel.curr.noMask = 1;
- }
- sel.MOV(unpacked, src);
+ if (sel.isScalarReg(insn.getSrc(0))) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
+ sel.MOV(unpacked, src);
sel.pop();
}
- if (unpacked.reg() != dst.reg())
+
+ if (unpacked.reg() != dst.reg()) {
+ sel.MOV(dst, unpacked);
+ }
+ } else if (dstFamily == FAMILY_BYTE && srcFamily == FAMILY_QWORD) { //convert i64 to i8
+ GenRegister unpacked;
+ const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B;
+ if (!sel.isScalarReg(dst.reg())) {
+ if (sel.hasLongType()) {
+ /* When convert i64 to i8, the hstride should be 8, but the hstride do not
+ support more than 4, so we need to split it to 2 steps. */
+ unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
+ unpacked = GenRegister::retype(unpacked, dstType == TYPE_U8 ? GEN_TYPE_UW : GEN_TYPE_W);
+ } else {
+ unpacked = sel.unpacked_ub(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
+ unpacked = GenRegister::retype(unpacked, type);
+ }
+ } else {
+ unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), type);
+ }
+
+ if(!sel.hasLongType()) {
+ GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD));
+ tmp.type = GEN_TYPE_D;
+ sel.CONVI64_TO_I(tmp, src);
+ sel.MOV(unpacked, tmp);
+ } else {
+ sel.push();
+ if (sel.isScalarReg(insn.getSrc(0))) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
+ sel.MOV(unpacked, src);
+ sel.pop();
+ }
+
+ if (unpacked.reg() != dst.reg()) {
sel.MOV(dst, unpacked);
+ }
} else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) &&
- (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64))
- sel.CONVI64_TO_I(dst, src);
- else if (dstType == ir::TYPE_FLOAT && (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) {
+ (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) {// Convert i64 to i32
+ if (sel.hasLongType()) {
+ GenRegister unpacked;
+ const uint32_t type = dstType == TYPE_U32 ? GEN_TYPE_UD : GEN_TYPE_D;
+ if (!sel.isScalarReg(dst.reg())) {
+ unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
+ unpacked = GenRegister::retype(unpacked, dstType == TYPE_U32 ? GEN_TYPE_UD : GEN_TYPE_D);
+ } else {
+ unpacked = GenRegister::retype(sel.unpacked_ud(dst.reg()), type);
+ }
+
+ sel.push();
+ if (sel.isScalarReg(insn.getSrc(0))) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
+ sel.MOV(unpacked, src);
+ sel.pop();
+
+ if (unpacked.reg() != dst.reg()) {
+ sel.MOV(dst, unpacked);
+ }
+ } else {
+ sel.CONVI64_TO_I(dst, src);
+ }
+ } else if (dstType == ir::TYPE_FLOAT && (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) { //i64 to float
auto dag = sel.regDAG[src.reg()];
// FIXME, in the future, we need to do a common I64 lower to I32 analysis
// at llvm IR layer which could cover more cases then just this one.
@@ -3729,37 +3825,70 @@ namespace gbe
}
}
}
- GenRegister tmp[6];
- for(int i=0; i<6; i++) {
- tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
- }
- sel.push();
+
+ if (!sel.hasLongType()) {
+ GenRegister tmp[6];
+ for(int i=0; i<6; i++) {
+ tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
+ }
+ sel.push();
sel.curr.flag = 0;
sel.curr.subFlag = 1;
sel.CONVI64_TO_F(dst, src, tmp);
- sel.pop();
+ sel.pop();
+ } else {
+ GenRegister unpacked;
+ const uint32_t type = GEN_TYPE_F;
+ if (!sel.isScalarReg(dst.reg())) {
+ unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
+ unpacked = GenRegister::retype(unpacked, type);
+ } else {
+ unpacked = GenRegister::retype(sel.unpacked_ud(dst.reg()), type);
+ }
+
+ sel.push();
+ if (sel.isScalarReg(insn.getSrc(0))) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
+ sel.MOV(unpacked, src);
+ sel.pop();
+
+ if (unpacked.reg() != dst.reg()) {
+ sel.MOV(dst, unpacked);
+ }
+ }
} else if ((dst.isdf() && srcType == ir::TYPE_FLOAT) ||
- (src.isdf() && dstType == ir::TYPE_FLOAT)) {
+ (src.isdf() && dstType == ir::TYPE_FLOAT)) { // float and double conversion
ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
sel.MOV_DF(dst, src, sel.selReg(r, TYPE_U64));
- } else if (dst.isint64()) {
+ } else if (dst.isint64()) { // promote to i64
switch(src.type) {
case GEN_TYPE_F:
- {
- GenRegister tmp[2];
- tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
- tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_FLOAT);
- sel.push();
- sel.curr.flag = 0;
- sel.curr.subFlag = 1;
- sel.CONVF_TO_I64(dst, src, tmp);
- sel.pop();
- break;
- }
+ {
+ if (!sel.hasLongType()) {
+ GenRegister tmp[2];
+ tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
+ tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_FLOAT);
+ sel.push();
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ sel.CONVF_TO_I64(dst, src, tmp);
+ sel.pop();
+ } else {
+ sel.MOV(dst, src);
+ }
+ break;
+ }
case GEN_TYPE_DF:
NOT_IMPLEMENTED;
default:
- sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD)));
+ if (sel.hasLongType()) {
+ sel.MOV(dst, src);
+ } else {
+ sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD)));
+ }
}
} else
sel.MOV(dst, src);
--
1.9.1
More information about the Beignet
mailing list