[Beignet] [PATCH] Backend: Refine ConvertInstruction logic in insn_selection
He Junyan
junyan.he at inbox.com
Tue Oct 13 23:49:24 PDT 2015
Ping for review.
On Tue, Sep 22, 2015 at 06:29:23PM +0800, junyan.he at inbox.com wrote:
> Date: Tue, 22 Sep 2015 18:29:23 +0800
> From: junyan.he at inbox.com
> To: beignet at lists.freedesktop.org
> Subject: [Beignet] [PATCH] Backend: Refine ConvertInstruction logic in
> insn_selection
> X-Mailer: git-send-email 1.7.9.5
>
> From: Junyan He <junyan.he at linux.intel.com>
>
> The ConvertInstruction now needs to handle a lot of special
> cases instead of a simple MOV. The checks for native long
> support, half support and the register restriction of the long
> type make the situation very complicated. The current code logic
> is too verbose and hard to read. We now use subroutine functions
> to make it clear and readable.
>
> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> ---
> backend/src/backend/gen_insn_selection.cpp | 780 +++++++++++++++++-----------
> 1 file changed, 475 insertions(+), 305 deletions(-)
>
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index ab00269..4800f7f 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -4124,148 +4124,132 @@ namespace gbe
> return false;
> }
>
> - INLINE bool emitOne(Selection::Opaque &sel, const ir::ConvertInstruction &insn, bool &markChildren) const
> + INLINE void convertBetweenHalfFloat(Selection::Opaque &sel, const ir::ConvertInstruction &insn, bool &markChildren) const
> {
> using namespace ir;
> const Type dstType = insn.getDstType();
> const Type srcType = insn.getSrcType();
> - const RegisterFamily dstFamily = getFamily(dstType);
> - const RegisterFamily srcFamily = getFamily(srcType);
> const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
> const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
> const Opcode opcode = insn.getOpcode();
> - sel.push();
> - if (sel.isScalarReg(insn.getDst(0)) == true) {
> - sel.curr.execWidth = 1;
> - sel.curr.predicate = GEN_PREDICATE_NONE;
> - sel.curr.noMask = 1;
> - }
> - if(opcode == ir::OP_SAT_CVT)
> - sel.curr.saturate = 1;
>
> - // We need two instructions to make the conversion
> if (opcode == OP_F16TO32) {
> sel.F16TO32(dst, src);
> } else if (opcode == OP_F32TO16) {
> + // We need two instructions to make the conversion
> GenRegister unpacked;
> unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> sel.push();
> - if (sel.isScalarReg(insn.getSrc(0))) {
> - sel.curr.execWidth = 1;
> - sel.curr.predicate = GEN_PREDICATE_NONE;
> - sel.curr.noMask = 1;
> - }
> - sel.F32TO16(unpacked, src);
> + if (sel.isScalarReg(insn.getSrc(0))) {
> + sel.curr.execWidth = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> + }
> + sel.F32TO16(unpacked, src);
> sel.pop();
> sel.MOV(dst, unpacked);
> - } else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && srcFamily == FAMILY_DWORD) {//convert i32 to small int and half
> - GenRegister unpacked;
> - if (dstFamily == FAMILY_WORD) {
> - uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
> -
> - /* The special case, when dst is half, float->word->half will lose accuracy. */
> - if (dstType == TYPE_HALF) {
> - GBE_ASSERT(sel.hasHalfType());
> - type = GEN_TYPE_HF;
> - }
> + } else {
> + GBE_ASSERT("Not conversion between float and half\n");
> + }
> + }
>
> - if (!sel.isScalarReg(dst.reg())) {
> - unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> - unpacked = GenRegister::retype(unpacked, type);
> - } else
> - unpacked = GenRegister::retype(sel.unpacked_uw(dst.reg()), type);
> - } else {
> - const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B;
> - if (!sel.isScalarReg(dst.reg())) {
> - unpacked = sel.unpacked_ub(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> - unpacked = GenRegister::retype(unpacked, type);
> - } else
> - unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), type);
> - }
> + INLINE void convert32bitsToSmall(Selection::Opaque &sel, const ir::ConvertInstruction &insn, bool &markChildren) const
> + {
> + using namespace ir;
> + const Type dstType = insn.getDstType();
> + const Type srcType = insn.getSrcType();
> + const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
> + const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
> + GenRegister unpacked;
> + const RegisterFamily dstFamily = getFamily(dstType);
>
> - sel.push();
> - if (sel.isScalarReg(insn.getSrc(0))) {
> - sel.curr.execWidth = 1;
> - sel.curr.predicate = GEN_PREDICATE_NONE;
> - sel.curr.noMask = 1;
> - }
> - sel.MOV(unpacked, src);
> - sel.pop();
> + if (dstFamily == FAMILY_WORD) {
> + uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
>
> - if (unpacked.reg() != dst.reg())
> - sel.MOV(dst, unpacked);
> - } else if (dstFamily == FAMILY_WORD && srcFamily == FAMILY_QWORD) { //convert i64 to i16 and half.
> + /* The special case, when dst is half, float->word->half will lose accuracy. */
> if (dstType == TYPE_HALF) {
> - /* There is no MOV for Long <---> Half. So Long-->Float-->half. */
> - GBE_ASSERT(sel.hasLongType());
> GBE_ASSERT(sel.hasHalfType());
> - sel.push();
> - if (sel.isScalarReg(insn.getSrc(0))) {
> - sel.curr.execWidth = 1;
> - sel.curr.predicate = GEN_PREDICATE_NONE;
> - sel.curr.noMask = 1;
> - }
> + type = GEN_TYPE_HF;
> + }
>
> - GenRegister funpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> - funpacked = GenRegister::retype(funpacked, GEN_TYPE_F);
> - sel.MOV(funpacked, src);
> - GenRegister ftmp = sel.selReg(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> - ftmp = GenRegister::retype(ftmp, GEN_TYPE_F);
> - sel.MOV(ftmp, funpacked);
> - GenRegister unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> - unpacked = GenRegister::retype(unpacked, GEN_TYPE_HF);
> - sel.MOV(unpacked, ftmp);
> - sel.pop();
> - sel.MOV(dst, unpacked);
> - } else {
> - uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
> + if (!sel.isScalarReg(dst.reg())) {
> + unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> + unpacked = GenRegister::retype(unpacked, type);
> + } else
> + unpacked = GenRegister::retype(sel.unpacked_uw(dst.reg()), type);
> + } else {
> + const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B;
> + if (!sel.isScalarReg(dst.reg())) {
> + unpacked = sel.unpacked_ub(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> + unpacked = GenRegister::retype(unpacked, type);
> + } else
> + unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), type);
> + }
>
> - GenRegister unpacked;
> - if (!sel.isScalarReg(dst.reg())) {
> - if (sel.hasLongType()) {
> - unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> - } else {
> - unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> - }
> - unpacked = GenRegister::retype(unpacked, type);
> - } else {
> - unpacked = GenRegister::retype(sel.unpacked_uw(dst.reg()), type);
> - }
> + sel.push();
> + if (sel.isScalarReg(insn.getSrc(0))) {
> + sel.curr.execWidth = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> + }
> + sel.MOV(unpacked, src);
> + sel.pop();
>
> - if(!sel.hasLongType()) {
> - GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD));
> - tmp.type = GEN_TYPE_D;
> - sel.CONVI64_TO_I(tmp, src);
> - sel.MOV(unpacked, tmp);
> - } else {
> - sel.push();
> - if (sel.isScalarReg(insn.getSrc(0))) {
> - sel.curr.execWidth = 1;
> - sel.curr.predicate = GEN_PREDICATE_NONE;
> - sel.curr.noMask = 1;
> - }
> - sel.MOV(unpacked, src);
> - sel.pop();
> - }
> + if (unpacked.reg() != dst.reg())
> + sel.MOV(dst, unpacked);
> + }
>
> - if (unpacked.reg() != dst.reg()) {
> - sel.MOV(dst, unpacked);
> - }
> + INLINE void convertI64To16bits(Selection::Opaque &sel, const ir::ConvertInstruction &insn, bool &markChildren) const
> + {
> + using namespace ir;
> + const Type dstType = insn.getDstType();
> + const Type srcType = insn.getSrcType();
> + const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
> + const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
> +
> + if (dstType == TYPE_HALF) {
> + /* There is no MOV for Long <---> Half. So Long-->Float-->half. */
> + GBE_ASSERT(sel.hasLongType());
> + GBE_ASSERT(sel.hasHalfType());
> + sel.push();
> + if (sel.isScalarReg(insn.getSrc(0))) {
> + sel.curr.execWidth = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> }
> - } else if (dstFamily == FAMILY_BYTE && srcFamily == FAMILY_QWORD) { //convert i64 to i8
> - GenRegister unpacked;
> - const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B;
>
> - if (sel.hasLongType()) { // handle the native long logic.
> - if (!sel.isScalarReg(dst.reg())) {
> - /* When convert i64 to i8, the hstride should be 8, but the hstride do not
> - support more than 4, so we need to split it to 2 steps. */
> + GenRegister funpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> + funpacked = GenRegister::retype(funpacked, GEN_TYPE_F);
> + sel.MOV(funpacked, src);
> + GenRegister ftmp = sel.selReg(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> + ftmp = GenRegister::retype(ftmp, GEN_TYPE_F);
> + sel.MOV(ftmp, funpacked);
> + GenRegister unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> + unpacked = GenRegister::retype(unpacked, GEN_TYPE_HF);
> + sel.MOV(unpacked, ftmp);
> + sel.pop();
> + sel.MOV(dst, unpacked);
> + } else {
> + uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
> +
> + GenRegister unpacked;
> + if (!sel.isScalarReg(dst.reg())) {
> + if (sel.hasLongType()) {
> unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> - unpacked = GenRegister::retype(unpacked, dstType == TYPE_U8 ? GEN_TYPE_UW : GEN_TYPE_W);
> } else {
> - unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), type);
> + unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> }
> + unpacked = GenRegister::retype(unpacked, type);
> + } else {
> + unpacked = GenRegister::retype(sel.unpacked_uw(dst.reg()), type);
> + }
>
> + if(!sel.hasLongType()) {
> + GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD));
> + tmp.type = GEN_TYPE_D;
> + sel.CONVI64_TO_I(tmp, src);
> + sel.MOV(unpacked, tmp);
> + } else {
> sel.push();
> if (sel.isScalarReg(insn.getSrc(0))) {
> sel.curr.execWidth = 1;
> @@ -4274,229 +4258,263 @@ namespace gbe
> }
> sel.MOV(unpacked, src);
> sel.pop();
> + }
>
> - if (unpacked.reg() != dst.reg()) {
> - sel.MOV(dst, unpacked);
> - }
> - } else { // Do not have native long
> - if (!sel.isScalarReg(dst.reg())) {
> - unpacked = sel.unpacked_ub(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> - unpacked = GenRegister::retype(unpacked, type);
> - } else {
> - unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), type);
> - }
> + if (unpacked.reg() != dst.reg()) {
> + sel.MOV(dst, unpacked);
> + }
> + }
> + }
>
> - GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD));
> - tmp.type = GEN_TYPE_D;
> - sel.CONVI64_TO_I(tmp, src);
> - sel.MOV(unpacked, tmp);
> + INLINE void convertI64ToI8(Selection::Opaque &sel, const ir::ConvertInstruction &insn, bool &markChildren) const
> + {
> + using namespace ir;
> + const Type dstType = insn.getDstType();
> + const Type srcType = insn.getSrcType();
> + const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
> + const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
> + GenRegister unpacked;
> + const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B;
> +
> + if (sel.hasLongType()) { // handle the native long logic.
> + if (!sel.isScalarReg(dst.reg())) {
> + /* When convert i64 to i8, the hstride should be 8, but the hstride do not
> + support more than 4, so we need to split it to 2 steps. */
> + unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> + unpacked = GenRegister::retype(unpacked, dstType == TYPE_U8 ? GEN_TYPE_UW : GEN_TYPE_W);
> + } else {
> + unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), type);
> + }
>
> - if (unpacked.reg() != dst.reg()) {
> - sel.MOV(dst, unpacked);
> - }
> + sel.push();
> + if (sel.isScalarReg(insn.getSrc(0))) {
> + sel.curr.execWidth = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> }
> - } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) &&
> - (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) {// Convert i64 to i32
> - if (sel.hasLongType()) {
> - GenRegister unpacked;
> - const uint32_t type = dstType == TYPE_U32 ? GEN_TYPE_UD : GEN_TYPE_D;
> - if (!sel.isScalarReg(dst.reg())) {
> - unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> - unpacked = GenRegister::retype(unpacked, dstType == TYPE_U32 ? GEN_TYPE_UD : GEN_TYPE_D);
> - } else {
> - unpacked = GenRegister::retype(sel.unpacked_ud(dst.reg()), type);
> - }
> + sel.MOV(unpacked, src);
> + sel.pop();
>
> - sel.push();
> - if (sel.isScalarReg(insn.getSrc(0))) {
> - sel.curr.execWidth = 1;
> - sel.curr.predicate = GEN_PREDICATE_NONE;
> - sel.curr.noMask = 1;
> - }
> - sel.MOV(unpacked, src);
> - sel.pop();
> + if (unpacked.reg() != dst.reg()) {
> + sel.MOV(dst, unpacked);
> + }
> + } else { // Do not have native long
> + if (!sel.isScalarReg(dst.reg())) {
> + unpacked = sel.unpacked_ub(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> + unpacked = GenRegister::retype(unpacked, type);
> + } else {
> + unpacked = GenRegister::retype(sel.unpacked_ub(dst.reg()), type);
> + }
>
> - if (unpacked.reg() != dst.reg()) {
> - sel.MOV(dst, unpacked);
> - }
> + GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD));
> + tmp.type = GEN_TYPE_D;
> + sel.CONVI64_TO_I(tmp, src);
> + sel.MOV(unpacked, tmp);
> +
> + if (unpacked.reg() != dst.reg()) {
> + sel.MOV(dst, unpacked);
> + }
> + }
> + }
> +
> + INLINE void convertI64ToI32(Selection::Opaque &sel, const ir::ConvertInstruction &insn, bool &markChildren) const
> + {
> + using namespace ir;
> + const Type dstType = insn.getDstType();
> + const Type srcType = insn.getSrcType();
> + const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
> + const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
> + if (sel.hasLongType()) {
> + GenRegister unpacked;
> + const uint32_t type = dstType == TYPE_U32 ? GEN_TYPE_UD : GEN_TYPE_D;
> + if (!sel.isScalarReg(dst.reg())) {
> + unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> + unpacked = GenRegister::retype(unpacked, dstType == TYPE_U32 ? GEN_TYPE_UD : GEN_TYPE_D);
> } else {
> - sel.CONVI64_TO_I(dst, src);
> + unpacked = GenRegister::retype(sel.unpacked_ud(dst.reg()), type);
> }
> - } else if (dstType == ir::TYPE_FLOAT && (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) { //i64 to float
> - auto dag = sel.regDAG[src.reg()];
> - // FIXME, in the future, we need to do a common I64 lower to I32 analysis
> - // at llvm IR layer which could cover more cases then just this one.
> - SelectionDAG *dag0, *dag1;
> - if (dag && dag->child[0] && dag->child[1]) {
> - if (dag->child[0]->insn.getOpcode() == OP_LOADI) {
> - dag0 = dag->child[1];
> - dag1 = dag->child[0];
> - } else {
> - dag0 = dag->child[0];
> - dag1 = dag->child[1];
> - }
> - GBE_ASSERT(!(dag->child[0]->insn.getOpcode() == OP_LOADI &&
> - dag->child[1]->insn.getOpcode() == OP_LOADI));
> - if (dag->insn.getOpcode() == OP_AND ||
> - dag->insn.getOpcode() == OP_OR ||
> - dag->insn.getOpcode() == OP_XOR) {
> - GenRegister src0;
> - GenRegister src1;
> - if (lowerI64Reg(sel, dag0, src0, GEN_TYPE_UD) &&
> - lowerI64Reg(sel, dag1, src1, GEN_TYPE_UD)) {
> - switch (dag->insn.getOpcode()) {
> - default:
> - case OP_AND: sel.AND(GenRegister::retype(dst, GEN_TYPE_UD), src0, src1); break;
> - case OP_OR: sel.OR(GenRegister::retype(dst, GEN_TYPE_UD), src0, src1); break;
> - case OP_XOR: sel.XOR(GenRegister::retype(dst, GEN_TYPE_UD), src0, src1); break;
> - }
> - sel.MOV(dst, GenRegister::retype(dst, GEN_TYPE_UD));
> - markChildren = false;
> - return true;
> +
> + sel.push();
> + if (sel.isScalarReg(insn.getSrc(0))) {
> + sel.curr.execWidth = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> + }
> + sel.MOV(unpacked, src);
> + sel.pop();
> +
> + if (unpacked.reg() != dst.reg()) {
> + sel.MOV(dst, unpacked);
> + }
> + } else {
> + sel.CONVI64_TO_I(dst, src);
> + }
> + }
> +
> + INLINE void convertI64ToFloat(Selection::Opaque &sel, const ir::ConvertInstruction &insn, bool &markChildren) const
> + {
> + using namespace ir;
> + const Type dstType = insn.getDstType();
> + const Type srcType = insn.getSrcType();
> + const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
> + const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
> + auto dag = sel.regDAG[src.reg()];
> +
> + // FIXME, in the future, we need to do a common I64 lower to I32 analysis
> + // at llvm IR layer which could cover more cases then just this one.
> + SelectionDAG *dag0, *dag1;
> + if (dag && dag->child[0] && dag->child[1]) {
> + if (dag->child[0]->insn.getOpcode() == OP_LOADI) {
> + dag0 = dag->child[1];
> + dag1 = dag->child[0];
> + } else {
> + dag0 = dag->child[0];
> + dag1 = dag->child[1];
> + }
> + GBE_ASSERT(!(dag->child[0]->insn.getOpcode() == OP_LOADI &&
> + dag->child[1]->insn.getOpcode() == OP_LOADI));
> + if (dag->insn.getOpcode() == OP_AND ||
> + dag->insn.getOpcode() == OP_OR ||
> + dag->insn.getOpcode() == OP_XOR) {
> + GenRegister src0;
> + GenRegister src1;
> + if (lowerI64Reg(sel, dag0, src0, GEN_TYPE_UD) &&
> + lowerI64Reg(sel, dag1, src1, GEN_TYPE_UD)) {
> + switch (dag->insn.getOpcode()) {
> + default:
> + case OP_AND: sel.AND(GenRegister::retype(dst, GEN_TYPE_UD), src0, src1); break;
> + case OP_OR: sel.OR(GenRegister::retype(dst, GEN_TYPE_UD), src0, src1); break;
> + case OP_XOR: sel.XOR(GenRegister::retype(dst, GEN_TYPE_UD), src0, src1); break;
> }
> + sel.MOV(dst, GenRegister::retype(dst, GEN_TYPE_UD));
> + markChildren = false;
> + return;
> }
> }
> + }
>
> - if (!sel.hasLongType()) {
> - GenRegister tmp[6];
> - for(int i=0; i<6; i++) {
> - tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> - }
> - sel.push();
> - sel.curr.flag = 0;
> - sel.curr.subFlag = 1;
> - sel.CONVI64_TO_F(dst, src, tmp);
> - sel.pop();
> - } else {
> - GenRegister unpacked;
> - const uint32_t type = GEN_TYPE_F;
> - unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> - unpacked = GenRegister::retype(unpacked, type);
> + if (!sel.hasLongType()) {
> + GenRegister tmp[6];
> + for(int i=0; i<6; i++) {
> + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> + }
> + sel.push();
> + sel.curr.flag = 0;
> + sel.curr.subFlag = 1;
> + sel.CONVI64_TO_F(dst, src, tmp);
> + sel.pop();
> + } else {
> + GenRegister unpacked;
> + const uint32_t type = GEN_TYPE_F;
> + unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> + unpacked = GenRegister::retype(unpacked, type);
>
> - sel.push();
> - if (sel.isScalarReg(insn.getSrc(0))) {
> - sel.curr.execWidth = 1;
> - sel.curr.predicate = GEN_PREDICATE_NONE;
> - sel.curr.noMask = 1;
> - }
> - sel.MOV(unpacked, src);
> - sel.pop();
> + sel.push();
> + if (sel.isScalarReg(insn.getSrc(0))) {
> + sel.curr.execWidth = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> + }
> + sel.MOV(unpacked, src);
> + sel.pop();
>
> - if (unpacked.reg() != dst.reg()) {
> - sel.MOV(dst, unpacked);
> - }
> + if (unpacked.reg() != dst.reg()) {
> + sel.MOV(dst, unpacked);
> }
> - } else if (sel.hasLongType() && sel.hasLongRegRestrict() && dstFamily == FAMILY_QWORD && srcFamily != FAMILY_QWORD) {
> - // Convert i32/i16/i8/float to i64/double if hasLongRegRestrict(src and dst hstride must be aligned to the same qword).
> + }
> + }
> +
> + INLINE void convertSmallIntsToI64(Selection::Opaque &sel, const ir::ConvertInstruction &insn, bool &markChildren) const
> + {
> + using namespace ir;
> + const Type dstType = insn.getDstType();
> + const Type srcType = insn.getSrcType();
> + const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
> + const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
> + const RegisterFamily srcFamily = getFamily(srcType);
> +
> + if (sel.hasLongType() && sel.hasLongRegRestrict()) {
> + // Convert i32/i16/i8 to i64 if hasLongRegRestrict(src and dst hstride must be aligned to the same qword).
> GenRegister unpacked;
> GenRegister unpacked_src = src;
>
> sel.push();
> - if (sel.isScalarReg(insn.getSrc(0))) {
> - sel.curr.execWidth = 1;
> - sel.curr.predicate = GEN_PREDICATE_NONE;
> - sel.curr.noMask = 1;
> - }
> + if (sel.isScalarReg(insn.getSrc(0))) {
> + sel.curr.execWidth = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> + }
>
> - if (srcType == ir::TYPE_FLOAT) {
> - unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> - unpacked = GenRegister::retype(unpacked, GEN_TYPE_F);
> - } else if(srcFamily == FAMILY_DWORD) {
> - unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> - unpacked = GenRegister::retype(unpacked, srcType == TYPE_U32 ? GEN_TYPE_UD : GEN_TYPE_D);
> - } else if(srcFamily == FAMILY_WORD) {
> - unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> - unpacked = GenRegister::retype(unpacked, srcType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W);
> - } else if(srcFamily == FAMILY_BYTE) {
> - GenRegister tmp = sel.selReg(sel.reg(FAMILY_WORD, sel.isScalarReg(insn.getSrc(0))));
> - tmp = GenRegister::retype(tmp, srcType == TYPE_U8 ? GEN_TYPE_UW : GEN_TYPE_W);
> - unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> - unpacked = GenRegister::retype(unpacked, srcType == TYPE_U8 ? GEN_TYPE_UW : GEN_TYPE_W);
> - sel.MOV(tmp, src);
> - unpacked_src = tmp;
> - } else
> - GBE_ASSERT(0);
> + if(srcFamily == FAMILY_DWORD) {
> + unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> + unpacked = GenRegister::retype(unpacked, srcType == TYPE_U32 ? GEN_TYPE_UD : GEN_TYPE_D);
> + } else if(srcFamily == FAMILY_WORD) {
> + unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> + unpacked = GenRegister::retype(unpacked, srcType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W);
> + } else if(srcFamily == FAMILY_BYTE) {
> + GenRegister tmp = sel.selReg(sel.reg(FAMILY_WORD, sel.isScalarReg(insn.getSrc(0))));
> + tmp = GenRegister::retype(tmp, srcType == TYPE_U8 ? GEN_TYPE_UW : GEN_TYPE_W);
> + unpacked = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> + unpacked = GenRegister::retype(unpacked, srcType == TYPE_U8 ? GEN_TYPE_UW : GEN_TYPE_W);
> + sel.MOV(tmp, src);
> + unpacked_src = tmp;
> + } else
> + GBE_ASSERT(0);
>
> - sel.MOV(unpacked, unpacked_src);
> + sel.MOV(unpacked, unpacked_src);
> sel.pop();
> sel.MOV(dst, unpacked);
> - }else if ((dst.isdf() && srcType == ir::TYPE_FLOAT) ||
> - (src.isdf() && dstType == ir::TYPE_FLOAT)) { // float and double conversion
> - ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
> - sel.MOV_DF(dst, src, sel.selReg(r, TYPE_U64));
> - } else if (dst.isint64()) { // promote to i64
> - switch(src.type) {
> - case GEN_TYPE_F:
> - {
> - if (!sel.hasLongType()) {
> - GenRegister tmp[2];
> - tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> - tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_FLOAT);
> - sel.push();
> - sel.curr.flag = 0;
> - sel.curr.subFlag = 1;
> - sel.CONVF_TO_I64(dst, src, tmp);
> - sel.pop();
> - } else {
> - sel.MOV(dst, src);
> - }
> - break;
> - }
> - case GEN_TYPE_HF:
> - {
> - GBE_ASSERT(sel.hasLongType());
> - GBE_ASSERT(sel.hasHalfType());
> - uint32_t type = dstType == TYPE_U64 ? GEN_TYPE_UD : GEN_TYPE_D;
> - GenRegister tmp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))), TYPE_U32), type);
> - sel.push();
> - if (sel.isScalarReg(insn.getSrc(0))) {
> - sel.curr.execWidth = 1;
> - sel.curr.predicate = GEN_PREDICATE_NONE;
> - sel.curr.noMask = 1;
> - }
> - sel.MOV(tmp, src);
> - sel.pop();
> - sel.MOV(dst, tmp);
> - break;
> - }
> - case GEN_TYPE_DF:
> - NOT_IMPLEMENTED;
> - default:
> - if (sel.hasLongType()) {
> - sel.MOV(dst, src);
> - } else {
> - sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD)));
> - }
> - }
> - } else if (srcType == ir::TYPE_HALF && (dstFamily == FAMILY_BYTE || dstFamily == FAMILY_WORD)) {
> - // Special case, half -> char/short.
> - /* [DevBDW+]: Format conversion to or from HF (Half Float) must be DWord-aligned and
> - strided by a DWord on the destination. */
> - GBE_ASSERT(sel.hasHalfType());
> - GenRegister tmp;
> + } else if (sel.hasLongType()) {
> + sel.MOV(dst, src);
> + } else {
> + sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD)));
> + }
> + }
> +
> + INLINE void convertFToI64(Selection::Opaque &sel, const ir::ConvertInstruction &insn, bool &markChildren) const
> + {
> + using namespace ir;
> + const Type dstType = insn.getDstType();
> + const Type srcType = insn.getSrcType();
> + const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
> + const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
> +
> + if (sel.hasLongType() && sel.hasLongRegRestrict() && srcType == ir::TYPE_FLOAT) { // typical bsw float->long case
> + // Convert float to i64 if hasLongRegRestrict(src and dst hstride must be aligned to the same qword).
> + GenRegister unpacked;
> + GenRegister unpacked_src = src;
> +
> sel.push();
> if (sel.isScalarReg(insn.getSrc(0))) {
> sel.curr.execWidth = 1;
> sel.curr.predicate = GEN_PREDICATE_NONE;
> sel.curr.noMask = 1;
> }
> - if (dstFamily == FAMILY_BYTE) {
> - const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B;
> - tmp = GenRegister::retype(sel.unpacked_ub(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0)))), type);
> - sel.MOV(tmp, src);
> - } else {
> - const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
> - tmp = GenRegister::retype(sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0)))), type);
> - sel.MOV(tmp, src);
> - }
> +
> + unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0))));
> + unpacked = GenRegister::retype(unpacked, GEN_TYPE_F);
> + sel.MOV(unpacked, unpacked_src);
> sel.pop();
> - sel.MOV(dst, tmp);
> - } else if (dstType == ir::TYPE_HALF && (srcFamily == FAMILY_BYTE || srcFamily == FAMILY_WORD)) {
> - // Special case, char/uchar -> half
> - /* [DevBDW+]: Format conversion to or from HF (Half Float) must be DWord-aligned and
> - strided by a DWord on the destination. */
> + sel.MOV(dst, unpacked);
> + } else if (srcType == ir::TYPE_FLOAT) {
> + if (sel.hasLongType()) { // typical bdw float->long case
> + sel.MOV(dst, src);
> + } else { // typical old platform float->long case
> + GenRegister tmp[2];
> + tmp[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
> + tmp[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_FLOAT);
> + sel.push();
> + sel.curr.flag = 0;
> + sel.curr.subFlag = 1;
> + sel.CONVF_TO_I64(dst, src, tmp);
> + sel.pop();
> + }
> + } else if (srcType == ir::TYPE_HALF) { // TODO: We may consider bsw's hasLongRegRestrict case here.
> + /* No need to consider old platform. if we support half, we must have native long. */
> + GBE_ASSERT(sel.hasLongType());
> GBE_ASSERT(sel.hasHalfType());
> - GenRegister tmp = GenRegister::retype(sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0)))), GEN_TYPE_HF);
> + uint32_t type = dstType == TYPE_U64 ? GEN_TYPE_UD : GEN_TYPE_D;
> + GenRegister tmp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))), TYPE_U32), type);
> sel.push();
> if (sel.isScalarReg(insn.getSrc(0))) {
> sel.curr.execWidth = 1;
> @@ -4506,11 +4524,163 @@ namespace gbe
> sel.MOV(tmp, src);
> sel.pop();
> sel.MOV(dst, tmp);
> - } else
> - sel.MOV(dst, src);
> + } else if (src.type == GEN_TYPE_DF) {
> + //TODO:
> + GBE_ASSERT(0);
> + } else {
> + /* Invalid case. */
> + GBE_ASSERT(0);
> + }
> + }
> +
> + INLINE void convertBetweenFloatDouble(Selection::Opaque &sel, const ir::ConvertInstruction &insn, bool &markChildren) const
> + {
> + using namespace ir;
> + const Type dstType = insn.getDstType();
> + const Type srcType = insn.getSrcType();
> + const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
> + const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
> +
> +
> + //TODO:
> + ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
> + sel.MOV_DF(dst, src, sel.selReg(r, TYPE_U64));
> + }
> +
> + INLINE void convertBetweenHalfDouble(Selection::Opaque &sel, const ir::ConvertInstruction &insn, bool &markChildren) const
> + {
> + using namespace ir;
> + const Type dstType = insn.getDstType();
> + const Type srcType = insn.getSrcType();
> + const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
> + const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
>
> + //TODO:
> + ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
> + sel.MOV_DF(dst, src, sel.selReg(r, TYPE_U64));
> + }
> +
> + INLINE void convertHalfToSmallInts(Selection::Opaque &sel, const ir::ConvertInstruction &insn, bool &markChildren) const
> + {
> + using namespace ir;
> + const Type dstType = insn.getDstType();
> + const Type srcType = insn.getSrcType();
> + const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
> + const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
> + const RegisterFamily dstFamily = getFamily(dstType);
> +
> + // Special case, half -> char/short.
> + /* [DevBDW+]: Format conversion to or from HF (Half Float) must be DWord-aligned and
> + strided by a DWord on the destination. */
> + GBE_ASSERT(sel.hasHalfType());
> + GenRegister tmp;
> + sel.push();
> + if (sel.isScalarReg(insn.getSrc(0))) {
> + sel.curr.execWidth = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> + }
> + if (dstFamily == FAMILY_BYTE) {
> + const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B;
> + tmp = GenRegister::retype(sel.unpacked_ub(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0)))), type);
> + sel.MOV(tmp, src);
> + } else {
> + const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
> + tmp = GenRegister::retype(sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0)))), type);
> + sel.MOV(tmp, src);
> + }
> sel.pop();
> + sel.MOV(dst, tmp);
> + }
>
> + INLINE void convertSmallIntsToHalf(Selection::Opaque &sel, const ir::ConvertInstruction &insn, bool &markChildren) const
> + {
> + using namespace ir;
> + const Type dstType = insn.getDstType();
> + const Type srcType = insn.getSrcType();
> + const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
> + const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
> +
> + // Special case, char/uchar -> half
> + /* [DevBDW+]: Format conversion to or from HF (Half Float) must be DWord-aligned and
> + strided by a DWord on the destination. */
> + GBE_ASSERT(sel.hasHalfType());
> + GenRegister tmp = GenRegister::retype(sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0)))), GEN_TYPE_HF);
> + sel.push();
> + if (sel.isScalarReg(insn.getSrc(0))) {
> + sel.curr.execWidth = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> + }
> + sel.MOV(tmp, src);
> + sel.pop();
> + sel.MOV(dst, tmp);
> + }
> +
> + INLINE bool emitOne(Selection::Opaque &sel, const ir::ConvertInstruction &insn, bool &markChildren) const
> + {
> + using namespace ir;
> + const Type dstType = insn.getDstType();
> + const Type srcType = insn.getSrcType();
> + const RegisterFamily dstFamily = getFamily(dstType);
> + const RegisterFamily srcFamily = getFamily(srcType);
> + const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
> + const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
> + const Opcode opcode = insn.getOpcode();
> + sel.push();
> + if (sel.isScalarReg(insn.getDst(0)) == true) {
> + sel.curr.execWidth = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> + }
> + if(opcode == ir::OP_SAT_CVT)
> + sel.curr.saturate = 1;
> +
> + if (opcode == OP_F16TO32 || opcode == OP_F32TO16) { /* Conversion between float and half. */
> + convertBetweenHalfFloat(sel, insn, markChildren);
> + } else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && srcFamily == FAMILY_DWORD) {
> + //convert i32/float to small int/half
> + convert32bitsToSmall(sel, insn, markChildren);
> + } else if (dstFamily == FAMILY_WORD && srcFamily == FAMILY_QWORD && srcType != ir::TYPE_DOUBLE) {
> + //convert i64 to i16 and half.
> + convertI64To16bits(sel, insn, markChildren);
> + } else if (dstFamily == FAMILY_BYTE && srcFamily == FAMILY_QWORD && srcType != ir::TYPE_DOUBLE) {
> + //convert i64 to i8
> + convertI64ToI8(sel, insn, markChildren);
> + } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) &&
> + (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) {// Convert i64 to i32
> + convertI64ToI32(sel, insn, markChildren);
> + } else if (dstType == ir::TYPE_FLOAT && (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) {
> + convertI64ToFloat(sel, insn, markChildren);
> + } else if (dstType == ir::TYPE_DOUBLE && (srcType == ir::TYPE_U64 || srcType == ir::TYPE_S64)) {
> + // TODO: long -> double
> + GBE_ASSERT(0);
> + } else if ((dstType == ir::TYPE_U64 || dstType == ir::TYPE_S64)
> + && (srcFamily != FAMILY_QWORD && srcType != ir::TYPE_FLOAT && srcType != ir::TYPE_HALF)) {
> + convertSmallIntsToI64(sel, insn, markChildren);
> + } else if ((dstType == ir::TYPE_U64 || dstType == ir::TYPE_S64)
> + && (srcType == ir::TYPE_FLOAT || srcType == ir::TYPE_HALF || srcType == ir::TYPE_DOUBLE)) {
> + convertFToI64(sel, insn, markChildren);
> + } else if ((srcType == ir::TYPE_FLOAT && dstType == ir::TYPE_DOUBLE) ||
> + (dstType == ir::TYPE_FLOAT && srcType == ir::TYPE_DOUBLE)) {
> + // float and double conversion
> + convertBetweenFloatDouble(sel, insn, markChildren);
> + } else if ((srcType == ir::TYPE_HALF && dstType == ir::TYPE_DOUBLE) ||
> + (dstType == ir::TYPE_HALF && srcType == ir::TYPE_DOUBLE)) {
> + // float and double conversion
> + convertBetweenHalfDouble(sel, insn, markChildren);
> + } else if (srcType == ir::TYPE_HALF && (dstFamily == FAMILY_BYTE || dstFamily == FAMILY_WORD)) {
> + // Convert half to small int
> + convertHalfToSmallInts(sel, insn, markChildren);
> + } else if (dstType == ir::TYPE_HALF && (srcFamily == FAMILY_BYTE || srcFamily == FAMILY_WORD)) {
> + // Convert small int to half
> + convertSmallIntsToHalf(sel, insn, markChildren);
> + } else {
> + /* All special cases has been handled, just MOV. */
> + sel.MOV(dst, src);
> + }
> +
> + sel.pop();
> return true;
> }
> DECL_CTOR(ConvertInstruction, 1, 1);
> --
> 1.7.9.5
>
>
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list