[Beignet] [PATCH 1/2] Fix convert long/ulong to float.

Mon Dec 23 21:25:09 PST 2013

The JMP instructions in this change is only for optimize, it will not jump out of this BB. In fact, the JMPs can be removed totally.
>From the GEN IR view, it will only jump to next GEN IR. So I thinks it will not break the register interval.
There are lots of long relative functions in this stage. If only mov it to other stage, I think it is meaningless.

-----Original Message-----
From: Zhigang Gong [mailto:zhigang.gong at linux.intel.com] 
Sent: Monday, December 23, 2013 6:32 PM
To: Yang, Rong R
Cc: beignet at lists.freedesktop.org
Subject: Re: [Beignet] [PATCH 1/2] Fix convert long/ulong to float.

My major concern is that this patch introduces a branch instruction at instruction selection stage. And our current implementation assumes that the instruction selection stage should not break the original BB, so it doesn't check the JMP instruction we generated here, and it still treat the origin BB which now has some JMPs as a valid BB. Then the register interval calculating may not be correct within that invalid BB.

IMO, we'd better avoid introducing branching at the select stage. Is it possible to mov it to the earilier stage?

On Tue, Dec 17, 2013 at 03:32:12PM +0800, Yang Rong wrote:
> Previour implement don't handle rounding. The default rouding mode should be round to even.
> According float format, separate long/ulong to two part, first 23 non 
> zero bits is mantissa, add 1 when the next bit is 1, and than round to even when all remain bits is zero.
> 
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  backend/src/backend/gen_context.cpp        | 110 +++++++++++++++++++++++++----
>  backend/src/backend/gen_context.hpp        |   2 +-
>  backend/src/backend/gen_insn_selection.cpp |  18 ++---
>  3 files changed, 108 insertions(+), 22 deletions(-)
> 
> diff --git a/backend/src/backend/gen_context.cpp 
> b/backend/src/backend/gen_context.cpp
> index 6fd2dea..7ebe03c 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -822,12 +822,95 @@ namespace gbe
>      p->pop();
>    }
>  
> -  void GenContext::UnsignedI64ToFloat(GenRegister dst, GenRegister high, GenRegister low, GenRegister tmp) {
> -    p->MOV(dst, high);
> -    p->MUL(dst, dst, GenRegister::immf(65536.f * 65536.f));
> -    tmp.type = GEN_TYPE_F;
> -    p->MOV(tmp, low);
> -    p->ADD(dst, dst, tmp);
> +  void GenContext::UnsignedI64ToFloat(GenRegister dst, GenRegister high, GenRegister low, GenRegister exp,
> +                                            GenRegister mantissa, GenRegister tmp, GenRegister flag) {
> +    uint32_t jip0, jip1;
> +    GenRegister dst_ud = GenRegister::retype(dst, GEN_TYPE_UD);
> +    p->FBH(exp, high);
> +    p->ADD(exp, GenRegister::negate(exp), GenRegister::immud(31));  //exp = 32 when high == 0
> +    p->push();
> +      p->curr.useFlag(flag.flag_nr(), flag.flag_subnr());
> +      p->curr.predicate = GEN_PREDICATE_NONE;
> +      p->CMP(GEN_CONDITIONAL_EQ, exp, GenRegister::immud(32));   //high == 0
> +      p->curr.predicate = GEN_PREDICATE_NORMAL;
> +      p->MOV(dst, low);
> +      p->push();
> +        if (simdWidth == 8)
> +          p->curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
> +        else if (simdWidth == 16)
> +          p->curr.predicate = GEN_PREDICATE_ALIGN1_ALL16H;
> +        else
> +          NOT_IMPLEMENTED;
> +        p->curr.execWidth = 1;
> +        p->curr.noMask = 1;
> +        p->JMPI(GenRegister::immud(0));
> +        jip0 = p->n_instruction() - 1;
> +      p->pop();
> +
> +      p->curr.predicate = GEN_PREDICATE_NONE;
> +      p->CMP(GEN_CONDITIONAL_G, exp, GenRegister::immud(23));
> +      p->curr.predicate = GEN_PREDICATE_NORMAL;
> +      p->CMP(GEN_CONDITIONAL_L, exp, GenRegister::immud(32));  //exp>23 && high!=0
> +      p->ADD(tmp, exp, GenRegister::immud(-23));
> +      p->SHR(mantissa, high, tmp);
> +      p->AND(mantissa, mantissa, GenRegister::immud(0x7fffff));
> +      p->SHR(dst_ud, low, tmp);   //dst is temp regitster here
> +      p->ADD(tmp, GenRegister::negate(tmp), GenRegister::immud(32));
> +      p->SHL(high, high, tmp);
> +      p->OR(high, high, dst_ud);
> +      p->SHL(low, low, tmp);
> +      p->push();
> +        if (simdWidth == 8)
> +          p->curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
> +        else if (simdWidth == 16)
> +          p->curr.predicate = GEN_PREDICATE_ALIGN1_ALL16H;
> +        else
> +          NOT_IMPLEMENTED;
> +        p->curr.execWidth = 1;
> +        p->curr.noMask = 1;
> +        p->JMPI(GenRegister::immud(0));
> +        jip1 = p->n_instruction() - 1;
> +      p->pop();
> +
> +      p->curr.predicate = GEN_PREDICATE_NONE;
> +      p->CMP(GEN_CONDITIONAL_EQ, exp, GenRegister::immud(23));
> +      p->curr.predicate = GEN_PREDICATE_NORMAL;
> +      p->MOV(dst_ud, GenRegister::immud(0));   //exp==9, SHR == 0
> +
> +      p->curr.predicate = GEN_PREDICATE_NONE;
> +      p->CMP(GEN_CONDITIONAL_L, exp, GenRegister::immud(23));
> +      p->curr.predicate = GEN_PREDICATE_NORMAL;
> +      p->ADD(tmp, exp, GenRegister::immud(9));
> +      p->SHR(dst_ud, low, tmp);   //dst is temp regitster here
> +
> +      p->curr.predicate = GEN_PREDICATE_NONE;
> +      p->CMP(GEN_CONDITIONAL_LE, exp, GenRegister::immud(23));
> +      p->curr.predicate = GEN_PREDICATE_NORMAL;
> +      p->ADD(tmp, GenRegister::negate(exp), GenRegister::immud(23));
> +      p->SHL(mantissa, high, tmp);
> +      p->OR(mantissa, mantissa, dst_ud);
> +      p->AND(mantissa, mantissa, GenRegister::immud(0x7fffff));
> +      p->SHL(high, low, tmp);
> +      p->MOV(low, GenRegister::immud(0));
> +
> +      p->patchJMPI(jip1, (p->n_instruction() - jip1 + 1) * 2);
> +      p->curr.predicate = GEN_PREDICATE_NONE;
> +      p->CMP(GEN_CONDITIONAL_LE, exp, GenRegister::immud(31));  //update dst where high != 0
> +      p->curr.predicate = GEN_PREDICATE_NORMAL;
> +      p->ADD(exp, exp, GenRegister::immud(159));
> +      p->SHL(exp, exp, GenRegister::immud(23));
> +      p->OR(dst_ud, exp, mantissa);
> +
> +      p->CMP(GEN_CONDITIONAL_GE, high, GenRegister::immud(0x80000000));
> +      p->ADD(dst_ud, dst_ud, GenRegister::immud(1));
> +
> +      p->CMP(GEN_CONDITIONAL_EQ, high, GenRegister::immud(0x80000000));
> +      p->CMP(GEN_CONDITIONAL_EQ, low, GenRegister::immud(0x0));
> +      p->AND(dst_ud, dst_ud, GenRegister::immud(0xfffffffe));
> +      p->patchJMPI(jip0, (p->n_instruction() - jip0 + 1) * 2);
> +
> +    p->pop();
> +
>    }
>  
>    void GenContext::emitI64ToFloatInstruction(const 
> SelectionInstruction &insn) { @@ -835,16 +918,19 @@ namespace gbe
>      GenRegister dest = ra->genReg(insn.dst(0));
>      GenRegister high = ra->genReg(insn.dst(1));
>      GenRegister low = ra->genReg(insn.dst(2));
> -    GenRegister tmp = ra->genReg(insn.dst(3));
> -    GenRegister flagReg = ra->genReg(insn.dst(4));
> +    GenRegister exp = ra->genReg(insn.dst(3));
> +    GenRegister mantissa = ra->genReg(insn.dst(4));
> +    GenRegister tmp = ra->genReg(insn.dst(5));
> +    GenRegister f0 = ra->genReg(insn.dst(6));
> +    GenRegister f1 = ra->genReg(insn.dst(7));
>      loadTopHalf(high, src);
>      loadBottomHalf(low, src);
>      if(!src.is_signed_int()) {
> -      UnsignedI64ToFloat(dest, high, low, tmp);
> +      UnsignedI64ToFloat(dest, high, low, exp, mantissa, tmp, f0);
>      } else {
>        p->push();
>        p->curr.predicate = GEN_PREDICATE_NONE;
> -      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
> +      p->curr.useFlag(f1.flag_nr(), f1.flag_subnr());
>        p->CMP(GEN_CONDITIONAL_GE, high, GenRegister::immud(0x80000000));
>        p->curr.predicate = GEN_PREDICATE_NORMAL;
>        p->NOT(high, high);
> @@ -853,9 +939,9 @@ namespace gbe
>        addWithCarry(low, low, tmp);
>        p->ADD(high, high, tmp);
>        p->pop();
> -      UnsignedI64ToFloat(dest, high, low, tmp);
> +      UnsignedI64ToFloat(dest, high, low, exp, mantissa, tmp, f0);
>        p->push();
> -      p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
> +      p->curr.useFlag(f1.flag_nr(), f1.flag_subnr());
>        dest.type = GEN_TYPE_UD;
>        p->OR(dest, dest, GenRegister::immud(0x80000000));
>        p->pop();
> diff --git a/backend/src/backend/gen_context.hpp 
> b/backend/src/backend/gen_context.hpp
> index f8ef8e0..c9c5621 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -92,7 +92,7 @@ namespace gbe
>      void I32FullMult(GenRegister high, GenRegister low, GenRegister src0, GenRegister src1);
>      void I64FullMult(GenRegister dst1, GenRegister dst2, GenRegister dst3, GenRegister dst4, GenRegister x_high, GenRegister x_low, GenRegister y_high, GenRegister y_low);
>      void saveFlag(GenRegister dest, int flag, int subFlag);
> -    void UnsignedI64ToFloat(GenRegister dst, GenRegister high, GenRegister low, GenRegister tmp);
> +    void UnsignedI64ToFloat(GenRegister dst, GenRegister high, 
> + GenRegister low, GenRegister exp, GenRegister mantissa, GenRegister 
> + tmp, GenRegister flag);
>  
>      /*! Final Gen ISA emission helper functions */
>      void emitLabelInstruction(const SelectionInstruction &insn); diff 
> --git a/backend/src/backend/gen_insn_selection.cpp 
> b/backend/src/backend/gen_insn_selection.cpp
> index 23e9da7..a35b237 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -473,7 +473,7 @@ namespace gbe
>  #undef ALU3
>  #undef I64Shift
>      /*! Convert 64-bit integer to 32-bit float */
> -    void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[4]);
> +    void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[7]);
>      /*! Convert 64-bit integer to 32-bit float */
>      void CONVF_TO_I64(Reg dst, Reg src, GenRegister tmp[3]);
>      /*! Saturated 64bit x*y + z */
> @@ -1132,11 +1132,11 @@ namespace gbe
>        insn->dst(i + 1) = tmp[i];
>    }
>  
> -  void Selection::Opaque::CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[4]) {
> -    SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVI64_TO_F, 5, 1);
> +  void Selection::Opaque::CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[7]) {
> +    SelectionInstruction *insn = 
> + this->appendInsn(SEL_OP_CONVI64_TO_F, 8, 1);
>      insn->dst(0) = dst;
>      insn->src(0) = src;
> -    for(int i = 0; i < 4; i ++)
> +    for(int i = 0; i < 7; i ++)
>        insn->dst(i + 1) = tmp[i];
>    }
>  
> @@ -2697,12 +2697,12 @@ namespace gbe
>        } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && srcFamily == FAMILY_QWORD) {
>          sel.CONVI64_TO_I(dst, src);
>        } else if (dstType == ir::TYPE_FLOAT && srcFamily == FAMILY_QWORD) {
> -        GenRegister tmp[4];
> -        for(int i=0; i<3; i++) {
> -          tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
> -          tmp[i].type = GEN_TYPE_UD;
> +        GenRegister tmp[7];
> +        for(int i=0; i<5; i++) {
> +          tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
>          }
> -        tmp[3] = sel.selReg(sel.reg(FAMILY_BOOL));
> +        tmp[5] = sel.selReg(sel.reg(FAMILY_BOOL), TYPE_BOOL);
> +        tmp[6] = sel.selReg(sel.reg(FAMILY_BOOL), TYPE_BOOL);
>          sel.CONVI64_TO_F(dst, src, tmp);
>        } else if (dst.isdf()) {
>          ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
> --
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet