[Beignet] [PATCH] Define temporary reg as dest reg of instruction

Zhigang Gong zhigang.gong at linux.intel.com
Mon Aug 12 01:16:12 PDT 2013


Homer,

I just noticed that you sent two duplicate emails for this patch. Should
I ignore the first one and take this one?

On Mon, Aug 12, 2013 at 10:26:44AM +0800, Homer Hsing wrote:
> Previously I defined the temporary reg as a source reg of the instruction.
> But the instruction scheduler treats source regs as read-only regs.
> So I now define them as dest regs.
> 
> Signed-off-by: Homer Hsing <homer.xing at intel.com>
> ---
>  backend/src/backend/gen_context.cpp                | 212 +++++++++++----------
>  backend/src/backend/gen_context.hpp                |   2 +
>  .../src/backend/gen_insn_gen7_schedule_info.hxx    |   2 +
>  backend/src/backend/gen_insn_selection.cpp         |  39 +++-
>  backend/src/backend/gen_insn_selection.hxx         |  14 +-
>  5 files changed, 153 insertions(+), 116 deletions(-)
> 
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index 024da8a..b3650c5 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -151,13 +151,121 @@ namespace gbe
>      }
>    }
>  
> +  void GenContext::emitUnaryWithTempInstruction(const SelectionInstruction &insn) {
> +    GenRegister dst = ra->genReg(insn.dst(0));
> +    GenRegister src = ra->genReg(insn.src(0));
> +    GenRegister tmp = ra->genReg(insn.dst(1));
> +    switch (insn.opcode) {
> +      case SEL_OP_LOAD_DF_IMM:
> +        p->LOAD_DF_IMM(dst, tmp, src.value.df);
> +        break;
> +      case SEL_OP_MOV_DF:
> +        p->MOV_DF(dst, src, tmp);
> +        break;
> +      default:
> +        NOT_IMPLEMENTED;
> +    }
> +  }
> +
> +  void GenContext::emitBinaryWithTempInstruction(const SelectionInstruction &insn) {
> +    GenRegister dst = ra->genReg(insn.dst(0));
> +    GenRegister src0 = ra->genReg(insn.src(0));
> +    GenRegister src1 = ra->genReg(insn.src(1));
> +    GenRegister tmp = ra->genReg(insn.dst(1));
> +    switch (insn.opcode) {
> +      case SEL_OP_I64ADD: {
> +        GenRegister x = GenRegister::retype(tmp, GEN_TYPE_UD),
> +                    y = GenRegister::suboffset(x, p->curr.execWidth);
> +        loadBottomHalf(x, src0);
> +        loadBottomHalf(y, src1);
> +        addWithCarry(x, x, y);
> +        storeBottomHalf(dst, x);
> +        loadTopHalf(x, src0);
> +        p->ADD(x, x, y);
> +        loadTopHalf(y, src1);
> +        p->ADD(x, x, y);
> +        storeTopHalf(dst, x);
> +        break;
> +      }
> +      case SEL_OP_I64SUB: {
> +        GenRegister x = GenRegister::retype(tmp, GEN_TYPE_UD),
> +                    y = GenRegister::suboffset(x, p->curr.execWidth);
> +        loadBottomHalf(x, src0);
> +        loadBottomHalf(y, src1);
> +        subWithBorrow(x, x, y);
> +        storeBottomHalf(dst, x);
> +        loadTopHalf(x, src0);
> +        subWithBorrow(x, x, y);
> +        loadTopHalf(y, src1);
> +        subWithBorrow(x, x, y);
> +        storeTopHalf(dst, x);
> +        break;
> +      }
> +      case SEL_OP_MUL_HI: {
> +        int w = p->curr.execWidth;
> +        p->push();
> +        p->curr.execWidth = 8;
> +        for (int i = 0; i < w / 8; i ++) {
> +          p->push();
> +          p->curr.predicate = GEN_PREDICATE_NONE;
> +          p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0, src1);
> +          p->curr.accWrEnable = 1;
> +          p->MACH(tmp, src0, src1);
> +          p->pop();
> +          p->curr.quarterControl = i;
> +          p->MOV(dst, tmp);
> +          dst = GenRegister::Qn(dst, 1);
> +          src0 = GenRegister::Qn(src0, 1);
> +          src1 = GenRegister::Qn(src1, 1);
> +        } 
> +        p->pop();
> +        break;
> +       }
> +     case SEL_OP_HADD: {
> +        int w = p->curr.execWidth;
> +        p->push();
> +        p->curr.execWidth = 8;
> +        for (int i = 0; i < w / 8; i ++) {
> +          p->curr.quarterControl = i;
> +          p->ADDC(dst, src0, src1);
> +          p->SHR(dst, dst, GenRegister::immud(1));
> +          p->SHL(tmp, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> +          p->OR(dst, dst, tmp);
> +          dst = GenRegister::Qn(dst, 1);
> +          src0 = GenRegister::Qn(src0, 1);
> +          src1 = GenRegister::Qn(src1, 1);
> +        }
> +        p->pop();
> +        break;
> +       }
> +      case SEL_OP_RHADD: {
> +        int w = p->curr.execWidth;
> +        p->push();
> +        p->curr.execWidth = 8;
> +        for (int i = 0; i < w / 8; i ++) {
> +          p->curr.quarterControl = i;
> +          p->ADDC(dst, src0, src1);
> +          p->ADD(dst, dst, GenRegister::immud(1));
> +          p->SHR(dst, dst, GenRegister::immud(1));
> +          p->SHL(tmp, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> +          p->OR(dst, dst, tmp);
> +          dst = GenRegister::Qn(dst, 1);
> +          src0 = GenRegister::Qn(src0, 1);
> +          src1 = GenRegister::Qn(src1, 1);
> +        }
> +        p->pop();
> +        break;
> +       }
> +      default:
> +        NOT_IMPLEMENTED;
> +    }
> +  }
> +
>    void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
>      const GenRegister dst = ra->genReg(insn.dst(0));
>      const GenRegister src0 = ra->genReg(insn.src(0));
>      const GenRegister src1 = ra->genReg(insn.src(1));
>      switch (insn.opcode) {
> -      case SEL_OP_LOAD_DF_IMM: p->LOAD_DF_IMM(dst, src1, src0.value.df); break;
> -      case SEL_OP_MOV_DF: p->MOV_DF(dst, src0, src1); break;
>        case SEL_OP_SEL:  p->SEL(dst, src0, src1); break;
>        case SEL_OP_SEL_INT64:
>          {
> @@ -358,107 +466,7 @@ namespace gbe
>      const GenRegister src1 = ra->genReg(insn.src(1));
>      const GenRegister src2 = ra->genReg(insn.src(2));
>      switch (insn.opcode) {
> -      case SEL_OP_I64ADD:
> -        {
> -          GenRegister x = GenRegister::retype(src2, GEN_TYPE_UD),
> -                      y = GenRegister::suboffset(x, p->curr.execWidth);
> -          loadBottomHalf(x, src0);
> -          loadBottomHalf(y, src1);
> -          addWithCarry(x, x, y);
> -          storeBottomHalf(dst, x);
> -          loadTopHalf(x, src0);
> -          p->ADD(x, x, y);
> -          loadTopHalf(y, src1);
> -          p->ADD(x, x, y);
> -          storeTopHalf(dst, x);
> -        }
> -        break;
> -      case SEL_OP_I64SUB:
> -        {
> -          GenRegister x = GenRegister::retype(src2, GEN_TYPE_UD),
> -                      y = GenRegister::suboffset(x, p->curr.execWidth);
> -          loadBottomHalf(x, src0);
> -          loadBottomHalf(y, src1);
> -          subWithBorrow(x, x, y);
> -          storeBottomHalf(dst, x);
> -          loadTopHalf(x, src0);
> -          subWithBorrow(x, x, y);
> -          loadTopHalf(y, src1);
> -          subWithBorrow(x, x, y);
> -          storeTopHalf(dst, x);
> -        }
> -        break;
> -      case SEL_OP_MUL_HI:
> -       {
> -        int w = p->curr.execWidth;
> -        p->push();
> -        p->curr.execWidth = 8;
> -        p->curr.quarterControl = 0;
> -        p->push();
> -        p->curr.predicate = GEN_PREDICATE_NONE;
> -        p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0, src1);
> -        p->curr.accWrEnable = 1;
> -        p->MACH(src2, src0, src1);
> -        p->curr.accWrEnable = 0;
> -        p->pop();
> -        p->MOV(dst, src2);
> -        if (w == 16) {
> -          p->push();
> -          p->curr.predicate = GEN_PREDICATE_NONE;
> -          p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
> -          p->curr.accWrEnable = 1;
> -          p->MACH(src2, GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
> -          p->curr.accWrEnable = 0;
> -          p->pop();
> -          p->curr.quarterControl = 1;
> -          p->MOV(GenRegister::Qn(dst, 1), src2);
> -        }
> -        p->pop();
> -        break;
> -       }
>        case SEL_OP_MAD:  p->MAD(dst, src0, src1, src2); break;
> -      case SEL_OP_HADD:
> -       {
> -        int w = p->curr.execWidth;
> -        p->push();
> -        p->curr.execWidth = 8;
> -        p->curr.quarterControl = 0;
> -        p->ADDC(dst, src0, src1);
> -        p->SHR(dst, dst, GenRegister::immud(1));
> -        p->SHL(src2, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> -        p->OR(dst, dst, src2);
> -        if (w == 16) {
> -          p->curr.quarterControl = 1;
> -          p->ADDC(GenRegister::Qn(dst, 1), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
> -          p->SHR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
> -          p->SHL(GenRegister::Qn(src2, 1), GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> -          p->OR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::Qn(src2, 1));
> -        }
> -        p->pop();
> -        break;
> -       }
> -      case SEL_OP_RHADD:
> -       {
> -        int w = p->curr.execWidth;
> -        p->push();
> -        p->curr.execWidth = 8;
> -        p->curr.quarterControl = 0;
> -        p->ADDC(dst, src0, src1);
> -        p->ADD(dst, dst, GenRegister::immud(1));
> -        p->SHR(dst, dst, GenRegister::immud(1));
> -        p->SHL(src2, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> -        p->OR(dst, dst, src2);
> -        if (w == 16) {
> -          p->curr.quarterControl = 1;
> -          p->ADDC(GenRegister::Qn(dst, 1), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
> -          p->ADD(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
> -          p->SHR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
> -          p->SHL(GenRegister::Qn(src2, 1), GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> -          p->OR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::Qn(src2, 1));
> -        }
> -        p->pop();
> -        break;
> -       }
>        default: NOT_IMPLEMENTED;
>      }
>    }
> diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
> index 694ae98..cdca859 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -88,7 +88,9 @@ namespace gbe
>      /*! Final Gen ISA emission helper functions */
>      void emitLabelInstruction(const SelectionInstruction &insn);
>      void emitUnaryInstruction(const SelectionInstruction &insn);
> +    void emitUnaryWithTempInstruction(const SelectionInstruction &insn);
>      void emitBinaryInstruction(const SelectionInstruction &insn);
> +    void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
>      void emitTernaryInstruction(const SelectionInstruction &insn);
>      void emitCompareInstruction(const SelectionInstruction &insn);
>      void emitJumpInstruction(const SelectionInstruction &insn);
> diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index da8f2a2..77b1b43 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -1,7 +1,9 @@
>  //                 Family     Latency     SIMD16     SIMD8
>  DECL_GEN7_SCHEDULE(Label,           0,         0,        0)
>  DECL_GEN7_SCHEDULE(Unary,           20,        4,        2)
> +DECL_GEN7_SCHEDULE(UnaryWithTemp,   20,        4,        2)
>  DECL_GEN7_SCHEDULE(Binary,          20,        4,        2)
> +DECL_GEN7_SCHEDULE(BinaryWithTemp,  20,        4,        2)
>  DECL_GEN7_SCHEDULE(Ternary,         20,        4,        2)
>  DECL_GEN7_SCHEDULE(Compare,         20,        4,        2)
>  DECL_GEN7_SCHEDULE(Jump,            14,        1,        1)
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index d40fbfe..5013eab 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -410,13 +410,17 @@ namespace gbe
>  
>  #define ALU1(OP) \
>    INLINE void OP(Reg dst, Reg src) { ALU1(SEL_OP_##OP, dst, src); }
> +#define ALU1WithTemp(OP) \
> +  INLINE void OP(Reg dst, Reg src, Reg temp) { ALU1WithTemp(SEL_OP_##OP, dst, src, temp); }
>  #define ALU2(OP) \
>    INLINE void OP(Reg dst, Reg src0, Reg src1) { ALU2(SEL_OP_##OP, dst, src0, src1); }
> +#define ALU2WithTemp(OP) \
> +  INLINE void OP(Reg dst, Reg src0, Reg src1, Reg temp) { ALU2WithTemp(SEL_OP_##OP, dst, src0, src1, temp); }
>  #define ALU3(OP) \
>    INLINE void OP(Reg dst, Reg src0, Reg src1, Reg src2) { ALU3(SEL_OP_##OP, dst, src0, src1, src2); }
>      ALU1(MOV)
> -    ALU2(MOV_DF)
> -    ALU2(LOAD_DF_IMM)
> +    ALU1WithTemp(MOV_DF)
> +    ALU1WithTemp(LOAD_DF_IMM)
>      ALU1(LOAD_INT64_IMM)
>      ALU1(RNDZ)
>      ALU1(RNDE)
> @@ -435,8 +439,8 @@ namespace gbe
>      ALU2(RSL)
>      ALU2(ASR)
>      ALU2(ADD)
> -    ALU3(I64ADD)
> -    ALU3(I64SUB)
> +    ALU2WithTemp(I64ADD)
> +    ALU2WithTemp(I64SUB)
>      ALU2(MUL)
>      ALU1(FRC)
>      ALU1(RNDD)
> @@ -444,15 +448,17 @@ namespace gbe
>      ALU2(MACH)
>      ALU1(LZD)
>      ALU3(MAD)
> -    ALU3(MUL_HI)
> +    ALU2WithTemp(MUL_HI)
>      ALU1(FBH)
>      ALU1(FBL)
> -    ALU3(HADD)
> -    ALU3(RHADD)
> +    ALU2WithTemp(HADD)
> +    ALU2WithTemp(RHADD)
>      ALU2(UPSAMPLE_SHORT)
>      ALU2(UPSAMPLE_INT)
>  #undef ALU1
> +#undef ALU1WithTemp
>  #undef ALU2
> +#undef ALU2WithTemp
>  #undef ALU3
>      /*! Encode a barrier instruction */
>      void BARRIER(GenRegister src);
> @@ -494,8 +500,12 @@ namespace gbe
>      void MATH(Reg dst, uint32_t function, Reg src);
>      /*! Encode unary instructions */
>      void ALU1(SelectionOpcode opcode, Reg dst, Reg src);
> +    /*! Encode unary with temp reg instructions */
> +    void ALU1WithTemp(SelectionOpcode opcode, Reg dst, Reg src0, Reg temp);
>      /*! Encode binary instructions */
>      void ALU2(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1);
> +    /*! Encode binary with temp reg instructions */
> +    void ALU2WithTemp(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg temp);
>      /*! Encode ternary instructions */
>      void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2);
>      /*! Encode sample instructions */
> @@ -987,6 +997,13 @@ namespace gbe
>      insn->src(0) = src;
>    }
>  
> +  void Selection::Opaque::ALU1WithTemp(SelectionOpcode opcode, Reg dst, Reg src, Reg temp) {
> +    SelectionInstruction *insn = this->appendInsn(opcode, 2, 1);
> +    insn->dst(0) = dst;
> +    insn->src(0) = src;
> +    insn->dst(1) = temp;
> +  }
> +
>    void Selection::Opaque::ALU2(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1) {
>      SelectionInstruction *insn = this->appendInsn(opcode, 1, 2);
>      insn->dst(0) = dst;
> @@ -994,6 +1011,14 @@ namespace gbe
>      insn->src(1) = src1;
>    }
>  
> +  void Selection::Opaque::ALU2WithTemp(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg temp) {
> +    SelectionInstruction *insn = this->appendInsn(opcode, 2, 2);
> +    insn->dst(0) = dst;
> +    insn->src(0) = src0;
> +    insn->src(1) = src1;
> +    insn->dst(1) = temp;
> +  }
> +
>    void Selection::Opaque::ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2) {
>      SelectionInstruction *insn = this->appendInsn(opcode, 1, 3);
>      insn->dst(0) = dst;
> diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> index eeca9af..5660078 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -1,7 +1,7 @@
>  DECL_SELECTION_IR(LABEL, LabelInstruction)
>  DECL_SELECTION_IR(MOV, UnaryInstruction)
> -DECL_SELECTION_IR(MOV_DF, BinaryInstruction)
> -DECL_SELECTION_IR(LOAD_DF_IMM, BinaryInstruction)
> +DECL_SELECTION_IR(MOV_DF, UnaryWithTempInstruction)
> +DECL_SELECTION_IR(LOAD_DF_IMM, UnaryWithTempInstruction)
>  DECL_SELECTION_IR(LOAD_INT64_IMM, UnaryInstruction)
>  DECL_SELECTION_IR(NOT, UnaryInstruction)
>  DECL_SELECTION_IR(LZD, UnaryInstruction)
> @@ -24,8 +24,8 @@ DECL_SELECTION_IR(RSR, BinaryInstruction)
>  DECL_SELECTION_IR(RSL, BinaryInstruction)
>  DECL_SELECTION_IR(ASR, BinaryInstruction)
>  DECL_SELECTION_IR(ADD, BinaryInstruction)
> -DECL_SELECTION_IR(I64ADD, TernaryInstruction)
> -DECL_SELECTION_IR(I64SUB, TernaryInstruction)
> +DECL_SELECTION_IR(I64ADD, BinaryWithTempInstruction)
> +DECL_SELECTION_IR(I64SUB, BinaryWithTempInstruction)
>  DECL_SELECTION_IR(MUL, BinaryInstruction)
>  DECL_SELECTION_IR(ATOMIC, AtomicInstruction)
>  DECL_SELECTION_IR(MACH, BinaryInstruction)
> @@ -51,10 +51,10 @@ DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
>  DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction)
>  DECL_SELECTION_IR(SPILL_REG, SpillRegInstruction)
>  DECL_SELECTION_IR(UNSPILL_REG, UnSpillRegInstruction)
> -DECL_SELECTION_IR(MUL_HI, TernaryInstruction)
> +DECL_SELECTION_IR(MUL_HI, BinaryWithTempInstruction)
>  DECL_SELECTION_IR(FBH, UnaryInstruction)
>  DECL_SELECTION_IR(FBL, UnaryInstruction)
> -DECL_SELECTION_IR(HADD, TernaryInstruction)
> -DECL_SELECTION_IR(RHADD, TernaryInstruction)
> +DECL_SELECTION_IR(HADD, BinaryWithTempInstruction)
> +DECL_SELECTION_IR(RHADD, BinaryWithTempInstruction)
>  DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction)
>  DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction)
> -- 
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list