[Beignet] [PATCH] Define temporary reg as dest reg of instruction
Zhigang Gong
zhigang.gong at linux.intel.com
Mon Aug 12 01:43:59 PDT 2013
OK, LGTM, pushed, thanks.
On Mon, Aug 12, 2013 at 08:17:34AM +0000, Xing, Homer wrote:
> Sure. After I sent out the first one this morning, I thought it had not been sent successfully, so I sent it again. Please feel free to ignore either one.
>
> -----Original Message-----
> From: beignet-bounces+homer.xing=intel.com at lists.freedesktop.org [mailto:beignet-bounces+homer.xing=intel.com at lists.freedesktop.org] On Behalf Of Zhigang Gong
> Sent: Monday, August 12, 2013 4:16 PM
> To: Xing, Homer
> Cc: beignet at lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH] Define temporary reg as dest reg of instruction
>
> Homer,
>
> I just noticed that you sent two duplicate emails for this patch. Should I ignore the first one and take this one?
>
> On Mon, Aug 12, 2013 at 10:26:44AM +0800, Homer Hsing wrote:
> > I previously defined the temporary register as a source register of the instruction.
> > But the instruction scheduler treats source registers as read-only registers.
> > So I now define temporary registers as destination registers.
> >
> > Signed-off-by: Homer Hsing <homer.xing at intel.com>
> > ---
> > backend/src/backend/gen_context.cpp | 212 +++++++++++----------
> > backend/src/backend/gen_context.hpp | 2 +
> > .../src/backend/gen_insn_gen7_schedule_info.hxx | 2 +
> > backend/src/backend/gen_insn_selection.cpp | 39 +++-
> > backend/src/backend/gen_insn_selection.hxx | 14 +-
> > 5 files changed, 153 insertions(+), 116 deletions(-)
> >
> > diff --git a/backend/src/backend/gen_context.cpp
> > b/backend/src/backend/gen_context.cpp
> > index 024da8a..b3650c5 100644
> > --- a/backend/src/backend/gen_context.cpp
> > +++ b/backend/src/backend/gen_context.cpp
> > @@ -151,13 +151,121 @@ namespace gbe
> > }
> > }
> >
> > + void GenContext::emitUnaryWithTempInstruction(const SelectionInstruction &insn) {
> > + GenRegister dst = ra->genReg(insn.dst(0));
> > + GenRegister src = ra->genReg(insn.src(0));
> > + GenRegister tmp = ra->genReg(insn.dst(1));
> > + switch (insn.opcode) {
> > + case SEL_OP_LOAD_DF_IMM:
> > + p->LOAD_DF_IMM(dst, tmp, src.value.df);
> > + break;
> > + case SEL_OP_MOV_DF:
> > + p->MOV_DF(dst, src, tmp);
> > + break;
> > + default:
> > + NOT_IMPLEMENTED;
> > + }
> > + }
> > +
> > + void GenContext::emitBinaryWithTempInstruction(const SelectionInstruction &insn) {
> > + GenRegister dst = ra->genReg(insn.dst(0));
> > + GenRegister src0 = ra->genReg(insn.src(0));
> > + GenRegister src1 = ra->genReg(insn.src(1));
> > + GenRegister tmp = ra->genReg(insn.dst(1));
> > + switch (insn.opcode) {
> > + case SEL_OP_I64ADD: {
> > + GenRegister x = GenRegister::retype(tmp, GEN_TYPE_UD),
> > + y = GenRegister::suboffset(x, p->curr.execWidth);
> > + loadBottomHalf(x, src0);
> > + loadBottomHalf(y, src1);
> > + addWithCarry(x, x, y);
> > + storeBottomHalf(dst, x);
> > + loadTopHalf(x, src0);
> > + p->ADD(x, x, y);
> > + loadTopHalf(y, src1);
> > + p->ADD(x, x, y);
> > + storeTopHalf(dst, x);
> > + break;
> > + }
> > + case SEL_OP_I64SUB: {
> > + GenRegister x = GenRegister::retype(tmp, GEN_TYPE_UD),
> > + y = GenRegister::suboffset(x, p->curr.execWidth);
> > + loadBottomHalf(x, src0);
> > + loadBottomHalf(y, src1);
> > + subWithBorrow(x, x, y);
> > + storeBottomHalf(dst, x);
> > + loadTopHalf(x, src0);
> > + subWithBorrow(x, x, y);
> > + loadTopHalf(y, src1);
> > + subWithBorrow(x, x, y);
> > + storeTopHalf(dst, x);
> > + break;
> > + }
> > + case SEL_OP_MUL_HI: {
> > + int w = p->curr.execWidth;
> > + p->push();
> > + p->curr.execWidth = 8;
> > + for (int i = 0; i < w / 8; i ++) {
> > + p->push();
> > + p->curr.predicate = GEN_PREDICATE_NONE;
> > + p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0, src1);
> > + p->curr.accWrEnable = 1;
> > + p->MACH(tmp, src0, src1);
> > + p->pop();
> > + p->curr.quarterControl = i;
> > + p->MOV(dst, tmp);
> > + dst = GenRegister::Qn(dst, 1);
> > + src0 = GenRegister::Qn(src0, 1);
> > + src1 = GenRegister::Qn(src1, 1);
> > + }
> > + p->pop();
> > + break;
> > + }
> > + case SEL_OP_HADD: {
> > + int w = p->curr.execWidth;
> > + p->push();
> > + p->curr.execWidth = 8;
> > + for (int i = 0; i < w / 8; i ++) {
> > + p->curr.quarterControl = i;
> > + p->ADDC(dst, src0, src1);
> > + p->SHR(dst, dst, GenRegister::immud(1));
> > + p->SHL(tmp, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> > + p->OR(dst, dst, tmp);
> > + dst = GenRegister::Qn(dst, 1);
> > + src0 = GenRegister::Qn(src0, 1);
> > + src1 = GenRegister::Qn(src1, 1);
> > + }
> > + p->pop();
> > + break;
> > + }
> > + case SEL_OP_RHADD: {
> > + int w = p->curr.execWidth;
> > + p->push();
> > + p->curr.execWidth = 8;
> > + for (int i = 0; i < w / 8; i ++) {
> > + p->curr.quarterControl = i;
> > + p->ADDC(dst, src0, src1);
> > + p->ADD(dst, dst, GenRegister::immud(1));
> > + p->SHR(dst, dst, GenRegister::immud(1));
> > + p->SHL(tmp, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> > + p->OR(dst, dst, tmp);
> > + dst = GenRegister::Qn(dst, 1);
> > + src0 = GenRegister::Qn(src0, 1);
> > + src1 = GenRegister::Qn(src1, 1);
> > + }
> > + p->pop();
> > + break;
> > + }
> > + default:
> > + NOT_IMPLEMENTED;
> > + }
> > + }
> > +
> > void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
> > const GenRegister dst = ra->genReg(insn.dst(0));
> > const GenRegister src0 = ra->genReg(insn.src(0));
> > const GenRegister src1 = ra->genReg(insn.src(1));
> > switch (insn.opcode) {
> > - case SEL_OP_LOAD_DF_IMM: p->LOAD_DF_IMM(dst, src1, src0.value.df); break;
> > - case SEL_OP_MOV_DF: p->MOV_DF(dst, src0, src1); break;
> > case SEL_OP_SEL: p->SEL(dst, src0, src1); break;
> > case SEL_OP_SEL_INT64:
> > {
> > @@ -358,107 +466,7 @@ namespace gbe
> > const GenRegister src1 = ra->genReg(insn.src(1));
> > const GenRegister src2 = ra->genReg(insn.src(2));
> > switch (insn.opcode) {
> > - case SEL_OP_I64ADD:
> > - {
> > - GenRegister x = GenRegister::retype(src2, GEN_TYPE_UD),
> > - y = GenRegister::suboffset(x, p->curr.execWidth);
> > - loadBottomHalf(x, src0);
> > - loadBottomHalf(y, src1);
> > - addWithCarry(x, x, y);
> > - storeBottomHalf(dst, x);
> > - loadTopHalf(x, src0);
> > - p->ADD(x, x, y);
> > - loadTopHalf(y, src1);
> > - p->ADD(x, x, y);
> > - storeTopHalf(dst, x);
> > - }
> > - break;
> > - case SEL_OP_I64SUB:
> > - {
> > - GenRegister x = GenRegister::retype(src2, GEN_TYPE_UD),
> > - y = GenRegister::suboffset(x, p->curr.execWidth);
> > - loadBottomHalf(x, src0);
> > - loadBottomHalf(y, src1);
> > - subWithBorrow(x, x, y);
> > - storeBottomHalf(dst, x);
> > - loadTopHalf(x, src0);
> > - subWithBorrow(x, x, y);
> > - loadTopHalf(y, src1);
> > - subWithBorrow(x, x, y);
> > - storeTopHalf(dst, x);
> > - }
> > - break;
> > - case SEL_OP_MUL_HI:
> > - {
> > - int w = p->curr.execWidth;
> > - p->push();
> > - p->curr.execWidth = 8;
> > - p->curr.quarterControl = 0;
> > - p->push();
> > - p->curr.predicate = GEN_PREDICATE_NONE;
> > - p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0, src1);
> > - p->curr.accWrEnable = 1;
> > - p->MACH(src2, src0, src1);
> > - p->curr.accWrEnable = 0;
> > - p->pop();
> > - p->MOV(dst, src2);
> > - if (w == 16) {
> > - p->push();
> > - p->curr.predicate = GEN_PREDICATE_NONE;
> > - p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
> > - p->curr.accWrEnable = 1;
> > - p->MACH(src2, GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
> > - p->curr.accWrEnable = 0;
> > - p->pop();
> > - p->curr.quarterControl = 1;
> > - p->MOV(GenRegister::Qn(dst, 1), src2);
> > - }
> > - p->pop();
> > - break;
> > - }
> > case SEL_OP_MAD: p->MAD(dst, src0, src1, src2); break;
> > - case SEL_OP_HADD:
> > - {
> > - int w = p->curr.execWidth;
> > - p->push();
> > - p->curr.execWidth = 8;
> > - p->curr.quarterControl = 0;
> > - p->ADDC(dst, src0, src1);
> > - p->SHR(dst, dst, GenRegister::immud(1));
> > - p->SHL(src2, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> > - p->OR(dst, dst, src2);
> > - if (w == 16) {
> > - p->curr.quarterControl = 1;
> > - p->ADDC(GenRegister::Qn(dst, 1), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
> > - p->SHR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
> > - p->SHL(GenRegister::Qn(src2, 1), GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> > - p->OR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::Qn(src2, 1));
> > - }
> > - p->pop();
> > - break;
> > - }
> > - case SEL_OP_RHADD:
> > - {
> > - int w = p->curr.execWidth;
> > - p->push();
> > - p->curr.execWidth = 8;
> > - p->curr.quarterControl = 0;
> > - p->ADDC(dst, src0, src1);
> > - p->ADD(dst, dst, GenRegister::immud(1));
> > - p->SHR(dst, dst, GenRegister::immud(1));
> > - p->SHL(src2, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> > - p->OR(dst, dst, src2);
> > - if (w == 16) {
> > - p->curr.quarterControl = 1;
> > - p->ADDC(GenRegister::Qn(dst, 1), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
> > - p->ADD(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
> > - p->SHR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
> > - p->SHL(GenRegister::Qn(src2, 1), GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> > - p->OR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::Qn(src2, 1));
> > - }
> > - p->pop();
> > - break;
> > - }
> > default: NOT_IMPLEMENTED;
> > }
> > }
> > diff --git a/backend/src/backend/gen_context.hpp
> > b/backend/src/backend/gen_context.hpp
> > index 694ae98..cdca859 100644
> > --- a/backend/src/backend/gen_context.hpp
> > +++ b/backend/src/backend/gen_context.hpp
> > @@ -88,7 +88,9 @@ namespace gbe
> > /*! Final Gen ISA emission helper functions */
> > void emitLabelInstruction(const SelectionInstruction &insn);
> > void emitUnaryInstruction(const SelectionInstruction &insn);
> > + void emitUnaryWithTempInstruction(const SelectionInstruction
> > + &insn);
> > void emitBinaryInstruction(const SelectionInstruction &insn);
> > + void emitBinaryWithTempInstruction(const SelectionInstruction
> > + &insn);
> > void emitTernaryInstruction(const SelectionInstruction &insn);
> > void emitCompareInstruction(const SelectionInstruction &insn);
> > void emitJumpInstruction(const SelectionInstruction &insn); diff
> > --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> > b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> > index da8f2a2..77b1b43 100644
> > --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> > +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> > @@ -1,7 +1,9 @@
> > // Family Latency SIMD16 SIMD8
> > DECL_GEN7_SCHEDULE(Label, 0, 0, 0)
> > DECL_GEN7_SCHEDULE(Unary, 20, 4, 2)
> > +DECL_GEN7_SCHEDULE(UnaryWithTemp, 20, 4, 2)
> > DECL_GEN7_SCHEDULE(Binary, 20, 4, 2)
> > +DECL_GEN7_SCHEDULE(BinaryWithTemp, 20, 4, 2)
> > DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2)
> > DECL_GEN7_SCHEDULE(Compare, 20, 4, 2)
> > DECL_GEN7_SCHEDULE(Jump, 14, 1, 1)
> > diff --git a/backend/src/backend/gen_insn_selection.cpp
> > b/backend/src/backend/gen_insn_selection.cpp
> > index d40fbfe..5013eab 100644
> > --- a/backend/src/backend/gen_insn_selection.cpp
> > +++ b/backend/src/backend/gen_insn_selection.cpp
> > @@ -410,13 +410,17 @@ namespace gbe
> >
> > #define ALU1(OP) \
> > INLINE void OP(Reg dst, Reg src) { ALU1(SEL_OP_##OP, dst, src); }
> > +#define ALU1WithTemp(OP) \
> > + INLINE void OP(Reg dst, Reg src, Reg temp) {
> > +ALU1WithTemp(SEL_OP_##OP, dst, src, temp); }
> > #define ALU2(OP) \
> > INLINE void OP(Reg dst, Reg src0, Reg src1) { ALU2(SEL_OP_##OP,
> > dst, src0, src1); }
> > +#define ALU2WithTemp(OP) \
> > + INLINE void OP(Reg dst, Reg src0, Reg src1, Reg temp) {
> > +ALU2WithTemp(SEL_OP_##OP, dst, src0, src1, temp); }
> > #define ALU3(OP) \
> > INLINE void OP(Reg dst, Reg src0, Reg src1, Reg src2) { ALU3(SEL_OP_##OP, dst, src0, src1, src2); }
> > ALU1(MOV)
> > - ALU2(MOV_DF)
> > - ALU2(LOAD_DF_IMM)
> > + ALU1WithTemp(MOV_DF)
> > + ALU1WithTemp(LOAD_DF_IMM)
> > ALU1(LOAD_INT64_IMM)
> > ALU1(RNDZ)
> > ALU1(RNDE)
> > @@ -435,8 +439,8 @@ namespace gbe
> > ALU2(RSL)
> > ALU2(ASR)
> > ALU2(ADD)
> > - ALU3(I64ADD)
> > - ALU3(I64SUB)
> > + ALU2WithTemp(I64ADD)
> > + ALU2WithTemp(I64SUB)
> > ALU2(MUL)
> > ALU1(FRC)
> > ALU1(RNDD)
> > @@ -444,15 +448,17 @@ namespace gbe
> > ALU2(MACH)
> > ALU1(LZD)
> > ALU3(MAD)
> > - ALU3(MUL_HI)
> > + ALU2WithTemp(MUL_HI)
> > ALU1(FBH)
> > ALU1(FBL)
> > - ALU3(HADD)
> > - ALU3(RHADD)
> > + ALU2WithTemp(HADD)
> > + ALU2WithTemp(RHADD)
> > ALU2(UPSAMPLE_SHORT)
> > ALU2(UPSAMPLE_INT)
> > #undef ALU1
> > +#undef ALU1WithTemp
> > #undef ALU2
> > +#undef ALU2WithTemp
> > #undef ALU3
> > /*! Encode a barrier instruction */
> > void BARRIER(GenRegister src);
> > @@ -494,8 +500,12 @@ namespace gbe
> > void MATH(Reg dst, uint32_t function, Reg src);
> > /*! Encode unary instructions */
> > void ALU1(SelectionOpcode opcode, Reg dst, Reg src);
> > + /*! Encode unary with temp reg instructions */
> > + void ALU1WithTemp(SelectionOpcode opcode, Reg dst, Reg src0, Reg
> > + temp);
> > /*! Encode binary instructions */
> > void ALU2(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1);
> > + /*! Encode binary with temp reg instructions */
> > + void ALU2WithTemp(SelectionOpcode opcode, Reg dst, Reg src0, Reg
> > + src1, Reg temp);
> > /*! Encode ternary instructions */
> > void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2);
> > /*! Encode sample instructions */ @@ -987,6 +997,13 @@ namespace
> > gbe
> > insn->src(0) = src;
> > }
> >
> > + void Selection::Opaque::ALU1WithTemp(SelectionOpcode opcode, Reg dst, Reg src, Reg temp) {
> > + SelectionInstruction *insn = this->appendInsn(opcode, 2, 1);
> > + insn->dst(0) = dst;
> > + insn->src(0) = src;
> > + insn->dst(1) = temp;
> > + }
> > +
> > void Selection::Opaque::ALU2(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1) {
> > SelectionInstruction *insn = this->appendInsn(opcode, 1, 2);
> > insn->dst(0) = dst;
> > @@ -994,6 +1011,14 @@ namespace gbe
> > insn->src(1) = src1;
> > }
> >
> > + void Selection::Opaque::ALU2WithTemp(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg temp) {
> > + SelectionInstruction *insn = this->appendInsn(opcode, 2, 2);
> > + insn->dst(0) = dst;
> > + insn->src(0) = src0;
> > + insn->src(1) = src1;
> > + insn->dst(1) = temp;
> > + }
> > +
> > void Selection::Opaque::ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2) {
> > SelectionInstruction *insn = this->appendInsn(opcode, 1, 3);
> > insn->dst(0) = dst;
> > diff --git a/backend/src/backend/gen_insn_selection.hxx
> > b/backend/src/backend/gen_insn_selection.hxx
> > index eeca9af..5660078 100644
> > --- a/backend/src/backend/gen_insn_selection.hxx
> > +++ b/backend/src/backend/gen_insn_selection.hxx
> > @@ -1,7 +1,7 @@
> > DECL_SELECTION_IR(LABEL, LabelInstruction) DECL_SELECTION_IR(MOV,
> > UnaryInstruction) -DECL_SELECTION_IR(MOV_DF, BinaryInstruction)
> > -DECL_SELECTION_IR(LOAD_DF_IMM, BinaryInstruction)
> > +DECL_SELECTION_IR(MOV_DF, UnaryWithTempInstruction)
> > +DECL_SELECTION_IR(LOAD_DF_IMM, UnaryWithTempInstruction)
> > DECL_SELECTION_IR(LOAD_INT64_IMM, UnaryInstruction)
> > DECL_SELECTION_IR(NOT, UnaryInstruction) DECL_SELECTION_IR(LZD,
> > UnaryInstruction) @@ -24,8 +24,8 @@ DECL_SELECTION_IR(RSR,
> > BinaryInstruction) DECL_SELECTION_IR(RSL, BinaryInstruction)
> > DECL_SELECTION_IR(ASR, BinaryInstruction) DECL_SELECTION_IR(ADD,
> > BinaryInstruction) -DECL_SELECTION_IR(I64ADD, TernaryInstruction)
> > -DECL_SELECTION_IR(I64SUB, TernaryInstruction)
> > +DECL_SELECTION_IR(I64ADD, BinaryWithTempInstruction)
> > +DECL_SELECTION_IR(I64SUB, BinaryWithTempInstruction)
> > DECL_SELECTION_IR(MUL, BinaryInstruction) DECL_SELECTION_IR(ATOMIC,
> > AtomicInstruction) DECL_SELECTION_IR(MACH, BinaryInstruction) @@
> > -51,10 +51,10 @@ DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
> > DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction)
> > DECL_SELECTION_IR(SPILL_REG, SpillRegInstruction)
> > DECL_SELECTION_IR(UNSPILL_REG, UnSpillRegInstruction)
> > -DECL_SELECTION_IR(MUL_HI, TernaryInstruction)
> > +DECL_SELECTION_IR(MUL_HI, BinaryWithTempInstruction)
> > DECL_SELECTION_IR(FBH, UnaryInstruction) DECL_SELECTION_IR(FBL,
> > UnaryInstruction) -DECL_SELECTION_IR(HADD, TernaryInstruction)
> > -DECL_SELECTION_IR(RHADD, TernaryInstruction)
> > +DECL_SELECTION_IR(HADD, BinaryWithTempInstruction)
> > +DECL_SELECTION_IR(RHADD, BinaryWithTempInstruction)
> > DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction)
> > DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction)
> > --
> > 1.8.1.2
> >
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list