[Beignet] [PATCH] Define temporary reg as dest reg of instruction
Xing, Homer
homer.xing at intel.com
Mon Aug 12 01:17:34 PDT 2013
Sure. After I sent the first one this morning, I thought it had not gone out successfully, so I sent it again. Please feel free to ignore either one.
-----Original Message-----
From: beignet-bounces+homer.xing=intel.com at lists.freedesktop.org [mailto:beignet-bounces+homer.xing=intel.com at lists.freedesktop.org] On Behalf Of Zhigang Gong
Sent: Monday, August 12, 2013 4:16 PM
To: Xing, Homer
Cc: beignet at lists.freedesktop.org
Subject: Re: [Beignet] [PATCH] Define temporary reg as dest reg of instruction
Homer,
Just found you sent two duplicate emails on this patch. Should I just ignore the first one and take this one?
On Mon, Aug 12, 2013 at 10:26:44AM +0800, Homer Hsing wrote:
> I had defined the temporary register as a source register of the instruction.
> But the instruction scheduler treats source registers as read-only.
> So I now define temporary registers as destination registers.
>
> Signed-off-by: Homer Hsing <homer.xing at intel.com>
> ---
> backend/src/backend/gen_context.cpp | 212 +++++++++++----------
> backend/src/backend/gen_context.hpp | 2 +
> .../src/backend/gen_insn_gen7_schedule_info.hxx | 2 +
> backend/src/backend/gen_insn_selection.cpp | 39 +++-
> backend/src/backend/gen_insn_selection.hxx | 14 +-
> 5 files changed, 153 insertions(+), 116 deletions(-)
>
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 024da8a..b3650c5 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -151,13 +151,121 @@ namespace gbe
> }
> }
>
> + void GenContext::emitUnaryWithTempInstruction(const SelectionInstruction &insn) {
> + GenRegister dst = ra->genReg(insn.dst(0));
> + GenRegister src = ra->genReg(insn.src(0));
> + GenRegister tmp = ra->genReg(insn.dst(1));
> + switch (insn.opcode) {
> + case SEL_OP_LOAD_DF_IMM:
> + p->LOAD_DF_IMM(dst, tmp, src.value.df);
> + break;
> + case SEL_OP_MOV_DF:
> + p->MOV_DF(dst, src, tmp);
> + break;
> + default:
> + NOT_IMPLEMENTED;
> + }
> + }
> +
> + void GenContext::emitBinaryWithTempInstruction(const SelectionInstruction &insn) {
> + GenRegister dst = ra->genReg(insn.dst(0));
> + GenRegister src0 = ra->genReg(insn.src(0));
> + GenRegister src1 = ra->genReg(insn.src(1));
> + GenRegister tmp = ra->genReg(insn.dst(1));
> + switch (insn.opcode) {
> + case SEL_OP_I64ADD: {
> + GenRegister x = GenRegister::retype(tmp, GEN_TYPE_UD),
> + y = GenRegister::suboffset(x, p->curr.execWidth);
> + loadBottomHalf(x, src0);
> + loadBottomHalf(y, src1);
> + addWithCarry(x, x, y);
> + storeBottomHalf(dst, x);
> + loadTopHalf(x, src0);
> + p->ADD(x, x, y);
> + loadTopHalf(y, src1);
> + p->ADD(x, x, y);
> + storeTopHalf(dst, x);
> + break;
> + }
> + case SEL_OP_I64SUB: {
> + GenRegister x = GenRegister::retype(tmp, GEN_TYPE_UD),
> + y = GenRegister::suboffset(x, p->curr.execWidth);
> + loadBottomHalf(x, src0);
> + loadBottomHalf(y, src1);
> + subWithBorrow(x, x, y);
> + storeBottomHalf(dst, x);
> + loadTopHalf(x, src0);
> + subWithBorrow(x, x, y);
> + loadTopHalf(y, src1);
> + subWithBorrow(x, x, y);
> + storeTopHalf(dst, x);
> + break;
> + }
> + case SEL_OP_MUL_HI: {
> + int w = p->curr.execWidth;
> + p->push();
> + p->curr.execWidth = 8;
> + for (int i = 0; i < w / 8; i ++) {
> + p->push();
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0, src1);
> + p->curr.accWrEnable = 1;
> + p->MACH(tmp, src0, src1);
> + p->pop();
> + p->curr.quarterControl = i;
> + p->MOV(dst, tmp);
> + dst = GenRegister::Qn(dst, 1);
> + src0 = GenRegister::Qn(src0, 1);
> + src1 = GenRegister::Qn(src1, 1);
> + }
> + p->pop();
> + break;
> + }
> + case SEL_OP_HADD: {
> + int w = p->curr.execWidth;
> + p->push();
> + p->curr.execWidth = 8;
> + for (int i = 0; i < w / 8; i ++) {
> + p->curr.quarterControl = i;
> + p->ADDC(dst, src0, src1);
> + p->SHR(dst, dst, GenRegister::immud(1));
> + p->SHL(tmp, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> + p->OR(dst, dst, tmp);
> + dst = GenRegister::Qn(dst, 1);
> + src0 = GenRegister::Qn(src0, 1);
> + src1 = GenRegister::Qn(src1, 1);
> + }
> + p->pop();
> + break;
> + }
> + case SEL_OP_RHADD: {
> + int w = p->curr.execWidth;
> + p->push();
> + p->curr.execWidth = 8;
> + for (int i = 0; i < w / 8; i ++) {
> + p->curr.quarterControl = i;
> + p->ADDC(dst, src0, src1);
> + p->ADD(dst, dst, GenRegister::immud(1));
> + p->SHR(dst, dst, GenRegister::immud(1));
> + p->SHL(tmp, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> + p->OR(dst, dst, tmp);
> + dst = GenRegister::Qn(dst, 1);
> + src0 = GenRegister::Qn(src0, 1);
> + src1 = GenRegister::Qn(src1, 1);
> + }
> + p->pop();
> + break;
> + }
> + default:
> + NOT_IMPLEMENTED;
> + }
> + }
> +
> void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
> const GenRegister dst = ra->genReg(insn.dst(0));
> const GenRegister src0 = ra->genReg(insn.src(0));
> const GenRegister src1 = ra->genReg(insn.src(1));
> switch (insn.opcode) {
> - case SEL_OP_LOAD_DF_IMM: p->LOAD_DF_IMM(dst, src1, src0.value.df); break;
> - case SEL_OP_MOV_DF: p->MOV_DF(dst, src0, src1); break;
> case SEL_OP_SEL: p->SEL(dst, src0, src1); break;
> case SEL_OP_SEL_INT64:
> {
> @@ -358,107 +466,7 @@ namespace gbe
> const GenRegister src1 = ra->genReg(insn.src(1));
> const GenRegister src2 = ra->genReg(insn.src(2));
> switch (insn.opcode) {
> - case SEL_OP_I64ADD:
> - {
> - GenRegister x = GenRegister::retype(src2, GEN_TYPE_UD),
> - y = GenRegister::suboffset(x, p->curr.execWidth);
> - loadBottomHalf(x, src0);
> - loadBottomHalf(y, src1);
> - addWithCarry(x, x, y);
> - storeBottomHalf(dst, x);
> - loadTopHalf(x, src0);
> - p->ADD(x, x, y);
> - loadTopHalf(y, src1);
> - p->ADD(x, x, y);
> - storeTopHalf(dst, x);
> - }
> - break;
> - case SEL_OP_I64SUB:
> - {
> - GenRegister x = GenRegister::retype(src2, GEN_TYPE_UD),
> - y = GenRegister::suboffset(x, p->curr.execWidth);
> - loadBottomHalf(x, src0);
> - loadBottomHalf(y, src1);
> - subWithBorrow(x, x, y);
> - storeBottomHalf(dst, x);
> - loadTopHalf(x, src0);
> - subWithBorrow(x, x, y);
> - loadTopHalf(y, src1);
> - subWithBorrow(x, x, y);
> - storeTopHalf(dst, x);
> - }
> - break;
> - case SEL_OP_MUL_HI:
> - {
> - int w = p->curr.execWidth;
> - p->push();
> - p->curr.execWidth = 8;
> - p->curr.quarterControl = 0;
> - p->push();
> - p->curr.predicate = GEN_PREDICATE_NONE;
> - p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0, src1);
> - p->curr.accWrEnable = 1;
> - p->MACH(src2, src0, src1);
> - p->curr.accWrEnable = 0;
> - p->pop();
> - p->MOV(dst, src2);
> - if (w == 16) {
> - p->push();
> - p->curr.predicate = GEN_PREDICATE_NONE;
> - p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
> - p->curr.accWrEnable = 1;
> - p->MACH(src2, GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
> - p->curr.accWrEnable = 0;
> - p->pop();
> - p->curr.quarterControl = 1;
> - p->MOV(GenRegister::Qn(dst, 1), src2);
> - }
> - p->pop();
> - break;
> - }
> case SEL_OP_MAD: p->MAD(dst, src0, src1, src2); break;
> - case SEL_OP_HADD:
> - {
> - int w = p->curr.execWidth;
> - p->push();
> - p->curr.execWidth = 8;
> - p->curr.quarterControl = 0;
> - p->ADDC(dst, src0, src1);
> - p->SHR(dst, dst, GenRegister::immud(1));
> - p->SHL(src2, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> - p->OR(dst, dst, src2);
> - if (w == 16) {
> - p->curr.quarterControl = 1;
> - p->ADDC(GenRegister::Qn(dst, 1), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
> - p->SHR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
> - p->SHL(GenRegister::Qn(src2, 1), GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> - p->OR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::Qn(src2, 1));
> - }
> - p->pop();
> - break;
> - }
> - case SEL_OP_RHADD:
> - {
> - int w = p->curr.execWidth;
> - p->push();
> - p->curr.execWidth = 8;
> - p->curr.quarterControl = 0;
> - p->ADDC(dst, src0, src1);
> - p->ADD(dst, dst, GenRegister::immud(1));
> - p->SHR(dst, dst, GenRegister::immud(1));
> - p->SHL(src2, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> - p->OR(dst, dst, src2);
> - if (w == 16) {
> - p->curr.quarterControl = 1;
> - p->ADDC(GenRegister::Qn(dst, 1), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
> - p->ADD(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
> - p->SHR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
> - p->SHL(GenRegister::Qn(src2, 1), GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
> - p->OR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::Qn(src2, 1));
> - }
> - p->pop();
> - break;
> - }
> default: NOT_IMPLEMENTED;
> }
> }
> diff --git a/backend/src/backend/gen_context.hpp
> b/backend/src/backend/gen_context.hpp
> index 694ae98..cdca859 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -88,7 +88,9 @@ namespace gbe
> /*! Final Gen ISA emission helper functions */
> void emitLabelInstruction(const SelectionInstruction &insn);
> void emitUnaryInstruction(const SelectionInstruction &insn);
> + void emitUnaryWithTempInstruction(const SelectionInstruction &insn);
> void emitBinaryInstruction(const SelectionInstruction &insn);
> + void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
> void emitTernaryInstruction(const SelectionInstruction &insn);
> void emitCompareInstruction(const SelectionInstruction &insn);
> void emitJumpInstruction(const SelectionInstruction &insn);
> diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index da8f2a2..77b1b43 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -1,7 +1,9 @@
> // Family Latency SIMD16 SIMD8
> DECL_GEN7_SCHEDULE(Label, 0, 0, 0)
> DECL_GEN7_SCHEDULE(Unary, 20, 4, 2)
> +DECL_GEN7_SCHEDULE(UnaryWithTemp, 20, 4, 2)
> DECL_GEN7_SCHEDULE(Binary, 20, 4, 2)
> +DECL_GEN7_SCHEDULE(BinaryWithTemp, 20, 4, 2)
> DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2)
> DECL_GEN7_SCHEDULE(Compare, 20, 4, 2)
> DECL_GEN7_SCHEDULE(Jump, 14, 1, 1)
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index d40fbfe..5013eab 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -410,13 +410,17 @@ namespace gbe
>
> #define ALU1(OP) \
> INLINE void OP(Reg dst, Reg src) { ALU1(SEL_OP_##OP, dst, src); }
> +#define ALU1WithTemp(OP) \
> + INLINE void OP(Reg dst, Reg src, Reg temp) { ALU1WithTemp(SEL_OP_##OP, dst, src, temp); }
> #define ALU2(OP) \
> INLINE void OP(Reg dst, Reg src0, Reg src1) { ALU2(SEL_OP_##OP, dst, src0, src1); }
> +#define ALU2WithTemp(OP) \
> + INLINE void OP(Reg dst, Reg src0, Reg src1, Reg temp) { ALU2WithTemp(SEL_OP_##OP, dst, src0, src1, temp); }
> #define ALU3(OP) \
> INLINE void OP(Reg dst, Reg src0, Reg src1, Reg src2) { ALU3(SEL_OP_##OP, dst, src0, src1, src2); }
> ALU1(MOV)
> - ALU2(MOV_DF)
> - ALU2(LOAD_DF_IMM)
> + ALU1WithTemp(MOV_DF)
> + ALU1WithTemp(LOAD_DF_IMM)
> ALU1(LOAD_INT64_IMM)
> ALU1(RNDZ)
> ALU1(RNDE)
> @@ -435,8 +439,8 @@ namespace gbe
> ALU2(RSL)
> ALU2(ASR)
> ALU2(ADD)
> - ALU3(I64ADD)
> - ALU3(I64SUB)
> + ALU2WithTemp(I64ADD)
> + ALU2WithTemp(I64SUB)
> ALU2(MUL)
> ALU1(FRC)
> ALU1(RNDD)
> @@ -444,15 +448,17 @@ namespace gbe
> ALU2(MACH)
> ALU1(LZD)
> ALU3(MAD)
> - ALU3(MUL_HI)
> + ALU2WithTemp(MUL_HI)
> ALU1(FBH)
> ALU1(FBL)
> - ALU3(HADD)
> - ALU3(RHADD)
> + ALU2WithTemp(HADD)
> + ALU2WithTemp(RHADD)
> ALU2(UPSAMPLE_SHORT)
> ALU2(UPSAMPLE_INT)
> #undef ALU1
> +#undef ALU1WithTemp
> #undef ALU2
> +#undef ALU2WithTemp
> #undef ALU3
> /*! Encode a barrier instruction */
> void BARRIER(GenRegister src);
> @@ -494,8 +500,12 @@ namespace gbe
> void MATH(Reg dst, uint32_t function, Reg src);
> /*! Encode unary instructions */
> void ALU1(SelectionOpcode opcode, Reg dst, Reg src);
> + /*! Encode unary with temp reg instructions */
> + void ALU1WithTemp(SelectionOpcode opcode, Reg dst, Reg src0, Reg temp);
> /*! Encode binary instructions */
> void ALU2(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1);
> + /*! Encode binary with temp reg instructions */
> + void ALU2WithTemp(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg temp);
> /*! Encode ternary instructions */
> void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2);
> /*! Encode sample instructions */
> @@ -987,6 +997,13 @@ namespace gbe
> insn->src(0) = src;
> }
>
> + void Selection::Opaque::ALU1WithTemp(SelectionOpcode opcode, Reg dst, Reg src, Reg temp) {
> + SelectionInstruction *insn = this->appendInsn(opcode, 2, 1);
> + insn->dst(0) = dst;
> + insn->src(0) = src;
> + insn->dst(1) = temp;
> + }
> +
> void Selection::Opaque::ALU2(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1) {
> SelectionInstruction *insn = this->appendInsn(opcode, 1, 2);
> insn->dst(0) = dst;
> @@ -994,6 +1011,14 @@ namespace gbe
> insn->src(1) = src1;
> }
>
> + void Selection::Opaque::ALU2WithTemp(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg temp) {
> + SelectionInstruction *insn = this->appendInsn(opcode, 2, 2);
> + insn->dst(0) = dst;
> + insn->src(0) = src0;
> + insn->src(1) = src1;
> + insn->dst(1) = temp;
> + }
> +
> void Selection::Opaque::ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2) {
> SelectionInstruction *insn = this->appendInsn(opcode, 1, 3);
> insn->dst(0) = dst;
> diff --git a/backend/src/backend/gen_insn_selection.hxx
> b/backend/src/backend/gen_insn_selection.hxx
> index eeca9af..5660078 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -1,7 +1,7 @@
> DECL_SELECTION_IR(LABEL, LabelInstruction)
> DECL_SELECTION_IR(MOV, UnaryInstruction)
> -DECL_SELECTION_IR(MOV_DF, BinaryInstruction)
> -DECL_SELECTION_IR(LOAD_DF_IMM, BinaryInstruction)
> +DECL_SELECTION_IR(MOV_DF, UnaryWithTempInstruction)
> +DECL_SELECTION_IR(LOAD_DF_IMM, UnaryWithTempInstruction)
> DECL_SELECTION_IR(LOAD_INT64_IMM, UnaryInstruction)
> DECL_SELECTION_IR(NOT, UnaryInstruction)
> DECL_SELECTION_IR(LZD, UnaryInstruction)
> @@ -24,8 +24,8 @@ DECL_SELECTION_IR(RSR, BinaryInstruction)
> DECL_SELECTION_IR(RSL, BinaryInstruction)
> DECL_SELECTION_IR(ASR, BinaryInstruction)
> DECL_SELECTION_IR(ADD, BinaryInstruction)
> -DECL_SELECTION_IR(I64ADD, TernaryInstruction)
> -DECL_SELECTION_IR(I64SUB, TernaryInstruction)
> +DECL_SELECTION_IR(I64ADD, BinaryWithTempInstruction)
> +DECL_SELECTION_IR(I64SUB, BinaryWithTempInstruction)
> DECL_SELECTION_IR(MUL, BinaryInstruction)
> DECL_SELECTION_IR(ATOMIC, AtomicInstruction)
> DECL_SELECTION_IR(MACH, BinaryInstruction)
> @@ -51,10 +51,10 @@ DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
> DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction)
> DECL_SELECTION_IR(SPILL_REG, SpillRegInstruction)
> DECL_SELECTION_IR(UNSPILL_REG, UnSpillRegInstruction)
> -DECL_SELECTION_IR(MUL_HI, TernaryInstruction)
> +DECL_SELECTION_IR(MUL_HI, BinaryWithTempInstruction)
> DECL_SELECTION_IR(FBH, UnaryInstruction)
> DECL_SELECTION_IR(FBL, UnaryInstruction)
> -DECL_SELECTION_IR(HADD, TernaryInstruction)
> -DECL_SELECTION_IR(RHADD, TernaryInstruction)
> +DECL_SELECTION_IR(HADD, BinaryWithTempInstruction)
> +DECL_SELECTION_IR(RHADD, BinaryWithTempInstruction)
> DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction)
> DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction)
> --
> 1.8.1.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet mailing list