[Beignet] [PATCH] Define temporary reg as dest reg of instruction
Homer Hsing
homer.xing at intel.com
Sun Aug 11 19:26:44 PDT 2013
Previously, I defined the temporary register as a source register of the instruction.
But the instruction scheduler treats source registers as read-only registers.
So I now define temporary registers as destination registers.
Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
backend/src/backend/gen_context.cpp | 212 +++++++++++----------
backend/src/backend/gen_context.hpp | 2 +
.../src/backend/gen_insn_gen7_schedule_info.hxx | 2 +
backend/src/backend/gen_insn_selection.cpp | 39 +++-
backend/src/backend/gen_insn_selection.hxx | 14 +-
5 files changed, 153 insertions(+), 116 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 024da8a..b3650c5 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -151,13 +151,121 @@ namespace gbe
}
}
+ void GenContext::emitUnaryWithTempInstruction(const SelectionInstruction &insn) { // Emit a one-source selection instruction that also carries a temporary register.
+ GenRegister dst = ra->genReg(insn.dst(0)); // result register
+ GenRegister src = ra->genReg(insn.src(0)); // the single source operand
+ GenRegister tmp = ra->genReg(insn.dst(1)); // temporary register, carried as the second destination rather than as a source
+ switch (insn.opcode) {
+ case SEL_OP_LOAD_DF_IMM:
+ p->LOAD_DF_IMM(dst, tmp, src.value.df); // the immediate is read from src.value.df
+ break;
+ case SEL_OP_MOV_DF:
+ p->MOV_DF(dst, src, tmp);
+ break;
+ default:
+ NOT_IMPLEMENTED; // only the two opcodes above are handled by this emitter
+ }
+ }
+
+ void GenContext::emitBinaryWithTempInstruction(const SelectionInstruction &insn) { // Emit a two-source selection instruction that also carries a temporary register.
+ GenRegister dst = ra->genReg(insn.dst(0)); // result register
+ GenRegister src0 = ra->genReg(insn.src(0)); // first source operand
+ GenRegister src1 = ra->genReg(insn.src(1)); // second source operand
+ GenRegister tmp = ra->genReg(insn.dst(1)); // temporary register, carried as the second destination rather than as a source
+ switch (insn.opcode) {
+ case SEL_OP_I64ADD: {
+ GenRegister x = GenRegister::retype(tmp, GEN_TYPE_UD), // x: tmp viewed as GEN_TYPE_UD
+ y = GenRegister::suboffset(x, p->curr.execWidth); // y: x at a sub-offset of the current execution width
+ loadBottomHalf(x, src0);
+ loadBottomHalf(y, src1);
+ addWithCarry(x, x, y); // NOTE(review): presumably leaves the carry in y, which is added to the top half below - confirm
+ storeBottomHalf(dst, x);
+ loadTopHalf(x, src0);
+ p->ADD(x, x, y);
+ loadTopHalf(y, src1);
+ p->ADD(x, x, y);
+ storeTopHalf(dst, x);
+ break;
+ }
+ case SEL_OP_I64SUB: {
+ GenRegister x = GenRegister::retype(tmp, GEN_TYPE_UD), // x: tmp viewed as GEN_TYPE_UD
+ y = GenRegister::suboffset(x, p->curr.execWidth); // y: x at a sub-offset of the current execution width
+ loadBottomHalf(x, src0);
+ loadBottomHalf(y, src1);
+ subWithBorrow(x, x, y); // NOTE(review): presumably leaves the borrow in y for the next subtraction - confirm
+ storeBottomHalf(dst, x);
+ loadTopHalf(x, src0);
+ subWithBorrow(x, x, y);
+ loadTopHalf(y, src1);
+ subWithBorrow(x, x, y);
+ storeTopHalf(dst, x);
+ break;
+ }
+ case SEL_OP_MUL_HI: {
+ int w = p->curr.execWidth; // execution width before it is overridden to 8 below
+ p->push();
+ p->curr.execWidth = 8;
+ for (int i = 0; i < w / 8; i ++) { // one pass per group of 8 lanes
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE; // MUL and MACH are emitted with predication off
+ p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0, src1); // MUL targets the accumulator
+ p->curr.accWrEnable = 1;
+ p->MACH(tmp, src0, src1); // MACH result goes to the temporary
+ p->pop();
+ p->curr.quarterControl = i; // quarter control is set to the pass index before the MOV
+ p->MOV(dst, tmp);
+ dst = GenRegister::Qn(dst, 1); // step dst/src0/src1 with Qn(reg, 1) for the next pass; tmp is reused as is
+ src0 = GenRegister::Qn(src0, 1);
+ src1 = GenRegister::Qn(src1, 1);
+ }
+ p->pop();
+ break;
+ }
+ case SEL_OP_HADD: {
+ int w = p->curr.execWidth; // execution width before it is overridden to 8 below
+ p->push();
+ p->curr.execWidth = 8;
+ for (int i = 0; i < w / 8; i ++) { // one pass per group of 8 lanes
+ p->curr.quarterControl = i;
+ p->ADDC(dst, src0, src1);
+ p->SHR(dst, dst, GenRegister::immud(1)); // shift the sum right by 1
+ p->SHL(tmp, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31)); // accumulator value shifted left by 31 into tmp; NOTE(review): presumably the ADDC carry-out - confirm
+ p->OR(dst, dst, tmp); // merge tmp into the shifted sum
+ dst = GenRegister::Qn(dst, 1); // step dst/src0/src1 with Qn(reg, 1) for the next pass; tmp is reused as is
+ src0 = GenRegister::Qn(src0, 1);
+ src1 = GenRegister::Qn(src1, 1);
+ }
+ p->pop();
+ break;
+ }
+ case SEL_OP_RHADD: {
+ int w = p->curr.execWidth; // execution width before it is overridden to 8 below
+ p->push();
+ p->curr.execWidth = 8;
+ for (int i = 0; i < w / 8; i ++) { // one pass per group of 8 lanes
+ p->curr.quarterControl = i;
+ p->ADDC(dst, src0, src1);
+ p->ADD(dst, dst, GenRegister::immud(1)); // extra +1 compared with the HADD sequence
+ p->SHR(dst, dst, GenRegister::immud(1)); // shift the sum right by 1
+ p->SHL(tmp, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31)); // accumulator value shifted left by 31 into tmp; NOTE(review): presumably the ADDC carry-out - confirm
+ p->OR(dst, dst, tmp); // merge tmp into the shifted sum
+ dst = GenRegister::Qn(dst, 1); // step dst/src0/src1 with Qn(reg, 1) for the next pass; tmp is reused as is
+ src0 = GenRegister::Qn(src0, 1);
+ src1 = GenRegister::Qn(src1, 1);
+ }
+ p->pop();
+ break;
+ }
+ default:
+ NOT_IMPLEMENTED; // only the five opcodes above are handled by this emitter
+ }
+ }
+
void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
const GenRegister dst = ra->genReg(insn.dst(0));
const GenRegister src0 = ra->genReg(insn.src(0));
const GenRegister src1 = ra->genReg(insn.src(1));
switch (insn.opcode) {
- case SEL_OP_LOAD_DF_IMM: p->LOAD_DF_IMM(dst, src1, src0.value.df); break;
- case SEL_OP_MOV_DF: p->MOV_DF(dst, src0, src1); break;
case SEL_OP_SEL: p->SEL(dst, src0, src1); break;
case SEL_OP_SEL_INT64:
{
@@ -358,107 +466,7 @@ namespace gbe
const GenRegister src1 = ra->genReg(insn.src(1));
const GenRegister src2 = ra->genReg(insn.src(2));
switch (insn.opcode) {
- case SEL_OP_I64ADD:
- {
- GenRegister x = GenRegister::retype(src2, GEN_TYPE_UD),
- y = GenRegister::suboffset(x, p->curr.execWidth);
- loadBottomHalf(x, src0);
- loadBottomHalf(y, src1);
- addWithCarry(x, x, y);
- storeBottomHalf(dst, x);
- loadTopHalf(x, src0);
- p->ADD(x, x, y);
- loadTopHalf(y, src1);
- p->ADD(x, x, y);
- storeTopHalf(dst, x);
- }
- break;
- case SEL_OP_I64SUB:
- {
- GenRegister x = GenRegister::retype(src2, GEN_TYPE_UD),
- y = GenRegister::suboffset(x, p->curr.execWidth);
- loadBottomHalf(x, src0);
- loadBottomHalf(y, src1);
- subWithBorrow(x, x, y);
- storeBottomHalf(dst, x);
- loadTopHalf(x, src0);
- subWithBorrow(x, x, y);
- loadTopHalf(y, src1);
- subWithBorrow(x, x, y);
- storeTopHalf(dst, x);
- }
- break;
- case SEL_OP_MUL_HI:
- {
- int w = p->curr.execWidth;
- p->push();
- p->curr.execWidth = 8;
- p->curr.quarterControl = 0;
- p->push();
- p->curr.predicate = GEN_PREDICATE_NONE;
- p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0, src1);
- p->curr.accWrEnable = 1;
- p->MACH(src2, src0, src1);
- p->curr.accWrEnable = 0;
- p->pop();
- p->MOV(dst, src2);
- if (w == 16) {
- p->push();
- p->curr.predicate = GEN_PREDICATE_NONE;
- p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
- p->curr.accWrEnable = 1;
- p->MACH(src2, GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
- p->curr.accWrEnable = 0;
- p->pop();
- p->curr.quarterControl = 1;
- p->MOV(GenRegister::Qn(dst, 1), src2);
- }
- p->pop();
- break;
- }
case SEL_OP_MAD: p->MAD(dst, src0, src1, src2); break;
- case SEL_OP_HADD:
- {
- int w = p->curr.execWidth;
- p->push();
- p->curr.execWidth = 8;
- p->curr.quarterControl = 0;
- p->ADDC(dst, src0, src1);
- p->SHR(dst, dst, GenRegister::immud(1));
- p->SHL(src2, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
- p->OR(dst, dst, src2);
- if (w == 16) {
- p->curr.quarterControl = 1;
- p->ADDC(GenRegister::Qn(dst, 1), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
- p->SHR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
- p->SHL(GenRegister::Qn(src2, 1), GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
- p->OR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::Qn(src2, 1));
- }
- p->pop();
- break;
- }
- case SEL_OP_RHADD:
- {
- int w = p->curr.execWidth;
- p->push();
- p->curr.execWidth = 8;
- p->curr.quarterControl = 0;
- p->ADDC(dst, src0, src1);
- p->ADD(dst, dst, GenRegister::immud(1));
- p->SHR(dst, dst, GenRegister::immud(1));
- p->SHL(src2, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
- p->OR(dst, dst, src2);
- if (w == 16) {
- p->curr.quarterControl = 1;
- p->ADDC(GenRegister::Qn(dst, 1), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
- p->ADD(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
- p->SHR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
- p->SHL(GenRegister::Qn(src2, 1), GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
- p->OR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::Qn(src2, 1));
- }
- p->pop();
- break;
- }
default: NOT_IMPLEMENTED;
}
}
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 694ae98..cdca859 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -88,7 +88,9 @@ namespace gbe
/*! Final Gen ISA emission helper functions */
void emitLabelInstruction(const SelectionInstruction &insn);
void emitUnaryInstruction(const SelectionInstruction &insn);
+ void emitUnaryWithTempInstruction(const SelectionInstruction &insn);
void emitBinaryInstruction(const SelectionInstruction &insn);
+ void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
void emitTernaryInstruction(const SelectionInstruction &insn);
void emitCompareInstruction(const SelectionInstruction &insn);
void emitJumpInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index da8f2a2..77b1b43 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -1,7 +1,9 @@
// Family Latency SIMD16 SIMD8
DECL_GEN7_SCHEDULE(Label, 0, 0, 0)
DECL_GEN7_SCHEDULE(Unary, 20, 4, 2)
+DECL_GEN7_SCHEDULE(UnaryWithTemp, 20, 4, 2)
DECL_GEN7_SCHEDULE(Binary, 20, 4, 2)
+DECL_GEN7_SCHEDULE(BinaryWithTemp, 20, 4, 2)
DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2)
DECL_GEN7_SCHEDULE(Compare, 20, 4, 2)
DECL_GEN7_SCHEDULE(Jump, 14, 1, 1)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index d40fbfe..5013eab 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -410,13 +410,17 @@ namespace gbe
#define ALU1(OP) \
INLINE void OP(Reg dst, Reg src) { ALU1(SEL_OP_##OP, dst, src); }
+#define ALU1WithTemp(OP) \
+ INLINE void OP(Reg dst, Reg src, Reg temp) { ALU1WithTemp(SEL_OP_##OP, dst, src, temp); }
#define ALU2(OP) \
INLINE void OP(Reg dst, Reg src0, Reg src1) { ALU2(SEL_OP_##OP, dst, src0, src1); }
+#define ALU2WithTemp(OP) \
+ INLINE void OP(Reg dst, Reg src0, Reg src1, Reg temp) { ALU2WithTemp(SEL_OP_##OP, dst, src0, src1, temp); }
#define ALU3(OP) \
INLINE void OP(Reg dst, Reg src0, Reg src1, Reg src2) { ALU3(SEL_OP_##OP, dst, src0, src1, src2); }
ALU1(MOV)
- ALU2(MOV_DF)
- ALU2(LOAD_DF_IMM)
+ ALU1WithTemp(MOV_DF)
+ ALU1WithTemp(LOAD_DF_IMM)
ALU1(LOAD_INT64_IMM)
ALU1(RNDZ)
ALU1(RNDE)
@@ -435,8 +439,8 @@ namespace gbe
ALU2(RSL)
ALU2(ASR)
ALU2(ADD)
- ALU3(I64ADD)
- ALU3(I64SUB)
+ ALU2WithTemp(I64ADD)
+ ALU2WithTemp(I64SUB)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
@@ -444,15 +448,17 @@ namespace gbe
ALU2(MACH)
ALU1(LZD)
ALU3(MAD)
- ALU3(MUL_HI)
+ ALU2WithTemp(MUL_HI)
ALU1(FBH)
ALU1(FBL)
- ALU3(HADD)
- ALU3(RHADD)
+ ALU2WithTemp(HADD)
+ ALU2WithTemp(RHADD)
ALU2(UPSAMPLE_SHORT)
ALU2(UPSAMPLE_INT)
#undef ALU1
+#undef ALU1WithTemp
#undef ALU2
+#undef ALU2WithTemp
#undef ALU3
/*! Encode a barrier instruction */
void BARRIER(GenRegister src);
@@ -494,8 +500,12 @@ namespace gbe
void MATH(Reg dst, uint32_t function, Reg src);
/*! Encode unary instructions */
void ALU1(SelectionOpcode opcode, Reg dst, Reg src);
+ /*! Encode unary with temp reg instructions */
+ void ALU1WithTemp(SelectionOpcode opcode, Reg dst, Reg src0, Reg temp);
/*! Encode binary instructions */
void ALU2(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1);
+ /*! Encode binary with temp reg instructions */
+ void ALU2WithTemp(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg temp);
/*! Encode ternary instructions */
void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2);
/*! Encode sample instructions */
@@ -987,6 +997,13 @@ namespace gbe
insn->src(0) = src;
}
+ void Selection::Opaque::ALU1WithTemp(SelectionOpcode opcode, Reg dst, Reg src, Reg temp) { // Append a unary instruction whose temporary is recorded as a second destination.
+ SelectionInstruction *insn = this->appendInsn(opcode, 2, 1); // two destination slots (dst, temp) and one source slot are filled below
+ insn->dst(0) = dst;
+ insn->src(0) = src;
+ insn->dst(1) = temp; // temp goes in a destination slot, not a source slot
+ }
+
void Selection::Opaque::ALU2(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1) {
SelectionInstruction *insn = this->appendInsn(opcode, 1, 2);
insn->dst(0) = dst;
@@ -994,6 +1011,14 @@ namespace gbe
insn->src(1) = src1;
}
+ void Selection::Opaque::ALU2WithTemp(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg temp) { // Append a binary instruction whose temporary is recorded as a second destination.
+ SelectionInstruction *insn = this->appendInsn(opcode, 2, 2); // two destination slots (dst, temp) and two source slots are filled below
+ insn->dst(0) = dst;
+ insn->src(0) = src0;
+ insn->src(1) = src1;
+ insn->dst(1) = temp; // temp goes in a destination slot, not a source slot
+ }
+
void Selection::Opaque::ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2) {
SelectionInstruction *insn = this->appendInsn(opcode, 1, 3);
insn->dst(0) = dst;
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index eeca9af..5660078 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -1,7 +1,7 @@
DECL_SELECTION_IR(LABEL, LabelInstruction)
DECL_SELECTION_IR(MOV, UnaryInstruction)
-DECL_SELECTION_IR(MOV_DF, BinaryInstruction)
-DECL_SELECTION_IR(LOAD_DF_IMM, BinaryInstruction)
+DECL_SELECTION_IR(MOV_DF, UnaryWithTempInstruction)
+DECL_SELECTION_IR(LOAD_DF_IMM, UnaryWithTempInstruction)
DECL_SELECTION_IR(LOAD_INT64_IMM, UnaryInstruction)
DECL_SELECTION_IR(NOT, UnaryInstruction)
DECL_SELECTION_IR(LZD, UnaryInstruction)
@@ -24,8 +24,8 @@ DECL_SELECTION_IR(RSR, BinaryInstruction)
DECL_SELECTION_IR(RSL, BinaryInstruction)
DECL_SELECTION_IR(ASR, BinaryInstruction)
DECL_SELECTION_IR(ADD, BinaryInstruction)
-DECL_SELECTION_IR(I64ADD, TernaryInstruction)
-DECL_SELECTION_IR(I64SUB, TernaryInstruction)
+DECL_SELECTION_IR(I64ADD, BinaryWithTempInstruction)
+DECL_SELECTION_IR(I64SUB, BinaryWithTempInstruction)
DECL_SELECTION_IR(MUL, BinaryInstruction)
DECL_SELECTION_IR(ATOMIC, AtomicInstruction)
DECL_SELECTION_IR(MACH, BinaryInstruction)
@@ -51,10 +51,10 @@ DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction)
DECL_SELECTION_IR(SPILL_REG, SpillRegInstruction)
DECL_SELECTION_IR(UNSPILL_REG, UnSpillRegInstruction)
-DECL_SELECTION_IR(MUL_HI, TernaryInstruction)
+DECL_SELECTION_IR(MUL_HI, BinaryWithTempInstruction)
DECL_SELECTION_IR(FBH, UnaryInstruction)
DECL_SELECTION_IR(FBL, UnaryInstruction)
-DECL_SELECTION_IR(HADD, TernaryInstruction)
-DECL_SELECTION_IR(RHADD, TernaryInstruction)
+DECL_SELECTION_IR(HADD, BinaryWithTempInstruction)
+DECL_SELECTION_IR(RHADD, BinaryWithTempInstruction)
DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction)
DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction)
--
1.8.1.2
More information about the Beignet
mailing list