[Beignet] [PATCH] Define temporary reg as dest reg of instruction

Homer Hsing homer.xing at intel.com
Sun Aug 11 19:26:44 PDT 2013


Previously I defined the temporary reg as a source reg of the instruction.
But the instruction scheduler treats source regs as read-only regs.
So I define the temporary regs as dest regs now.

Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
 backend/src/backend/gen_context.cpp                | 212 +++++++++++----------
 backend/src/backend/gen_context.hpp                |   2 +
 .../src/backend/gen_insn_gen7_schedule_info.hxx    |   2 +
 backend/src/backend/gen_insn_selection.cpp         |  39 +++-
 backend/src/backend/gen_insn_selection.hxx         |  14 +-
 5 files changed, 153 insertions(+), 116 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 024da8a..b3650c5 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -151,13 +151,121 @@ namespace gbe
     }
   }
 
+  void GenContext::emitUnaryWithTempInstruction(const SelectionInstruction &insn) { // Emits a one-source selection op that also carries a scratch register.
+    GenRegister dst = ra->genReg(insn.dst(0)); // dst(0) is the actual result register
+    GenRegister src = ra->genReg(insn.src(0));
+    GenRegister tmp = ra->genReg(insn.dst(1)); // dst(1) is the temporary: it is written, so it is listed as a destination, not a source
+    switch (insn.opcode) {
+      case SEL_OP_LOAD_DF_IMM:
+        p->LOAD_DF_IMM(dst, tmp, src.value.df); // argument order differs from MOV_DF: temp comes before the value read from src.value.df
+        break;
+      case SEL_OP_MOV_DF:
+        p->MOV_DF(dst, src, tmp);
+        break;
+      default:
+        NOT_IMPLEMENTED; // only the two opcodes above are routed through this emitter
+    }
+  }
+
+  void GenContext::emitBinaryWithTempInstruction(const SelectionInstruction &insn) { // Emits a two-source selection op that also carries a scratch register.
+    GenRegister dst = ra->genReg(insn.dst(0)); // dst(0) is the actual result register
+    GenRegister src0 = ra->genReg(insn.src(0));
+    GenRegister src1 = ra->genReg(insn.src(1));
+    GenRegister tmp = ra->genReg(insn.dst(1)); // dst(1) is the temporary: it is written, so it is listed as a destination, not a source
+    switch (insn.opcode) {
+      case SEL_OP_I64ADD: {
+        GenRegister x = GenRegister::retype(tmp, GEN_TYPE_UD), // x: the temporary viewed as unsigned dwords
+                    y = GenRegister::suboffset(x, p->curr.execWidth); // y: second scratch area, execWidth past x inside the same temporary
+        loadBottomHalf(x, src0); // presumably the low 32 bits of each 64-bit lane - confirm in helper
+        loadBottomHalf(y, src1);
+        addWithCarry(x, x, y); // NOTE(review): y is added again below before being reloaded, so it presumably holds the carry afterwards - confirm
+        storeBottomHalf(dst, x);
+        loadTopHalf(x, src0);
+        p->ADD(x, x, y); // high(src0) + value left in y by addWithCarry
+        loadTopHalf(y, src1);
+        p->ADD(x, x, y); // + high(src1)
+        storeTopHalf(dst, x);
+        break;
+      }
+      case SEL_OP_I64SUB: {
+        GenRegister x = GenRegister::retype(tmp, GEN_TYPE_UD), // same x/y split of the temporary as in I64ADD
+                    y = GenRegister::suboffset(x, p->curr.execWidth);
+        loadBottomHalf(x, src0);
+        loadBottomHalf(y, src1);
+        subWithBorrow(x, x, y); // NOTE(review): y is reused below before being reloaded, presumably as the borrow - confirm
+        storeBottomHalf(dst, x);
+        loadTopHalf(x, src0);
+        subWithBorrow(x, x, y); // high(src0) - value left in y by the previous subWithBorrow
+        loadTopHalf(y, src1);
+        subWithBorrow(x, x, y); // - high(src1)
+        storeTopHalf(dst, x);
+        break;
+      }
+      case SEL_OP_MUL_HI: {
+        int w = p->curr.execWidth; // remember the incoming width; it decides how many 8-wide passes are emitted
+        p->push();
+        p->curr.execWidth = 8;
+        for (int i = 0; i < w / 8; i ++) { // one pass per 8 lanes of the original width
+          p->push();
+          p->curr.predicate = GEN_PREDICATE_NONE;
+          p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0, src1); // MUL targets the accumulator, MACH then writes tmp
+          p->curr.accWrEnable = 1;
+          p->MACH(tmp, src0, src1);
+          p->pop(); // presumably restores predicate/accWrEnable; the replaced code reset accWrEnable to 0 explicitly - confirm
+          p->curr.quarterControl = i; // NOTE(review): set only after MUL/MACH, so those two ran with the previous quarterControl value
+          p->MOV(dst, tmp);
+          dst = GenRegister::Qn(dst, 1); // step the operands by Qn(..., 1) for the next pass; tmp is reused as-is
+          src0 = GenRegister::Qn(src0, 1);
+          src1 = GenRegister::Qn(src1, 1);
+        } // end of per-pass loop
+        p->pop();
+        break;
+       }
+     case SEL_OP_HADD: {
+        int w = p->curr.execWidth; // remember the incoming width; it decides how many 8-wide passes are emitted
+        p->push();
+        p->curr.execWidth = 8;
+        for (int i = 0; i < w / 8; i ++) {
+          p->curr.quarterControl = i;
+          p->ADDC(dst, src0, src1);
+          p->SHR(dst, dst, GenRegister::immud(1)); // halve the sum held in dst
+          p->SHL(tmp, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31)); // acc is read right after ADDC - presumably the carry-out, moved to bit 31
+          p->OR(dst, dst, tmp); // merge the shifted accumulator value into the halved sum
+          dst = GenRegister::Qn(dst, 1); // step the operands by Qn(..., 1) for the next pass; tmp is reused as-is
+          src0 = GenRegister::Qn(src0, 1);
+          src1 = GenRegister::Qn(src1, 1);
+        }
+        p->pop();
+        break;
+       }
+      case SEL_OP_RHADD: {
+        int w = p->curr.execWidth; // remember the incoming width; it decides how many 8-wide passes are emitted
+        p->push();
+        p->curr.execWidth = 8;
+        for (int i = 0; i < w / 8; i ++) {
+          p->curr.quarterControl = i;
+          p->ADDC(dst, src0, src1);
+          p->ADD(dst, dst, GenRegister::immud(1)); // NOTE(review): if the ADDC result is 0xFFFFFFFF this +1 appears to wrap without its carry being captured - confirm
+          p->SHR(dst, dst, GenRegister::immud(1)); // halve the sum held in dst
+          p->SHL(tmp, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31)); // acc is read after ADDC/ADD - presumably still the ADDC carry-out, moved to bit 31
+          p->OR(dst, dst, tmp); // merge the shifted accumulator value into the halved sum
+          dst = GenRegister::Qn(dst, 1); // step the operands by Qn(..., 1) for the next pass; tmp is reused as-is
+          src0 = GenRegister::Qn(src0, 1);
+          src1 = GenRegister::Qn(src1, 1);
+        }
+        p->pop();
+        break;
+       }
+      default:
+        NOT_IMPLEMENTED; // only the five opcodes above are routed through this emitter
+    }
+  }
+
   void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
     const GenRegister dst = ra->genReg(insn.dst(0));
     const GenRegister src0 = ra->genReg(insn.src(0));
     const GenRegister src1 = ra->genReg(insn.src(1));
     switch (insn.opcode) {
-      case SEL_OP_LOAD_DF_IMM: p->LOAD_DF_IMM(dst, src1, src0.value.df); break;
-      case SEL_OP_MOV_DF: p->MOV_DF(dst, src0, src1); break;
       case SEL_OP_SEL:  p->SEL(dst, src0, src1); break;
       case SEL_OP_SEL_INT64:
         {
@@ -358,107 +466,7 @@ namespace gbe
     const GenRegister src1 = ra->genReg(insn.src(1));
     const GenRegister src2 = ra->genReg(insn.src(2));
     switch (insn.opcode) {
-      case SEL_OP_I64ADD:
-        {
-          GenRegister x = GenRegister::retype(src2, GEN_TYPE_UD),
-                      y = GenRegister::suboffset(x, p->curr.execWidth);
-          loadBottomHalf(x, src0);
-          loadBottomHalf(y, src1);
-          addWithCarry(x, x, y);
-          storeBottomHalf(dst, x);
-          loadTopHalf(x, src0);
-          p->ADD(x, x, y);
-          loadTopHalf(y, src1);
-          p->ADD(x, x, y);
-          storeTopHalf(dst, x);
-        }
-        break;
-      case SEL_OP_I64SUB:
-        {
-          GenRegister x = GenRegister::retype(src2, GEN_TYPE_UD),
-                      y = GenRegister::suboffset(x, p->curr.execWidth);
-          loadBottomHalf(x, src0);
-          loadBottomHalf(y, src1);
-          subWithBorrow(x, x, y);
-          storeBottomHalf(dst, x);
-          loadTopHalf(x, src0);
-          subWithBorrow(x, x, y);
-          loadTopHalf(y, src1);
-          subWithBorrow(x, x, y);
-          storeTopHalf(dst, x);
-        }
-        break;
-      case SEL_OP_MUL_HI:
-       {
-        int w = p->curr.execWidth;
-        p->push();
-        p->curr.execWidth = 8;
-        p->curr.quarterControl = 0;
-        p->push();
-        p->curr.predicate = GEN_PREDICATE_NONE;
-        p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0, src1);
-        p->curr.accWrEnable = 1;
-        p->MACH(src2, src0, src1);
-        p->curr.accWrEnable = 0;
-        p->pop();
-        p->MOV(dst, src2);
-        if (w == 16) {
-          p->push();
-          p->curr.predicate = GEN_PREDICATE_NONE;
-          p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
-          p->curr.accWrEnable = 1;
-          p->MACH(src2, GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
-          p->curr.accWrEnable = 0;
-          p->pop();
-          p->curr.quarterControl = 1;
-          p->MOV(GenRegister::Qn(dst, 1), src2);
-        }
-        p->pop();
-        break;
-       }
       case SEL_OP_MAD:  p->MAD(dst, src0, src1, src2); break;
-      case SEL_OP_HADD:
-       {
-        int w = p->curr.execWidth;
-        p->push();
-        p->curr.execWidth = 8;
-        p->curr.quarterControl = 0;
-        p->ADDC(dst, src0, src1);
-        p->SHR(dst, dst, GenRegister::immud(1));
-        p->SHL(src2, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
-        p->OR(dst, dst, src2);
-        if (w == 16) {
-          p->curr.quarterControl = 1;
-          p->ADDC(GenRegister::Qn(dst, 1), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
-          p->SHR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
-          p->SHL(GenRegister::Qn(src2, 1), GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
-          p->OR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::Qn(src2, 1));
-        }
-        p->pop();
-        break;
-       }
-      case SEL_OP_RHADD:
-       {
-        int w = p->curr.execWidth;
-        p->push();
-        p->curr.execWidth = 8;
-        p->curr.quarterControl = 0;
-        p->ADDC(dst, src0, src1);
-        p->ADD(dst, dst, GenRegister::immud(1));
-        p->SHR(dst, dst, GenRegister::immud(1));
-        p->SHL(src2, GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
-        p->OR(dst, dst, src2);
-        if (w == 16) {
-          p->curr.quarterControl = 1;
-          p->ADDC(GenRegister::Qn(dst, 1), GenRegister::Qn(src0, 1), GenRegister::Qn(src1, 1));
-          p->ADD(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
-          p->SHR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::immud(1));
-          p->SHL(GenRegister::Qn(src2, 1), GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), GenRegister::immud(31));
-          p->OR(GenRegister::Qn(dst, 1), GenRegister::Qn(dst, 1), GenRegister::Qn(src2, 1));
-        }
-        p->pop();
-        break;
-       }
       default: NOT_IMPLEMENTED;
     }
   }
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 694ae98..cdca859 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -88,7 +88,9 @@ namespace gbe
     /*! Final Gen ISA emission helper functions */
     void emitLabelInstruction(const SelectionInstruction &insn);
     void emitUnaryInstruction(const SelectionInstruction &insn);
+    void emitUnaryWithTempInstruction(const SelectionInstruction &insn);
     void emitBinaryInstruction(const SelectionInstruction &insn);
+    void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
     void emitTernaryInstruction(const SelectionInstruction &insn);
     void emitCompareInstruction(const SelectionInstruction &insn);
     void emitJumpInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index da8f2a2..77b1b43 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -1,7 +1,9 @@
 //                 Family     Latency     SIMD16     SIMD8
 DECL_GEN7_SCHEDULE(Label,           0,         0,        0)
 DECL_GEN7_SCHEDULE(Unary,           20,        4,        2)
+DECL_GEN7_SCHEDULE(UnaryWithTemp,   20,        4,        2)
 DECL_GEN7_SCHEDULE(Binary,          20,        4,        2)
+DECL_GEN7_SCHEDULE(BinaryWithTemp,  20,        4,        2)
 DECL_GEN7_SCHEDULE(Ternary,         20,        4,        2)
 DECL_GEN7_SCHEDULE(Compare,         20,        4,        2)
 DECL_GEN7_SCHEDULE(Jump,            14,        1,        1)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index d40fbfe..5013eab 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -410,13 +410,17 @@ namespace gbe
 
 #define ALU1(OP) \
   INLINE void OP(Reg dst, Reg src) { ALU1(SEL_OP_##OP, dst, src); }
+#define ALU1WithTemp(OP) \
+  INLINE void OP(Reg dst, Reg src, Reg temp) { ALU1WithTemp(SEL_OP_##OP, dst, src, temp); }
 #define ALU2(OP) \
   INLINE void OP(Reg dst, Reg src0, Reg src1) { ALU2(SEL_OP_##OP, dst, src0, src1); }
+#define ALU2WithTemp(OP) \
+  INLINE void OP(Reg dst, Reg src0, Reg src1, Reg temp) { ALU2WithTemp(SEL_OP_##OP, dst, src0, src1, temp); }
 #define ALU3(OP) \
   INLINE void OP(Reg dst, Reg src0, Reg src1, Reg src2) { ALU3(SEL_OP_##OP, dst, src0, src1, src2); }
     ALU1(MOV)
-    ALU2(MOV_DF)
-    ALU2(LOAD_DF_IMM)
+    ALU1WithTemp(MOV_DF)
+    ALU1WithTemp(LOAD_DF_IMM)
     ALU1(LOAD_INT64_IMM)
     ALU1(RNDZ)
     ALU1(RNDE)
@@ -435,8 +439,8 @@ namespace gbe
     ALU2(RSL)
     ALU2(ASR)
     ALU2(ADD)
-    ALU3(I64ADD)
-    ALU3(I64SUB)
+    ALU2WithTemp(I64ADD)
+    ALU2WithTemp(I64SUB)
     ALU2(MUL)
     ALU1(FRC)
     ALU1(RNDD)
@@ -444,15 +448,17 @@ namespace gbe
     ALU2(MACH)
     ALU1(LZD)
     ALU3(MAD)
-    ALU3(MUL_HI)
+    ALU2WithTemp(MUL_HI)
     ALU1(FBH)
     ALU1(FBL)
-    ALU3(HADD)
-    ALU3(RHADD)
+    ALU2WithTemp(HADD)
+    ALU2WithTemp(RHADD)
     ALU2(UPSAMPLE_SHORT)
     ALU2(UPSAMPLE_INT)
 #undef ALU1
+#undef ALU1WithTemp
 #undef ALU2
+#undef ALU2WithTemp
 #undef ALU3
     /*! Encode a barrier instruction */
     void BARRIER(GenRegister src);
@@ -494,8 +500,12 @@ namespace gbe
     void MATH(Reg dst, uint32_t function, Reg src);
     /*! Encode unary instructions */
     void ALU1(SelectionOpcode opcode, Reg dst, Reg src);
+    /*! Encode unary with temp reg instructions */
+    void ALU1WithTemp(SelectionOpcode opcode, Reg dst, Reg src0, Reg temp);
     /*! Encode binary instructions */
     void ALU2(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1);
+    /*! Encode binary with temp reg instructions */
+    void ALU2WithTemp(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg temp);
     /*! Encode ternary instructions */
     void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2);
     /*! Encode sample instructions */
@@ -987,6 +997,13 @@ namespace gbe
     insn->src(0) = src;
   }
 
+  void Selection::Opaque::ALU1WithTemp(SelectionOpcode opcode, Reg dst, Reg src, Reg temp) { // Appends a unary selection instruction that also owns a temporary register.
+    SelectionInstruction *insn = this->appendInsn(opcode, 2, 1); // 2 destinations, 1 source (plain ALU2 below passes 1, 2 for one dst and two srcs)
+    insn->dst(0) = dst;
+    insn->src(0) = src;
+    insn->dst(1) = temp; // the temporary occupies the second destination slot; the emitter reads it back via insn.dst(1)
+  }
+
   void Selection::Opaque::ALU2(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1) {
     SelectionInstruction *insn = this->appendInsn(opcode, 1, 2);
     insn->dst(0) = dst;
@@ -994,6 +1011,14 @@ namespace gbe
     insn->src(1) = src1;
   }
 
+  void Selection::Opaque::ALU2WithTemp(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg temp) { // Appends a binary selection instruction that also owns a temporary register.
+    SelectionInstruction *insn = this->appendInsn(opcode, 2, 2); // 2 destinations, 2 sources (plain ALU3 below passes 1, 3 for one dst and three srcs)
+    insn->dst(0) = dst;
+    insn->src(0) = src0;
+    insn->src(1) = src1;
+    insn->dst(1) = temp; // the temporary occupies the second destination slot; the emitter reads it back via insn.dst(1)
+  }
+
   void Selection::Opaque::ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2) {
     SelectionInstruction *insn = this->appendInsn(opcode, 1, 3);
     insn->dst(0) = dst;
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index eeca9af..5660078 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -1,7 +1,7 @@
 DECL_SELECTION_IR(LABEL, LabelInstruction)
 DECL_SELECTION_IR(MOV, UnaryInstruction)
-DECL_SELECTION_IR(MOV_DF, BinaryInstruction)
-DECL_SELECTION_IR(LOAD_DF_IMM, BinaryInstruction)
+DECL_SELECTION_IR(MOV_DF, UnaryWithTempInstruction)
+DECL_SELECTION_IR(LOAD_DF_IMM, UnaryWithTempInstruction)
 DECL_SELECTION_IR(LOAD_INT64_IMM, UnaryInstruction)
 DECL_SELECTION_IR(NOT, UnaryInstruction)
 DECL_SELECTION_IR(LZD, UnaryInstruction)
@@ -24,8 +24,8 @@ DECL_SELECTION_IR(RSR, BinaryInstruction)
 DECL_SELECTION_IR(RSL, BinaryInstruction)
 DECL_SELECTION_IR(ASR, BinaryInstruction)
 DECL_SELECTION_IR(ADD, BinaryInstruction)
-DECL_SELECTION_IR(I64ADD, TernaryInstruction)
-DECL_SELECTION_IR(I64SUB, TernaryInstruction)
+DECL_SELECTION_IR(I64ADD, BinaryWithTempInstruction)
+DECL_SELECTION_IR(I64SUB, BinaryWithTempInstruction)
 DECL_SELECTION_IR(MUL, BinaryInstruction)
 DECL_SELECTION_IR(ATOMIC, AtomicInstruction)
 DECL_SELECTION_IR(MACH, BinaryInstruction)
@@ -51,10 +51,10 @@ DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction)
 DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction)
 DECL_SELECTION_IR(SPILL_REG, SpillRegInstruction)
 DECL_SELECTION_IR(UNSPILL_REG, UnSpillRegInstruction)
-DECL_SELECTION_IR(MUL_HI, TernaryInstruction)
+DECL_SELECTION_IR(MUL_HI, BinaryWithTempInstruction)
 DECL_SELECTION_IR(FBH, UnaryInstruction)
 DECL_SELECTION_IR(FBL, UnaryInstruction)
-DECL_SELECTION_IR(HADD, TernaryInstruction)
-DECL_SELECTION_IR(RHADD, TernaryInstruction)
+DECL_SELECTION_IR(HADD, BinaryWithTempInstruction)
+DECL_SELECTION_IR(RHADD, BinaryWithTempInstruction)
 DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction)
 DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction)
-- 
1.8.1.2



More information about the Beignet mailing list