[Beignet] [PATCH 1/2] support 64bit-integer shifting

Homer Hsing homer.xing at intel.com
Mon Aug 12 01:45:12 PDT 2013


Support left shift (<<), logical right shift (>>),
and arithmetic right shift (>>) of 64-bit integers.
v2: declare the temporary registers as destination registers of the instruction

Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
 backend/src/backend/gen_context.cpp                | 113 +++++++++++++++++++++
 backend/src/backend/gen_context.hpp                |   3 +
 .../src/backend/gen_insn_gen7_schedule_info.hxx    |   1 +
 backend/src/backend/gen_insn_selection.cpp         |  47 ++++++++-
 backend/src/backend/gen_insn_selection.hxx         |   3 +
 5 files changed, 164 insertions(+), 3 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index e22a4c5..406cb80 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -385,6 +385,119 @@ namespace gbe
     }
   }
 
+  void GenContext::collectShifter(GenRegister dest, GenRegister src) { // dest = bottom half of src, masked with 63
+    const int quadCount = p->curr.execWidth / 4; // taken from the caller's state, before it is overridden below
+    p->push();
+    p->curr.execWidth = 8;
+    p->curr.predicate = GEN_PREDICATE_NONE;
+    for (int quad = 0; quad < quadCount; ++quad) {
+      p->AND(dest, src.bottom_half(), GenRegister::immud(63)); // keep only the low 6 bits
+      src = GenRegister::suboffset(src, 4);   // advance both operands by a sub-offset of 4
+      dest = GenRegister::suboffset(dest, 4);
+    }
+    p->pop();
+  }
+
+  void GenContext::emitI64ShiftInstruction(const SelectionInstruction &insn) { // emits I64SHL / I64SHR / I64ASR from 32-bit operations
+    GenRegister dest = ra->genReg(insn.dst(0)); // 64-bit result
+    GenRegister x = ra->genReg(insn.src(0));    // value being shifted
+    GenRegister y = ra->genReg(insn.src(1));    // shift count
+    GenRegister a = ra->genReg(insn.dst(1));    // dst(1)..dst(6) are scratch registers supplied by the selection pass
+    GenRegister b = ra->genReg(insn.dst(2));
+    GenRegister c = ra->genReg(insn.dst(3));
+    GenRegister d = ra->genReg(insn.dst(4));
+    GenRegister e = ra->genReg(insn.dst(5));
+    GenRegister f = ra->genReg(insn.dst(6));
+    a.type = b.type = c.type = d.type = e.type = f.type = GEN_TYPE_UD;
+    GenRegister zero = GenRegister::immud(0);
+    switch(insn.opcode) {
+      case SEL_OP_I64SHL:
+        p->push();
+        p->curr.predicate = GEN_PREDICATE_NONE;
+        collectShifter(a, y);                   // a = y & 63
+        loadBottomHalf(e, x);                   // e = bottom half of x
+        loadTopHalf(f, x);                      // f = top half of x
+        p->SHR(b, e, GenRegister::negate(a));   // bits moving from bottom to top; NOTE(review): relies on the count being used modulo 32 - confirm
+        p->SHL(c, e, a);
+        p->SHL(d, f, a);
+        p->OR(e, d, b);
+        p->MOV(GenRegister::flag(1, 1), GenRegister::immuw(0xFFFF)); // enable every channel before the predicated compare
+        p->curr.predicate = GEN_PREDICATE_NORMAL;
+        p->curr.physicalFlag = 1, p->curr.flag = 1, p->curr.subFlag = 1;
+        p->CMP(GEN_CONDITIONAL_Z, a, zero);
+        p->SEL(d, d, e);                        // count == 0 keeps d (b would be the unshifted bottom half), otherwise takes d | b
+        p->curr.predicate = GEN_PREDICATE_NONE;
+        p->AND(a, a, GenRegister::immud(32));   // a != 0 now means count >= 32
+        p->MOV(GenRegister::flag(1, 1), GenRegister::immuw(0xFFFF));
+        p->curr.predicate = GEN_PREDICATE_NORMAL;
+        p->curr.physicalFlag = 1, p->curr.flag = 1, p->curr.subFlag = 1;
+        p->CMP(GEN_CONDITIONAL_Z, a, zero);
+        p->SEL(d, d, c);                        // count >= 32: top half comes from the shifted bottom half
+        p->SEL(c, c, zero);                     // count >= 32: bottom half becomes zero
+        p->pop();
+        storeBottomHalf(dest, c);
+        storeTopHalf(dest, d);
+        break;
+      case SEL_OP_I64SHR:
+        p->push();
+        p->curr.predicate = GEN_PREDICATE_NONE;
+        collectShifter(a, y);                   // a = y & 63
+        loadBottomHalf(e, x);
+        loadTopHalf(f, x);
+        p->SHL(b, f, GenRegister::negate(a));   // bits moving from top to bottom
+        p->SHR(c, f, a);
+        p->SHR(d, e, a);
+        p->OR(e, d, b);
+        p->MOV(GenRegister::flag(1, 1), GenRegister::immuw(0xFFFF));
+        p->curr.predicate = GEN_PREDICATE_NORMAL;
+        p->curr.physicalFlag = 1, p->curr.flag = 1, p->curr.subFlag = 1;
+        p->CMP(GEN_CONDITIONAL_Z, a, zero);
+        p->SEL(d, d, e);                        // count == 0 keeps d, otherwise takes d | b
+        p->curr.predicate = GEN_PREDICATE_NONE;
+        p->AND(a, a, GenRegister::immud(32));   // a != 0 now means count >= 32
+        p->MOV(GenRegister::flag(1, 1), GenRegister::immuw(0xFFFF));
+        p->curr.predicate = GEN_PREDICATE_NORMAL;
+        p->curr.physicalFlag = 1, p->curr.flag = 1, p->curr.subFlag = 1;
+        p->CMP(GEN_CONDITIONAL_Z, a, zero);
+        p->SEL(d, d, c);                        // count >= 32: bottom half comes from the shifted top half
+        p->SEL(c, c, zero);                     // count >= 32: top half becomes zero
+        p->pop();
+        storeBottomHalf(dest, d);
+        storeTopHalf(dest, c);
+        break;
+      case SEL_OP_I64ASR:
+        f.type = GEN_TYPE_D;                    // top half is signed so that ASR replicates the sign bit
+        p->push();
+        p->curr.predicate = GEN_PREDICATE_NONE;
+        collectShifter(a, y);                   // a = y & 63
+        loadBottomHalf(e, x);
+        loadTopHalf(f, x);
+        p->SHL(b, f, GenRegister::negate(a));   // bits moving from top to bottom
+        p->ASR(c, f, a);
+        p->SHR(d, e, a);
+        p->OR(e, d, b);
+        p->MOV(GenRegister::flag(1, 1), GenRegister::immuw(0xFFFF));
+        p->curr.predicate = GEN_PREDICATE_NORMAL;
+        p->curr.physicalFlag = 1, p->curr.flag = 1, p->curr.subFlag = 1;
+        p->CMP(GEN_CONDITIONAL_Z, a, zero);
+        p->SEL(d, d, e);                        // count == 0 keeps d, otherwise takes d | b
+        p->curr.predicate = GEN_PREDICATE_NONE;
+        p->AND(a, a, GenRegister::immud(32));   // a != 0 now means count >= 32
+        p->ASR(b, f, GenRegister::immud(31));   // b is free after the OR above; it now holds the sign fill of the top half
+        p->MOV(GenRegister::flag(1, 1), GenRegister::immuw(0xFFFF));
+        p->curr.predicate = GEN_PREDICATE_NORMAL; // flag 1.1 selected for the first compare is still set in p->curr
+        p->CMP(GEN_CONDITIONAL_Z, a, zero);
+        p->SEL(d, d, c);                        // count >= 32: bottom half comes from the shifted top half
+        p->SEL(c, c, b);                        // count >= 32: top half is the sign fill, not a constant -1 (wrong for x >= 0)
+        p->pop();
+        storeBottomHalf(dest, d);
+        storeTopHalf(dest, c);
+        break;
+      default:
+        NOT_IMPLEMENTED;
+    }
+  }
+
   void GenContext::loadTopHalf(GenRegister dest, GenRegister src) {
     int execWidth = p->curr.execWidth;
     src = src.top_half();
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index cdca859..b294042 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -76,6 +76,8 @@ namespace gbe
       return this->liveness->getLiveOut(bb);
     }
 
+    void collectShifter(GenRegister dest, GenRegister src);
+    
     void loadTopHalf(GenRegister dest, GenRegister src);
     void storeTopHalf(GenRegister dest, GenRegister src);
 
@@ -92,6 +94,7 @@ namespace gbe
     void emitBinaryInstruction(const SelectionInstruction &insn);
     void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
     void emitTernaryInstruction(const SelectionInstruction &insn);
+    void emitI64ShiftInstruction(const SelectionInstruction &insn);
     void emitCompareInstruction(const SelectionInstruction &insn);
     void emitJumpInstruction(const SelectionInstruction &insn);
     void emitIndirectMoveInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index 77b1b43..4879b66 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -5,6 +5,7 @@ DECL_GEN7_SCHEDULE(UnaryWithTemp,   20,        4,        2)
 DECL_GEN7_SCHEDULE(Binary,          20,        4,        2)
 DECL_GEN7_SCHEDULE(BinaryWithTemp,  20,        4,        2)
 DECL_GEN7_SCHEDULE(Ternary,         20,        4,        2)
+DECL_GEN7_SCHEDULE(I64Shift,        20,        4,        2)
 DECL_GEN7_SCHEDULE(Compare,         20,        4,        2)
 DECL_GEN7_SCHEDULE(Jump,            14,        1,        1)
 DECL_GEN7_SCHEDULE(IndirectMove,    20,        2,        2)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 46da37f..929a3bd 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -418,6 +418,8 @@ namespace gbe
   INLINE void OP(Reg dst, Reg src0, Reg src1, Reg temp) { ALU2WithTemp(SEL_OP_##OP, dst, src0, src1, temp); }
 #define ALU3(OP) \
   INLINE void OP(Reg dst, Reg src0, Reg src1, Reg src2) { ALU3(SEL_OP_##OP, dst, src0, src1, src2); }
+#define I64Shift(OP) \
+  INLINE void OP(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) { I64Shift(SEL_OP_##OP, dst, src0, src1, tmp); }
     ALU1(MOV)
     ALU1WithTemp(MOV_DF)
     ALU1WithTemp(LOAD_DF_IMM)
@@ -456,11 +458,17 @@ namespace gbe
     ALU2(UPSAMPLE_SHORT)
     ALU2(UPSAMPLE_INT)
     ALU1WithTemp(CONVI_TO_I64)
+    I64Shift(I64SHL)
+    I64Shift(I64SHR)
+    I64Shift(I64ASR)
 #undef ALU1
 #undef ALU1WithTemp
 #undef ALU2
 #undef ALU2WithTemp
 #undef ALU3
+#undef I64Shift
+    /*! Shift a 64-bit integer */
+    void I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[6]);
     /*! Encode a barrier instruction */
     void BARRIER(GenRegister src);
     /*! Encode a barrier instruction */
@@ -1031,6 +1039,15 @@ namespace gbe
     insn->src(2) = src2;
   }
 
+  void Selection::Opaque::I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) {
+    // Seven destinations (the result followed by the six temporaries) and two sources.
+    SelectionInstruction *shiftInsn = this->appendInsn(opcode, 7, 2);
+    shiftInsn->src(0) = src0;
+    shiftInsn->src(1) = src1;
+    shiftInsn->dst(0) = dst;
+    for (int slot = 1; slot <= 6; ++slot) shiftInsn->dst(slot) = tmp[slot - 1];
+  }
+
   // Boiler plate to initialize the selection library at c++ pre-main
   static SelectionLibrary *selLib = NULL;
   static void destroySelectionLibrary(void) { GBE_DELETE(selLib); }
@@ -1557,9 +1574,33 @@ namespace gbe
             sel.ADD(dst, src0, GenRegister::negate(src1));
           sel.pop();
           break;
-        case OP_SHL: sel.SHL(dst, src0, src1); break;
-        case OP_SHR: sel.SHR(dst, src0, src1); break;
-        case OP_ASR: sel.ASR(dst, src0, src1); break;
+        case OP_SHL:
+          if (type == TYPE_S64 || type == TYPE_U64) {
+            GenRegister tmp[6];
+            for(int i = 0; i < 6; i ++)
+              tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
+            sel.I64SHL(dst, src0, src1, tmp);
+          } else
+            sel.SHL(dst, src0, src1);
+          break;
+        case OP_SHR:
+          if (type == TYPE_S64 || type == TYPE_U64) {
+            GenRegister tmp[6];
+            for(int i = 0; i < 6; i ++)
+              tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
+            sel.I64SHR(dst, src0, src1, tmp);
+          } else
+            sel.SHR(dst, src0, src1);
+          break;
+        case OP_ASR:
+          if (type == TYPE_S64 || type == TYPE_U64) {
+            GenRegister tmp[6];
+            for(int i = 0; i < 6; i ++)
+              tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
+            sel.I64ASR(dst, src0, src1, tmp);
+          } else
+            sel.ASR(dst, src0, src1);
+          break;
         case OP_MUL_HI: {
             GenRegister temp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_UD);
             sel.MUL_HI(dst, src0, src1, temp);
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 9e24dd9..06469ca 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -23,6 +23,9 @@ DECL_SELECTION_IR(SHL, BinaryInstruction)
 DECL_SELECTION_IR(RSR, BinaryInstruction)
 DECL_SELECTION_IR(RSL, BinaryInstruction)
 DECL_SELECTION_IR(ASR, BinaryInstruction)
+DECL_SELECTION_IR(I64SHR, I64ShiftInstruction)
+DECL_SELECTION_IR(I64SHL, I64ShiftInstruction)
+DECL_SELECTION_IR(I64ASR, I64ShiftInstruction)
 DECL_SELECTION_IR(ADD, BinaryInstruction)
 DECL_SELECTION_IR(I64ADD, BinaryWithTempInstruction)
 DECL_SELECTION_IR(I64SUB, BinaryWithTempInstruction)
-- 
1.8.1.2



More information about the Beignet mailing list