[Beignet] [PATCH 3/8] GBE: extend registers/tuples/immediates to 32bit wide.

Tue Mar 31 19:05:38 PDT 2015

For some extremely large kernels, these values may be larger than
0xFFFF, so we have to extend them to 32 bits.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp | 18 +++++++++---------
 backend/src/backend/gen_reg_allocation.cpp | 12 ++++++------
 backend/src/backend/gen_register.hpp       |  4 ++--
 backend/src/ir/immediate.hpp               |  2 +-
 backend/src/ir/instruction.cpp             |  7 ++++---
 backend/src/ir/instruction.hpp             |  4 ++--
 backend/src/ir/register.hpp                | 12 ++++++------
 7 files changed, 30 insertions(+), 29 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 0f5e496..5586468 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2001,7 +2001,7 @@ namespace gbe
                 if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL &&
                     dag->isUsed) {
                 sel.curr.physicalFlag = 0;
-                sel.curr.flagIndex = (uint16_t)(insn.getDst(0));
+                sel.curr.flagIndex = (uint32_t)(insn.getDst(0));
                 sel.curr.modFlag = 1;
               }
               sel.MOV(dst, src);
@@ -2209,7 +2209,7 @@ namespace gbe
                    insn.getOpcode() == OP_OR ||
                    insn.getOpcode() == OP_XOR);
         sel.curr.physicalFlag = 0;
-        sel.curr.flagIndex = (uint16_t)(insn.getDst(0));
+        sel.curr.flagIndex = (uint32_t)(insn.getDst(0));
         sel.curr.modFlag = 1;
       }
 
@@ -2782,7 +2782,7 @@ namespace gbe
           if (!sel.isScalarReg(insn.getDst(0)) && sel.regDAG[insn.getDst(0)]->isUsed) {
             sel.curr.modFlag = 1;
             sel.curr.physicalFlag = 0;
-            sel.curr.flagIndex = (uint16_t) insn.getDst(0);
+            sel.curr.flagIndex = (uint32_t) insn.getDst(0);
           }
           sel.MOV(dst, imm.getIntegerValue() ? GenRegister::immuw(0xffff) : GenRegister::immuw(0));
         break;
@@ -3042,7 +3042,7 @@ namespace gbe
           sel.curr.physicalFlag = 0;
           sel.curr.modFlag = 1;
           sel.curr.predicate = GEN_PREDICATE_NONE;
-          sel.curr.flagIndex = (uint16_t)alignedFlag;
+          sel.curr.flagIndex = (uint32_t)alignedFlag;
           sel.CMP(GEN_CONDITIONAL_NEQ, GenRegister::unpacked_uw(shiftHReg), GenRegister::immuw(32));
         sel.pop();
 
@@ -3055,7 +3055,7 @@ namespace gbe
             // Only need to consider the tmpH when the addr is not aligned.
             sel.curr.modFlag = 0;
             sel.curr.physicalFlag = 0;
-            sel.curr.flagIndex = (uint16_t)alignedFlag;
+            sel.curr.flagIndex = (uint32_t)alignedFlag;
             sel.curr.predicate = GEN_PREDICATE_NORMAL;
             sel.SHL(tmpH, tmp[i + 1], shiftH);
             sel.OR(effectData[i], tmpL, tmpH);
@@ -3377,7 +3377,7 @@ namespace gbe
           sel.curr.noMask = 1;
         sel.curr.physicalFlag = 0;
         sel.curr.modFlag = 1;
-        sel.curr.flagIndex = (uint16_t)dst;
+        sel.curr.flagIndex = (uint32_t)dst;
         sel.curr.grfFlag = needStoreBool; // indicate whether we need to allocate grf to store this boolean.
         if (type == TYPE_S64 || type == TYPE_U64) {
           GenRegister tmp[3];
@@ -3791,7 +3791,7 @@ namespace gbe
         }
         sel.curr.inversePredicate ^= inverse;
         sel.curr.physicalFlag = 0;
-        sel.curr.flagIndex = (uint16_t) pred;
+        sel.curr.flagIndex = (uint32_t) pred;
         sel.curr.predicate = GEN_PREDICATE_NORMAL;
         // FIXME in general, if the flag is a uniform flag.
         // we should treat that flag as extern flag, as we
@@ -4204,7 +4204,7 @@ namespace gbe
           // as if there is no backward jump latter, then obviously everything will work fine.
           // If there is backward jump latter, then all the pcip will be updated correctly there.
           sel.curr.physicalFlag = 0;
-          sel.curr.flagIndex = (uint16_t) pred;
+          sel.curr.flagIndex = (uint32_t) pred;
           sel.curr.predicate = GEN_PREDICATE_NORMAL;
           sel.MOV(ip, GenRegister::immuw(uint16_t(dst)));
           sel.curr.predicate = GEN_PREDICATE_NONE;
@@ -4261,7 +4261,7 @@ namespace gbe
         GBE_ASSERT(jip == dst);
         sel.push();
           sel.curr.physicalFlag = 0;
-          sel.curr.flagIndex = (uint16_t) pred;
+          sel.curr.flagIndex = (uint32_t) pred;
           sel.curr.predicate = GEN_PREDICATE_NORMAL;
           sel.MOV(ip, GenRegister::immuw(uint16_t(dst)));
           sel.block->endifOffset = -1;
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 26078e0..a5d601a 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -54,14 +54,14 @@ namespace gbe
   };
 
   typedef struct GenRegIntervalKey {
-    GenRegIntervalKey(uint16_t reg, int32_t maxID) {
-      key = ((uint64_t)maxID << 16) | reg;
+    GenRegIntervalKey(uint32_t reg, int32_t maxID) {
+      key = ((uint64_t)maxID << 32) | reg;
     }
     const ir::Register getReg() const {
-      return (ir::Register)(key & 0xFFFF);
+      return (ir::Register)(key & 0xFFFFFFFF);
     }
     int32_t getMaxID() const {
-      return key >> 16;
+      return key >> 32;
     }
     uint64_t key;
   } GenRegIntervalKey;
@@ -126,9 +126,9 @@ namespace gbe
     /*! Allocate the virtual boolean (== flags) registers */
     void allocateFlags(Selection &selection);
     /*! validated flags which contains valid value in the physical flag register */
-    set<uint16_t> validatedFlags;
+    set<uint32_t> validatedFlags;
     /*! validated temp flag register which indicate the flag 0,1 contains which virtual flag register. */
-    uint16_t validTempFlagReg;
+    uint32_t validTempFlagReg;
     /*! validate flag for the current flag user instruction */
     void validateFlag(Selection &selection, SelectionInstruction &insn);
     /*! Allocate the GRF registers */
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index d539937..e166af4 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -132,7 +132,6 @@ namespace gbe
     uint32_t physicalFlag:1; //!< Physical or virtual flag register
     uint32_t flag:1;         //!< Only if physical flag,
     uint32_t subFlag:1;      //!< Only if physical flag
-    uint32_t flagIndex:16;   //!< Only if virtual flag (index of the register)
     uint32_t grfFlag:1;      //!< Only if virtual flag, 0 means we do not need to allocate GRF.
     uint32_t externFlag:1;   //!< Only if virtual flag, 1 means this flag is from external BB.
     uint32_t modFlag:1;      //!< Only if virtual flag, 1 means will modify flag.
@@ -146,6 +145,7 @@ namespace gbe
     uint32_t predicate:4;
     uint32_t inversePredicate:1;
     uint32_t saturate:1;
+    uint32_t flagIndex;   //!< Only if virtual flag (index of the register)
     void chooseNib(int nib) {
       switch (nib) {
         case 0:
@@ -240,7 +240,7 @@ namespace gbe
       float f;
       int32_t d;
       uint32_t ud;
-      uint16_t reg;
+      uint32_t reg;
       int64_t i64;
     } value;
 
diff --git a/backend/src/ir/immediate.hpp b/backend/src/ir/immediate.hpp
index 10bd035..6b27e8b 100644
--- a/backend/src/ir/immediate.hpp
+++ b/backend/src/ir/immediate.hpp
@@ -345,7 +345,7 @@ namespace ir {
   }
 
   /*! A value is stored in a per-function vector. This is the index to it */
-  TYPE_SAFE(ImmediateIndex, uint16_t)
+  TYPE_SAFE(ImmediateIndex, uint32_t)
 
 } /* namespace ir */
 } /* namespace gbe */
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 039f085..8bd19b6 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -741,7 +741,7 @@ namespace ir {
                                          const Function &fn,
                                          std::string &whyNot)
     {
-      if (UNLIKELY(uint16_t(ID) >= fn.regNum())) {
+      if (UNLIKELY(uint32_t(ID) >= fn.regNum())) {
         whyNot = "Out-of-bound destination register index";
         return false;
       }
@@ -885,8 +885,9 @@ namespace ir {
         return false;
       const RegisterFamily family = getFamily(this->type);
       for (uint32_t srcID = 0; srcID < 2; ++srcID)
-        if (UNLIKELY(checkRegisterData(family, src[srcID], fn, whyNot) == false))
+        if (UNLIKELY(checkRegisterData(family, src[srcID], fn, whyNot) == false)) {
           return false;
+        }
       return true;
     }
 
@@ -1283,7 +1284,7 @@ namespace ir {
   return HelperIntrospection<CLASS, RefClass>::value == 1;
 
 #define START_INTROSPECTION(CLASS) \
-  static_assert(sizeof(internal::CLASS) == (sizeof(uint64_t)*2), \
+  static_assert(sizeof(internal::CLASS) == (sizeof(uint64_t)*4), \
                 "Bad instruction size"); \
   static_assert(offsetof(internal::CLASS, opcode) == 0, \
                 "Bad opcode offset"); \
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 47312f5..37f64af 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -103,7 +103,7 @@ namespace ir {
   ///////////////////////////////////////////////////////////////////////////
 
   /*! Stores instruction internal data and opcode */
-  class ALIGNED(sizeof(uint64_t)*2) InstructionBase
+  class ALIGNED(sizeof(uint64_t)*4) InstructionBase
   {
   public:
     /*! Initialize the instruction from a 8 bytes stream */
@@ -117,7 +117,7 @@ namespace ir {
     /*! Get the instruction opcode */
     INLINE Opcode getOpcode(void) const { return opcode; }
   protected:
-    enum { opaqueSize = sizeof(uint64_t)*2-sizeof(uint8_t) };
+    enum { opaqueSize = sizeof(uint64_t)*4-sizeof(uint8_t) };
     Opcode opcode;               //!< Idendifies the instruction
     char opaque[opaqueSize];     //!< Remainder of it
     GBE_CLASS(InstructionBase);  //!< Use internal allocators
diff --git a/backend/src/ir/register.hpp b/backend/src/ir/register.hpp
index ce8bd60..be5f60d 100644
--- a/backend/src/ir/register.hpp
+++ b/backend/src/ir/register.hpp
@@ -111,7 +111,7 @@ namespace ir {
   /*! Register is the position of the index of the register data in the register
    *  file. We enforce type safety with this class
    */
-  TYPE_SAFE(Register, uint16_t)
+  TYPE_SAFE(Register, uint32_t)
   INLINE bool operator< (const Register &r0, const Register &r1) {
     return r0.value() < r1.value();
   }
@@ -119,7 +119,7 @@ namespace ir {
   /*! Tuple is the position of the first register in the tuple vector. We
    *  enforce type safety with this class
    */
-  TYPE_SAFE(Tuple, uint16_t)
+  TYPE_SAFE(Tuple, uint32_t)
 
   /*! A register file allocates and destroys registers. Basically, we will have
    *  one register file per function
@@ -131,7 +131,7 @@ namespace ir {
     INLINE Register append(RegisterFamily family, bool uniform = false) {
       GBE_ASSERTM(regNum() < MAX_INDEX,
                   "Too many defined registers (only 65535 are supported)");
-      const uint16_t index = regNum();
+      const uint32_t index = regNum();
       const RegisterData reg(family, uniform);
       regs.push_back(reg);
       return Register(index);
@@ -157,18 +157,18 @@ namespace ir {
     INLINE void setUniform(Register index, bool uniform) { regs[index].setUniform(uniform); }
     /*! Get the register index from the tuple */
     INLINE Register get(Tuple index, uint32_t which) const {
-      return regTuples[uint16_t(index) + which];
+      return regTuples[uint32_t(index) + which];
     }
     /*! Set the register index from the tuple */
     INLINE void set(Tuple index, uint32_t which, Register reg) {
-      regTuples[uint16_t(index) + which] = reg;
+      regTuples[uint32_t(index) + which] = reg;
     }
     /*! Number of registers in the register file */
     INLINE uint32_t regNum(void) const { return regs.size(); }
     /*! Number of tuples in the register file */
     INLINE uint32_t tupleNum(void) const { return regTuples.size(); }
     /*! register and tuple indices are short */
-    enum { MAX_INDEX = 0xffff }; 
+    enum { MAX_INDEX = 0xffffffff };
   private:
     vector<RegisterData> regs;   //!< All the registers together
     vector<Register> regTuples;  //!< Tuples are used for many src / dst
-- 
1.9.1