[Beignet] [PATCH 03/18] GBE: refine the "scalar" register handling.

Zhigang Gong zhigang.gong at intel.com
Fri Mar 28 00:10:41 PDT 2014


The actual meaning of a "scalar" register is a uniform register.
A non-uniform register is a varying register. To prepare for
further uniform analysis and bool data optimization, this patch
makes uniformity a new register data attribute, so that each
newly created register can be marked as either a uniform or a
varying register.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/backend/context.cpp            | 30 ---------------
 backend/src/backend/context.hpp            |  2 -
 backend/src/backend/gen_context.cpp        |  4 +-
 backend/src/backend/gen_insn_selection.cpp | 45 +++++++++++++++++++++-
 backend/src/backend/gen_insn_selection.hpp |  2 +
 backend/src/backend/gen_reg_allocation.cpp |  2 +-
 backend/src/ir/context.cpp                 |  4 +-
 backend/src/ir/context.hpp                 |  2 +-
 backend/src/ir/function.hpp                |  4 +-
 backend/src/ir/lowering.cpp                |  3 +-
 backend/src/ir/profile.cpp                 | 62 +++++++++++++++---------------
 backend/src/ir/register.hpp                | 13 +++++--
 backend/src/llvm/llvm_gen_backend.cpp      | 27 +++++++------
 13 files changed, 108 insertions(+), 92 deletions(-)

diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index b806586..b0402b9 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -686,35 +686,5 @@ namespace gbe
     kernel->slmSize = fn.getSLMSize();
   }
 
-  bool Context::isScalarReg(const ir::Register &reg) const {
-    GBE_ASSERT(fn.getProfile() == ir::Profile::PROFILE_OCL);
-    if (fn.getArg(reg) != NULL) return true;
-    if (fn.getPushLocation(reg) != NULL) return true;
-    if (reg == ir::ocl::groupid0  ||
-        reg == ir::ocl::groupid1  ||
-        reg == ir::ocl::groupid2  ||
-        reg == ir::ocl::barrierid ||
-        reg == ir::ocl::threadn   ||
-        reg == ir::ocl::numgroup0 ||
-        reg == ir::ocl::numgroup1 ||
-        reg == ir::ocl::numgroup2 ||
-        reg == ir::ocl::lsize0    ||
-        reg == ir::ocl::lsize1    ||
-        reg == ir::ocl::lsize2    ||
-        reg == ir::ocl::gsize0    ||
-        reg == ir::ocl::gsize1    ||
-        reg == ir::ocl::gsize2    ||
-        reg == ir::ocl::goffset0  ||
-        reg == ir::ocl::goffset1  ||
-        reg == ir::ocl::goffset2  ||
-        reg == ir::ocl::workdim   ||
-        reg == ir::ocl::emask     ||
-        reg == ir::ocl::notemask  ||
-        reg == ir::ocl::barriermask
-      )
-      return true;
-    return false;
-  }
-
 } /* namespace gbe */
 
diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp
index ac940bd..384a2fb 100644
--- a/backend/src/backend/context.hpp
+++ b/backend/src/backend/context.hpp
@@ -68,8 +68,6 @@ namespace gbe
     INLINE const ir::Liveness &getLiveness(void) const { return *liveness; }
     /*! Tells if the register is used */
     bool isRegUsed(const ir::Register &reg) const;
-    /*! Indicate if a register is scalar or not */
-    bool isScalarReg(const ir::Register &reg) const;
     /*! Get the kernel we are currently compiling */
     INLINE Kernel *getKernel(void) const { return this->kernel; }
     /*! Get the function we are currently compiling */
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 9689ac5..f6848b2 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -1609,7 +1609,7 @@ namespace gbe
 
   void GenContext::emitIndirectMoveInstruction(const SelectionInstruction &insn) {
     GenRegister src = ra->genReg(insn.src(0));
-    if(isScalarReg(src.reg()))
+    if(sel->isScalarReg(src.reg()))
       src = GenRegister::retype(src, GEN_TYPE_UW);
     else
       src = GenRegister::unpacked_uw(src.nr, src.subnr / typeSize(GEN_TYPE_UW));
@@ -1751,7 +1751,7 @@ namespace gbe
     const GenRegister data = ra->genReg(insn.src(1));
     const uint32_t bti = insn.extra.function;
     p->MOV(src, addr);
-    p->WRITE64(src, data, bti, elemNum, isScalarReg(data.reg()));
+    p->WRITE64(src, data, bti, elemNum, sel->isScalarReg(data.reg()));
   }
 
   void GenContext::emitUntypedWriteInstruction(const SelectionInstruction &insn) {
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 663ca64..d0b87cd 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -318,6 +318,43 @@ namespace gbe
     INLINE ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID);
     /*! spill a register (insert spill/unspill instructions) */
     INLINE bool spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool);
+    /*! indicate whether a register is a scalar/uniform register. */
+    INLINE bool isScalarReg(const ir::Register &reg) const {
+#if 0
+      printf("reg %d ", reg.value());
+      printf("uniform: %d ", getRegisterData(reg).isUniform());
+      if (ctx.getFunction().getArg(reg) != NULL) { printf("true function arg\n"); return true; }
+      if (ctx.getFunction().getPushLocation(reg) != NULL) { printf("true push location.\n"); return true; }
+      if (reg == ir::ocl::groupid0  ||
+          reg == ir::ocl::groupid1  ||
+          reg == ir::ocl::groupid2  ||
+          reg == ir::ocl::barrierid ||
+          reg == ir::ocl::threadn   ||
+          reg == ir::ocl::numgroup0 ||
+          reg == ir::ocl::numgroup1 ||
+          reg == ir::ocl::numgroup2 ||
+          reg == ir::ocl::lsize0    ||
+          reg == ir::ocl::lsize1    ||
+          reg == ir::ocl::lsize2    ||
+          reg == ir::ocl::gsize0    ||
+          reg == ir::ocl::gsize1    ||
+          reg == ir::ocl::gsize2    ||
+          reg == ir::ocl::goffset0  ||
+          reg == ir::ocl::goffset1  ||
+          reg == ir::ocl::goffset2  ||
+          reg == ir::ocl::workdim   ||
+          reg == ir::ocl::emask     ||
+          reg == ir::ocl::notemask  ||
+          reg == ir::ocl::barriermask
+        ) {
+        printf("special reg.\n");
+        return true;
+      }
+      return false;
+#endif
+      const ir::RegisterData &regData = getRegisterData(reg);
+      return regData.isUniform();
+    }
     /*! Implement public class */
     INLINE uint32_t getRegNum(void) const { return file.regNum(); }
     /*! Implements public interface */
@@ -856,7 +893,7 @@ namespace gbe
   }
 
   bool Selection::Opaque::isScalarOrBool(ir::Register reg) const {
-    if (ctx.isScalarReg(reg))
+    if (isScalarReg(reg))
       return true;
     else {
       const ir::RegisterFamily family = file.get(reg).family;
@@ -1530,6 +1567,10 @@ namespace gbe
     return this->opaque->spillRegs(spilledRegs, registerPool);
   }
 
+  bool Selection::isScalarReg(const ir::Register &reg) const {
+    return this->opaque->isScalarReg(reg);
+  }
+
   SelectionInstruction *Selection::create(SelectionOpcode opcode, uint32_t dstNum, uint32_t srcNum) {
     return this->opaque->create(opcode, dstNum, srcNum);
   }
@@ -2497,7 +2538,7 @@ namespace gbe
                  insn.getAddressSpace() == MEM_CONSTANT ||
                  insn.getAddressSpace() == MEM_PRIVATE ||
                  insn.getAddressSpace() == MEM_LOCAL);
-      GBE_ASSERT(sel.ctx.isScalarReg(insn.getValue(0)) == false);
+      GBE_ASSERT(sel.isScalarReg(insn.getValue(0)) == false);
       const Type type = insn.getValueType();
       const uint32_t elemSize = getByteScatterGatherSize(type);
       if (insn.getAddressSpace() == MEM_CONSTANT) {
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index f4f3c57..0fde1df 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -216,6 +216,8 @@ namespace gbe
     ir::Register replaceDst(SelectionInstruction *insn, uint32_t regID);
     /*! spill a register (insert spill/unspill instructions) */
     bool spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool);
+    /*! Indicate if a register is scalar or not */
+    bool isScalarReg(const ir::Register &reg) const;
     /*! Create a new selection instruction */
     SelectionInstruction *create(SelectionOpcode, uint32_t dstNum, uint32_t srcNum);
     /*! List of emitted blocks */
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index f28bf9a..f446a5b 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -223,7 +223,7 @@ namespace gbe
     const ir::Register reg = interval.reg;
     if (RA.contains(reg) == true)
       return true; // already allocated
-    GBE_ASSERT(ctx.isScalarReg(reg) == false);
+    GBE_ASSERT(ctx.sel->isScalarReg(reg) == false);
     uint32_t regSize;
     ir::RegisterFamily family;
     getRegAttrib(reg, regSize, &family);
diff --git a/backend/src/ir/context.cpp b/backend/src/ir/context.cpp
index 0fd6803..f35dec2 100644
--- a/backend/src/ir/context.cpp
+++ b/backend/src/ir/context.cpp
@@ -92,9 +92,9 @@ namespace ir {
     usedLabels = elem.usedLabels;
   }
 
-  Register Context::reg(RegisterFamily family) {
+  Register Context::reg(RegisterFamily family, bool uniform) {
     GBE_ASSERTM(fn != NULL, "No function currently defined");
-    return fn->newRegister(family);
+    return fn->newRegister(family, uniform);
   }
 
   LabelIndex Context::label(void) {
diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp
index adeaf6f..3c4ff97 100644
--- a/backend/src/ir/context.hpp
+++ b/backend/src/ir/context.hpp
@@ -61,7 +61,7 @@ namespace ir {
     /*! Append a new pushed constant */
     void appendPushedConstant(Register reg, const PushLocation &pushed);
     /*! Create a new register with the given family for the current function */
-    Register reg(RegisterFamily family);
+    Register reg(RegisterFamily family, bool uniform = false);
     /*! Create a new immediate value */
     template <typename T> INLINE ImmediateIndex newImmediate(T value) {
       const Immediate imm(value);
diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
index ad4773e..f142062 100644
--- a/backend/src/ir/function.hpp
+++ b/backend/src/ir/function.hpp
@@ -164,8 +164,8 @@ namespace ir {
     /*! Get the function profile */
     INLINE Profile getProfile(void) const { return profile; }
     /*! Get a new valid register */
-    INLINE Register newRegister(RegisterFamily family) {
-      return this->file.append(family);
+    INLINE Register newRegister(RegisterFamily family, bool uniform = false) {
+      return this->file.append(family, uniform);
     }
     /*! Get the function name */
     const std::string &getName(void) const { return name; }
diff --git a/backend/src/ir/lowering.cpp b/backend/src/ir/lowering.cpp
index ad1ea32..8042711 100644
--- a/backend/src/ir/lowering.cpp
+++ b/backend/src/ir/lowering.cpp
@@ -239,7 +239,8 @@ namespace ir {
           if(inserted.contains(argLocation)) {
             pushed = argLocation.getRegister();
           } else {
-            pushed = fn->newRegister(family);
+            // pushed register should be uniform register.
+            pushed = fn->newRegister(family, true);
             this->appendPushedConstant(pushed, argLocation);
             inserted.insert(argLocation);
           }
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index 708bc4d..0a64d81 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -45,42 +45,42 @@ namespace ir {
     };
 
 #if GBE_DEBUG
-#define DECL_NEW_REG(FAMILY, REG) \
-   r = fn.newRegister(FAMILY_DWORD); \
+#define DECL_NEW_REG(FAMILY, REG, UNIFORM) \
+   r = fn.newRegister(FAMILY_DWORD, UNIFORM); \
    GBE_ASSERT(r == REG);
 #else
-#define DECL_NEW_REG(FAMILY, REG) \
-   fn.newRegister(FAMILY_DWORD);
+#define DECL_NEW_REG(FAMILY, REG, UNIFORM) \
+   fn.newRegister(FAMILY_DWORD, UNIFORM);
 #endif /* GBE_DEBUG */
     static void init(Function &fn) {
       IF_DEBUG(Register r);
-      DECL_NEW_REG(FAMILY_DWORD, lid0);
-      DECL_NEW_REG(FAMILY_DWORD, lid1);
-      DECL_NEW_REG(FAMILY_DWORD, lid2);
-      DECL_NEW_REG(FAMILY_DWORD, groupid0);
-      DECL_NEW_REG(FAMILY_DWORD, groupid1);
-      DECL_NEW_REG(FAMILY_DWORD, groupid2);
-      DECL_NEW_REG(FAMILY_DWORD, numgroup0);
-      DECL_NEW_REG(FAMILY_DWORD, numgroup1);
-      DECL_NEW_REG(FAMILY_DWORD, numgroup2);
-      DECL_NEW_REG(FAMILY_DWORD, lsize0);
-      DECL_NEW_REG(FAMILY_DWORD, lsize1);
-      DECL_NEW_REG(FAMILY_DWORD, lsize2);
-      DECL_NEW_REG(FAMILY_DWORD, gsize0);
-      DECL_NEW_REG(FAMILY_DWORD, gsize1);
-      DECL_NEW_REG(FAMILY_DWORD, gsize2);
-      DECL_NEW_REG(FAMILY_DWORD, goffset0);
-      DECL_NEW_REG(FAMILY_DWORD, goffset1);
-      DECL_NEW_REG(FAMILY_DWORD, goffset2);
-      DECL_NEW_REG(FAMILY_DWORD, stackptr);
-      DECL_NEW_REG(FAMILY_WORD, blockip);
-      DECL_NEW_REG(FAMILY_DWORD, barrierid);
-      DECL_NEW_REG(FAMILY_DWORD, threadn);
-      DECL_NEW_REG(FAMILY_DWORD, workdim);
-      DECL_NEW_REG(FAMILY_WORD, emask);
-      DECL_NEW_REG(FAMILY_WORD, notemask);
-      DECL_NEW_REG(FAMILY_WORD, barriermask);
-      DECL_NEW_REG(FAMILY_WORD, retVal);
+      DECL_NEW_REG(FAMILY_DWORD, lid0, 0);
+      DECL_NEW_REG(FAMILY_DWORD, lid1, 0);
+      DECL_NEW_REG(FAMILY_DWORD, lid2, 0);
+      DECL_NEW_REG(FAMILY_DWORD, groupid0, 1);
+      DECL_NEW_REG(FAMILY_DWORD, groupid1, 1);
+      DECL_NEW_REG(FAMILY_DWORD, groupid2, 1);
+      DECL_NEW_REG(FAMILY_DWORD, numgroup0, 1);
+      DECL_NEW_REG(FAMILY_DWORD, numgroup1, 1);
+      DECL_NEW_REG(FAMILY_DWORD, numgroup2, 1);
+      DECL_NEW_REG(FAMILY_DWORD, lsize0, 1);
+      DECL_NEW_REG(FAMILY_DWORD, lsize1, 1);
+      DECL_NEW_REG(FAMILY_DWORD, lsize2, 1);
+      DECL_NEW_REG(FAMILY_DWORD, gsize0, 1);
+      DECL_NEW_REG(FAMILY_DWORD, gsize1, 1);
+      DECL_NEW_REG(FAMILY_DWORD, gsize2, 1);
+      DECL_NEW_REG(FAMILY_DWORD, goffset0, 1);
+      DECL_NEW_REG(FAMILY_DWORD, goffset1, 1);
+      DECL_NEW_REG(FAMILY_DWORD, goffset2, 1);
+      DECL_NEW_REG(FAMILY_DWORD, stackptr, 0);
+      DECL_NEW_REG(FAMILY_WORD,  blockip, 0);
+      DECL_NEW_REG(FAMILY_DWORD, barrierid, 1);
+      DECL_NEW_REG(FAMILY_DWORD, threadn, 1);
+      DECL_NEW_REG(FAMILY_DWORD, workdim, 1);
+      DECL_NEW_REG(FAMILY_WORD, emask, 1);
+      DECL_NEW_REG(FAMILY_WORD, notemask, 1);
+      DECL_NEW_REG(FAMILY_WORD, barriermask, 1);
+      DECL_NEW_REG(FAMILY_WORD, retVal, 1);
     }
 #undef DECL_NEW_REG
 
diff --git a/backend/src/ir/register.hpp b/backend/src/ir/register.hpp
index 4f36c2e..340ebc8 100644
--- a/backend/src/ir/register.hpp
+++ b/backend/src/ir/register.hpp
@@ -70,17 +70,22 @@ namespace ir {
   {
   public:
     /*! Build a register. All fields will be immutable */
-    INLINE RegisterData(RegisterFamily family = FAMILY_DWORD) : family(family) {}
+    INLINE RegisterData(RegisterFamily family,
+                        bool uniform = false) : family(family), uniform(uniform) {}
     /*! Copy constructor */
-    INLINE RegisterData(const RegisterData &other) : family(other.family) {}
+    INLINE RegisterData(const RegisterData &other) : family(other.family), uniform(other.uniform) {}
     /*! Copy operator */
     INLINE RegisterData &operator= (const RegisterData &other) {
       this->family = other.family;
+      this->uniform = other.uniform;
       return *this;
     }
     /*! Nothing really happens here */
     INLINE ~RegisterData(void) {}
     RegisterFamily family;            //!< Register size or if it is a flag
+    INLINE const bool isUniform() const { return uniform; }
+  private:
+    bool uniform;
     GBE_CLASS(RegisterData);
   };
 
@@ -107,11 +112,11 @@ namespace ir {
   {
   public:
     /*! Return the index of a newly allocated register */
-    INLINE Register append(RegisterFamily family) {
+    INLINE Register append(RegisterFamily family, bool uniform = false) {
       GBE_ASSERTM(regNum() < MAX_INDEX,
                   "Too many defined registers (only 65535 are supported)");
       const uint16_t index = regNum();
-      const RegisterData reg(family);
+      const RegisterData reg(family, uniform);
       regs.push_back(reg);
       return Register(index);
     }
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 467e240..5a2ba16 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -329,7 +329,7 @@ namespace gbe
       scalarMap[key] = reg;
     }
     /*! Allocate a new scalar register */
-    ir::Register newScalar(Value *value, Value *key = NULL, uint32_t index = 0u)
+    ir::Register newScalar(Value *value, Value *key = NULL, uint32_t index = 0u, bool uniform = false)
     {
       // we don't allow normal constant, but GlobalValue is a special case,
       // it needs a register to store its address
@@ -342,7 +342,7 @@ namespace gbe
         case Type::DoubleTyID:
         case Type::PointerTyID:
           GBE_ASSERT(index == 0);
-          return this->newScalar(value, key, type, index);
+          return this->_newScalar(value, key, type, index, uniform);
           break;
         case Type::VectorTyID:
         {
@@ -353,7 +353,7 @@ namespace gbe
               elementTypeID != Type::FloatTyID &&
               elementTypeID != Type::DoubleTyID)
             GBE_ASSERTM(false, "Vectors of elements are not supported");
-            return this->newScalar(value, key, elementType, index);
+            return this->_newScalar(value, key, elementType, index, uniform);
           break;
         }
         default: NOT_SUPPORTED;
@@ -411,9 +411,9 @@ namespace gbe
     /*! This creates a scalar register for a Value (index is the vector index when
      *  the value is a vector of scalars)
      */
-    ir::Register newScalar(Value *value, Value *key, Type *type, uint32_t index) {
+    ir::Register _newScalar(Value *value, Value *key, Type *type, uint32_t index, bool uniform) {
       const ir::RegisterFamily family = getFamily(ctx, type);
-      const ir::Register reg = ctx.reg(family);
+      const ir::Register reg = ctx.reg(family, uniform);
       key = key == NULL ? value : key;
       this->insertRegister(reg, key, index);
       return reg;
@@ -507,7 +507,7 @@ namespace gbe
     /*! Each block end may require to emit MOVs for further PHIs */
     void emitMovForPHI(BasicBlock *curr, BasicBlock *succ);
     /*! Alocate one or several registers (if vector) for the value */
-    INLINE void newRegister(Value *value, Value *key = NULL);
+    INLINE void newRegister(Value *value, Value *key = NULL, bool uniform = false);
     /*! get the register for a llvm::Constant */
     ir::Register getConstantRegister(Constant *c, uint32_t index = 0);
     /*! Return a valid register from an operand (can use LOADI to make one) */
@@ -867,7 +867,7 @@ namespace gbe
     return processConstant<ir::ImmediateIndex>(CPV, NewImmediateFunctor(ctx), index);
   }
 
-  void GenWriter::newRegister(Value *value, Value *key) {
+  void GenWriter::newRegister(Value *value, Value *key, bool uniform) {
     auto type = value->getType();
     auto typeID = type->getTypeID();
     switch (typeID) {
@@ -875,14 +875,14 @@ namespace gbe
       case Type::FloatTyID:
       case Type::DoubleTyID:
       case Type::PointerTyID:
-        regTranslator.newScalar(value, key);
+        regTranslator.newScalar(value, key, 0, uniform);
         break;
       case Type::VectorTyID:
       {
         auto vectorType = cast<VectorType>(type);
         const uint32_t elemNum = vectorType->getNumElements();
         for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
-          regTranslator.newScalar(value, key, elemID);
+          regTranslator.newScalar(value, key, elemID, uniform);
         break;
       }
       default: NOT_SUPPORTED;
@@ -1123,13 +1123,12 @@ namespace gbe
         const std::string &argName = I->getName().str();
         Type *type = I->getType();
 
-        //add support for vector argument
+        // function arguments are uniform values.
+        this->newRegister(I, NULL, true);
+        // add support for vector argument.
         if(type->isVectorTy()) {
           VectorType *vectorType = cast<VectorType>(type);
-
-          this->newRegister(I);
           ir::Register reg = getRegister(I, 0);
-
           Type *elemType = vectorType->getElementType();
           const uint32_t elemSize = getTypeByteSize(unit, elemType);
           const uint32_t elemNum = vectorType->getNumElements();
@@ -1147,7 +1146,7 @@ namespace gbe
 
         GBE_ASSERTM(isScalarType(type) == true,
                     "vector type in the function argument is not supported yet");
-        const ir::Register reg = regTranslator.newScalar(I);
+        const ir::Register reg = getRegister(I);
         if (type->isPointerTy() == false)
           ctx.input(argName, ir::FunctionArgument::VALUE, reg, getTypeByteSize(unit, type), getAlignmentByte(unit, type));
         else {
-- 
1.8.3.2



More information about the Beignet mailing list