[Beignet] [PATCH 6/7] Support global constant arrays

Homer Hsing homer.xing at intel.com
Wed May 1 18:00:31 PDT 2013


Version 3.
Support global constant arrays defined outside any kernel.

Example:

constant int h[] = {71,72,73,74,75,76,77};
kernel void k(global int *dst) {
  int i = get_global_id(0);
  dst[i] = h[i % 7];
}

Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
 backend/src/backend/context.cpp            | 12 ++++-
 backend/src/backend/gen_reg_allocation.cpp |  1 +
 backend/src/llvm/llvm_gen_backend.cpp      | 86 ++++++++++++++++++++++++++++++
 src/cl_command_queue_gen7.c                |  8 +++
 4 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index 91d8d8c..c636b48 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -419,6 +419,15 @@ namespace gbe
       }
     });
 #undef INSERT_REG
+    this->newCurbeEntry(GBE_CURBE_GLOBAL_CONSTANT_OFFSET, 0, sizeof(int));
+    specialRegs.insert(ir::ocl::constoffst);
+
+    // Insert serialized global constant arrays if used
+    const ir::ConstantSet& constantSet = unit.getConstantSet();
+    if (constantSet.getConstantNum()) {
+      size_t size = constantSet.getDataSize();
+      this->newCurbeEntry(GBE_CURBE_GLOBAL_CONSTANT_DATA, 0, size);
+    }
 
     // Insert the number of threads
     this->newCurbeEntry(GBE_CURBE_THREAD_NUM, 0, sizeof(uint32_t));
@@ -591,7 +600,8 @@ namespace gbe
         reg == ir::ocl::gsize2    ||
         reg == ir::ocl::goffset0  ||
         reg == ir::ocl::goffset1  ||
-        reg == ir::ocl::goffset2)
+        reg == ir::ocl::goffset2  ||
+        reg == ir::ocl::constoffst)
       return true;
     return false;
   }
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 10e4ab6..8c9f358 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -523,6 +523,7 @@ namespace gbe
     allocatePayloadReg(GBE_CURBE_GROUP_NUM_Z, ocl::numgroup2);
     allocatePayloadReg(GBE_CURBE_STACK_POINTER, ocl::stackptr);
     allocatePayloadReg(GBE_CURBE_THREAD_NUM, ocl::threadn);
+    allocatePayloadReg(GBE_CURBE_GLOBAL_CONSTANT_OFFSET, ocl::constoffst);
 
     // Group and barrier IDs are always allocated by the hardware in r0
     RA.insert(std::make_pair(ocl::groupid0,  1*sizeof(float))); // r0.1
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 42265ee..6acf60f 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -455,6 +455,8 @@ namespace gbe
 
     virtual bool doInitialization(Module &M);
 
+    void collectGlobalConstant(void) const;
+
     bool runOnFunction(Function &F) {
      // Do not codegen any 'available_externally' functions at all, they have
      // definitions outside the translation unit.
@@ -550,11 +552,60 @@ namespace gbe
 
   char GenWriter::ID = 0;
 
+  void GenWriter::collectGlobalConstant(void) const {
+    const Module::GlobalListType &globalList = TheModule->getGlobalList();
+    for(auto i = globalList.begin(); i != globalList.end(); i ++) {
+      const GlobalVariable &v = *i;
+      const char *name = v.getName().data();
+      unsigned addrSpace = v.getType()->getAddressSpace();
+      if(addrSpace == ir::AddressSpace::MEM_CONSTANT) {
+        GBE_ASSERT(v.hasInitializer());
+        const Constant *c = v.getInitializer();
+        GBE_ASSERT(c->getType()->getTypeID() == Type::ArrayTyID);
+        const ConstantDataArray *cda = dyn_cast<ConstantDataArray>(c);
+        GBE_ASSERT(cda);
+        unsigned len = cda->getNumElements();
+        uint64_t elementSize = cda->getElementByteSize();
+        Type::TypeID typeID = cda->getElementType()->getTypeID();
+        if(typeID == Type::TypeID::IntegerTyID)
+          elementSize = sizeof(unsigned);
+        void *mem = malloc(elementSize * len);
+        for(unsigned j = 0; j < len; j ++) {
+          switch(typeID) {
+            case Type::TypeID::FloatTyID:
+             {
+              float f = cda->getElementAsFloat(j);
+              memcpy((float *)mem + j, &f, elementSize);
+             }
+              break;
+            case Type::TypeID::DoubleTyID:
+             {
+              double d = cda->getElementAsDouble(j);
+              memcpy((double *)mem + j, &d, elementSize);
+             }
+              break;
+            case Type::TypeID::IntegerTyID:
+             {
+              unsigned u = (unsigned) cda->getElementAsInteger(j);
+              memcpy((unsigned *)mem + j, &u, elementSize);
+             }
+              break;
+            default:
+              NOT_IMPLEMENTED;
+          }
+        }
+        unit.newConstant((char *)mem, name, elementSize * len, sizeof(unsigned));
+        free(mem);
+      }
+    }
+  }
+
   bool GenWriter::doInitialization(Module &M) {
     FunctionPass::doInitialization(M);
 
     // Initialize
     TheModule = &M;
+    collectGlobalConstant();
     return false;
   }
 
@@ -704,6 +755,17 @@ namespace gbe
   }
 
   ir::Register GenWriter::getRegister(Value *value, uint32_t elemID) {
+    if (dyn_cast<ConstantExpr>(value)) {
+      ConstantExpr *ce = dyn_cast<ConstantExpr>(value);
+      if(ce->isCast()) {
+        GBE_ASSERT(ce->getOpcode() == Instruction::PtrToInt);
+        const Value *pointer = ce->getOperand(0);
+        GBE_ASSERT(pointer->hasName());
+        auto name = pointer->getName().str();
+        uint16_t reg = unit.getConstantSet().getConstant(name).getReg();
+        return ir::Register(reg);
+      }
+    }
     Constant *CPV = dyn_cast<Constant>(value);
     if (CPV) {
       GBE_ASSERT(isa<GlobalValue>(CPV) == false);
@@ -1075,6 +1137,30 @@ namespace gbe
     this->regTranslator.clear();
     this->labelMap.clear();
     this->emitFunctionPrototype(F);
+    
+    // Allocate a virtual register for each global constant array
+    const Module::GlobalListType &globalList = TheModule->getGlobalList();
+    size_t j = 0;
+    for(auto i = globalList.begin(); i != globalList.end(); i ++) {
+      const GlobalVariable &v = *i;
+      unsigned addrSpace = v.getType()->getAddressSpace();
+      if(addrSpace != ir::AddressSpace::MEM_CONSTANT)
+        continue;
+      GBE_ASSERT(v.hasInitializer());
+      const Constant *c = v.getInitializer();
+      GBE_ASSERT(c->getType()->getTypeID() == Type::ArrayTyID);
+      const ConstantDataArray *cda = dyn_cast<ConstantDataArray>(c);
+      GBE_ASSERT(cda);
+      ir::Register reg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
+      ir::Constant &con = unit.getConstantSet().getConstant(j ++);
+      con.setReg(reg.value());
+      if(con.getOffset() != 0) {
+        ctx.LOADI(ir::TYPE_S32, reg, ctx.newIntegerImmediate(con.getOffset(), ir::TYPE_S32));
+        ctx.ADD(ir::TYPE_S32, reg, ir::ocl::constoffst, reg);
+      } else {
+        ctx.MOV(ir::TYPE_S32, reg, ir::ocl::constoffst);
+      }
+    }
 
     // Visit all the instructions and emit the IR registers or the value to
     // value mapping when a new register is not needed
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 9402549..108684f 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -120,6 +120,7 @@ cl_curbe_fill(cl_kernel ker,
   UPLOAD(GBE_CURBE_GROUP_NUM_Y, global_wk_sz[1]/local_wk_sz[1]);
   UPLOAD(GBE_CURBE_GROUP_NUM_Z, global_wk_sz[2]/local_wk_sz[2]);
   UPLOAD(GBE_CURBE_THREAD_NUM, thread_n);
+  UPLOAD(GBE_CURBE_GLOBAL_CONSTANT_OFFSET, gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_GLOBAL_CONSTANT_DATA, 0) + 32);
 #undef UPLOAD
 
   /* Write identity for the stack pointer. This is required by the stack pointer
@@ -132,6 +133,13 @@ cl_curbe_fill(cl_kernel ker,
     for (i = 0; i < (int32_t) simd_sz; ++i) stackptr[i] = i;
   }
 
+  /* Write global constant arrays */
+  if ((offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_GLOBAL_CONSTANT_DATA, 0)) >= 0) {
+    /* Write the global constant arrays */
+    gbe_program prog = ker->program->opaque;
+    gbe_program_get_global_constant_data(prog, ker->curbe + offset);
+  }
+
   /* Handle the various offsets to SLM */
   const int32_t arg_n = gbe_kernel_get_arg_num(ker->opaque);
   int32_t arg, slm_offset = 0;
-- 
1.8.1.2



More information about the Beignet mailing list