[Beignet] [PATCH 6/7] Support global constant arrays
Homer Hsing
homer.xing at intel.com
Wed May 1 18:00:31 PDT 2013
Version 3.
Support global constant arrays defined outside any kernel.
Example:
constant int h[] = {71,72,73,74,75,76,77};
kernel void k(global int *dst) {
int i = get_global_id(0);
dst[i] = h[i % 7];
}
Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
backend/src/backend/context.cpp | 12 ++++-
backend/src/backend/gen_reg_allocation.cpp | 1 +
backend/src/llvm/llvm_gen_backend.cpp | 86 ++++++++++++++++++++++++++++++
src/cl_command_queue_gen7.c | 8 +++
4 files changed, 106 insertions(+), 1 deletion(-)
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index 91d8d8c..c636b48 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -419,6 +419,15 @@ namespace gbe
}
});
#undef INSERT_REG
+ this->newCurbeEntry(GBE_CURBE_GLOBAL_CONSTANT_OFFSET, 0, sizeof(int));
+ specialRegs.insert(ir::ocl::constoffst);
+
+ // Insert serialized global constant arrays if used
+ const ir::ConstantSet& constantSet = unit.getConstantSet();
+ if (constantSet.getConstantNum()) {
+ size_t size = constantSet.getDataSize();
+ this->newCurbeEntry(GBE_CURBE_GLOBAL_CONSTANT_DATA, 0, size);
+ }
// Insert the number of threads
this->newCurbeEntry(GBE_CURBE_THREAD_NUM, 0, sizeof(uint32_t));
@@ -591,7 +600,8 @@ namespace gbe
reg == ir::ocl::gsize2 ||
reg == ir::ocl::goffset0 ||
reg == ir::ocl::goffset1 ||
- reg == ir::ocl::goffset2)
+ reg == ir::ocl::goffset2 ||
+ reg == ir::ocl::constoffst)
return true;
return false;
}
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 10e4ab6..8c9f358 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -523,6 +523,7 @@ namespace gbe
allocatePayloadReg(GBE_CURBE_GROUP_NUM_Z, ocl::numgroup2);
allocatePayloadReg(GBE_CURBE_STACK_POINTER, ocl::stackptr);
allocatePayloadReg(GBE_CURBE_THREAD_NUM, ocl::threadn);
+ allocatePayloadReg(GBE_CURBE_GLOBAL_CONSTANT_OFFSET, ocl::constoffst);
// Group and barrier IDs are always allocated by the hardware in r0
RA.insert(std::make_pair(ocl::groupid0, 1*sizeof(float))); // r0.1
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 42265ee..6acf60f 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -455,6 +455,8 @@ namespace gbe
virtual bool doInitialization(Module &M);
+ void collectGlobalConstant(void) const;
+
bool runOnFunction(Function &F) {
// Do not codegen any 'available_externally' functions at all, they have
// definitions outside the translation unit.
@@ -550,11 +552,60 @@ namespace gbe
char GenWriter::ID = 0;
+ void GenWriter::collectGlobalConstant(void) const {
+ const Module::GlobalListType &globalList = TheModule->getGlobalList();
+ for(auto i = globalList.begin(); i != globalList.end(); i ++) {
+ const GlobalVariable &v = *i;
+ const char *name = v.getName().data();
+ unsigned addrSpace = v.getType()->getAddressSpace();
+ if(addrSpace == ir::AddressSpace::MEM_CONSTANT) {
+ GBE_ASSERT(v.hasInitializer());
+ const Constant *c = v.getInitializer();
+ GBE_ASSERT(c->getType()->getTypeID() == Type::ArrayTyID);
+ const ConstantDataArray *cda = dyn_cast<ConstantDataArray>(c);
+ GBE_ASSERT(cda);
+ unsigned len = cda->getNumElements();
+ uint64_t elementSize = cda->getElementByteSize();
+ Type::TypeID typeID = cda->getElementType()->getTypeID();
+ if(typeID == Type::TypeID::IntegerTyID)
+ elementSize = sizeof(unsigned);
+ void *mem = malloc(elementSize * len);
+ for(unsigned j = 0; j < len; j ++) {
+ switch(typeID) {
+ case Type::TypeID::FloatTyID:
+ {
+ float f = cda->getElementAsFloat(j);
+ memcpy((float *)mem + j, &f, elementSize);
+ }
+ break;
+ case Type::TypeID::DoubleTyID:
+ {
+ double d = cda->getElementAsDouble(j);
+ memcpy((double *)mem + j, &d, elementSize);
+ }
+ break;
+ case Type::TypeID::IntegerTyID:
+ {
+ unsigned u = (unsigned) cda->getElementAsInteger(j);
+ memcpy((unsigned *)mem + j, &u, elementSize);
+ }
+ break;
+ default:
+ NOT_IMPLEMENTED;
+ }
+ }
+ unit.newConstant((char *)mem, name, elementSize * len, sizeof(unsigned));
+ free(mem);
+ }
+ }
+ }
+
bool GenWriter::doInitialization(Module &M) {
FunctionPass::doInitialization(M);
// Initialize
TheModule = &M;
+ collectGlobalConstant();
return false;
}
@@ -704,6 +755,17 @@ namespace gbe
}
ir::Register GenWriter::getRegister(Value *value, uint32_t elemID) {
+ if (dyn_cast<ConstantExpr>(value)) {
+ ConstantExpr *ce = dyn_cast<ConstantExpr>(value);
+ if(ce->isCast()) {
+ GBE_ASSERT(ce->getOpcode() == Instruction::PtrToInt);
+ const Value *pointer = ce->getOperand(0);
+ GBE_ASSERT(pointer->hasName());
+ auto name = pointer->getName().str();
+ uint16_t reg = unit.getConstantSet().getConstant(name).getReg();
+ return ir::Register(reg);
+ }
+ }
Constant *CPV = dyn_cast<Constant>(value);
if (CPV) {
GBE_ASSERT(isa<GlobalValue>(CPV) == false);
@@ -1075,6 +1137,30 @@ namespace gbe
this->regTranslator.clear();
this->labelMap.clear();
this->emitFunctionPrototype(F);
+
+ // Allocate a virtual register for each global constant array
+ const Module::GlobalListType &globalList = TheModule->getGlobalList();
+ size_t j = 0;
+ for(auto i = globalList.begin(); i != globalList.end(); i ++) {
+ const GlobalVariable &v = *i;
+ unsigned addrSpace = v.getType()->getAddressSpace();
+ if(addrSpace != ir::AddressSpace::MEM_CONSTANT)
+ continue;
+ GBE_ASSERT(v.hasInitializer());
+ const Constant *c = v.getInitializer();
+ GBE_ASSERT(c->getType()->getTypeID() == Type::ArrayTyID);
+ const ConstantDataArray *cda = dyn_cast<ConstantDataArray>(c);
+ GBE_ASSERT(cda);
+ ir::Register reg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
+ ir::Constant &con = unit.getConstantSet().getConstant(j ++);
+ con.setReg(reg.value());
+ if(con.getOffset() != 0) {
+ ctx.LOADI(ir::TYPE_S32, reg, ctx.newIntegerImmediate(con.getOffset(), ir::TYPE_S32));
+ ctx.ADD(ir::TYPE_S32, reg, ir::ocl::constoffst, reg);
+ } else {
+ ctx.MOV(ir::TYPE_S32, reg, ir::ocl::constoffst);
+ }
+ }
// Visit all the instructions and emit the IR registers or the value to
// value mapping when a new register is not needed
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 9402549..108684f 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -120,6 +120,7 @@ cl_curbe_fill(cl_kernel ker,
UPLOAD(GBE_CURBE_GROUP_NUM_Y, global_wk_sz[1]/local_wk_sz[1]);
UPLOAD(GBE_CURBE_GROUP_NUM_Z, global_wk_sz[2]/local_wk_sz[2]);
UPLOAD(GBE_CURBE_THREAD_NUM, thread_n);
+ UPLOAD(GBE_CURBE_GLOBAL_CONSTANT_OFFSET, gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_GLOBAL_CONSTANT_DATA, 0) + 32);
#undef UPLOAD
/* Write identity for the stack pointer. This is required by the stack pointer
@@ -132,6 +133,13 @@ cl_curbe_fill(cl_kernel ker,
for (i = 0; i < (int32_t) simd_sz; ++i) stackptr[i] = i;
}
+ /* Write global constant arrays */
+ if ((offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_GLOBAL_CONSTANT_DATA, 0)) >= 0) {
+ /* Write the global constant arrays */
+ gbe_program prog = ker->program->opaque;
+ gbe_program_get_global_constant_data(prog, ker->curbe + offset);
+ }
+
/* Handle the various offsets to SLM */
const int32_t arg_n = gbe_kernel_get_arg_num(ker->opaque);
int32_t arg, slm_offset = 0;
--
1.8.1.2
More information about the Beignet
mailing list