[Beignet] [PATCH 6/7] Support global constant arrays

Zhigang Gong zhigang.gong at linux.intel.com
Sat Apr 27 03:44:46 PDT 2013


On Thu, Apr 25, 2013 at 02:21:43PM +0800, Homer Hsing wrote:
> Support global constant arrays defined outside any kernel.
> 
> Example:
> 
> constant int h[] = {71,72,73,74,75,76,77};
> kernel void k(global int *dst) {
>   int i = get_global_id(0);
>   dst[i] = h[i % 7];
> }
> 
> Signed-off-by: Homer Hsing <homer.xing at intel.com>
> ---
>  backend/src/backend/context.cpp            | 12 ++++-
>  backend/src/backend/gen_reg_allocation.cpp |  1 +
>  backend/src/llvm/llvm_gen_backend.cpp      | 82 ++++++++++++++++++++++++++++++
>  src/cl_command_queue_gen7.c                |  8 +++
>  4 files changed, 102 insertions(+), 1 deletion(-)
> 
> diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
> index 91d8d8c..c636b48 100644
> --- a/backend/src/backend/context.cpp
> +++ b/backend/src/backend/context.cpp
> @@ -419,6 +419,15 @@ namespace gbe
>        }
>      });
>  #undef INSERT_REG
> +    this->newCurbeEntry(GBE_CURBE_GLOBAL_CONSTANT_OFFSET, 0, sizeof(int));
> +    specialRegs.insert(ir::ocl::constoffst);
> +
> +    // Insert serialized global constant arrays if used
> +    const ir::ConstantSet& constantSet = unit.getConstantSet();
> +    if (constantSet.getConstantNum()) {
> +      size_t size = constantSet.getDataSize();
> +      this->newCurbeEntry(GBE_CURBE_GLOBAL_CONSTANT_DATA, 0, size);
> +    }
>  
>      // Insert the number of threads
>      this->newCurbeEntry(GBE_CURBE_THREAD_NUM, 0, sizeof(uint32_t));
> @@ -591,7 +600,8 @@ namespace gbe
>          reg == ir::ocl::gsize2    ||
>          reg == ir::ocl::goffset0  ||
>          reg == ir::ocl::goffset1  ||
> -        reg == ir::ocl::goffset2)
> +        reg == ir::ocl::goffset2  ||
> +        reg == ir::ocl::constoffst)
>        return true;
>      return false;
>    }
> diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
> index 10e4ab6..8c9f358 100644
> --- a/backend/src/backend/gen_reg_allocation.cpp
> +++ b/backend/src/backend/gen_reg_allocation.cpp
> @@ -523,6 +523,7 @@ namespace gbe
>      allocatePayloadReg(GBE_CURBE_GROUP_NUM_Z, ocl::numgroup2);
>      allocatePayloadReg(GBE_CURBE_STACK_POINTER, ocl::stackptr);
>      allocatePayloadReg(GBE_CURBE_THREAD_NUM, ocl::threadn);
> +    allocatePayloadReg(GBE_CURBE_GLOBAL_CONSTANT_OFFSET, ocl::constoffst);
>  
>      // Group and barrier IDs are always allocated by the hardware in r0
>      RA.insert(std::make_pair(ocl::groupid0,  1*sizeof(float))); // r0.1
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 42265ee..353dd93 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -455,6 +455,8 @@ namespace gbe
>  
>      virtual bool doInitialization(Module &M);
>  
> +    void collectGlobalConstant(void) const;
> +
>      bool runOnFunction(Function &F) {
>       // Do not codegen any 'available_externally' functions at all, they have
>       // definitions outside the translation unit.
> @@ -550,11 +552,60 @@ namespace gbe
>  
>    char GenWriter::ID = 0;
>  
> +  void GenWriter::collectGlobalConstant(void) const {
> +    const Module::GlobalListType &globalList = TheModule->getGlobalList();
> +    for(auto i = globalList.begin(); i != globalList.end(); i ++) {
> +      const GlobalVariable &v = *i;
> +      const char *name = v.getName().data();
> +      unsigned addrSpace = v.getType()->getAddressSpace();
> +      if(addrSpace == ir::AddressSpace::MEM_CONSTANT) {
> +        GBE_ASSERT(v.hasInitializer());
> +        const Constant *c = v.getInitializer();
> +        GBE_ASSERT(c->getType()->getTypeID() == Type::ArrayTyID);
> +        const ConstantDataArray *cda = dyn_cast<ConstantDataArray>(c);
> +        GBE_ASSERT(cda);
> +        unsigned len = cda->getNumElements();
> +        uint64_t elementSize = cda->getElementByteSize();
> +        Type::TypeID typeID = cda->getElementType()->getTypeID();
> +        if(typeID == Type::TypeID::IntegerTyID)
> +          elementSize = sizeof(unsigned);
> +        void *mem = malloc(elementSize * len);
> +        for(unsigned j = 0; j < len; j ++) {
> +          switch(typeID) {
> +            case Type::TypeID::FloatTyID:
> +             {
> +              float f = cda->getElementAsFloat(j);
> +              memcpy((float *)mem + j, &f, elementSize);
> +             }
> +              break;
> +            case Type::TypeID::DoubleTyID:
> +             {
> +              double d = cda->getElementAsDouble(j);
> +              memcpy((double *)mem + j, &d, elementSize);
> +             }
> +              break;
> +            case Type::TypeID::IntegerTyID:
> +             {
> +              unsigned u = (unsigned) cda->getElementAsInteger(j);
> +              memcpy((unsigned *)mem + j, &u, elementSize);
> +             }
> +              break;
> +            default:
> +              NOT_IMPLEMENTED;
> +          }
> +        }
> +        unit.newConstant((char *)mem, name, elementSize * len, sizeof(unsigned));
> +        free(mem);
> +      }
> +    }
> +  }
> +
>    bool GenWriter::doInitialization(Module &M) {
>      FunctionPass::doInitialization(M);
>  
>      // Initialize
>      TheModule = &M;
> +    collectGlobalConstant();
>      return false;
>    }
>  
> @@ -704,6 +755,17 @@ namespace gbe
>    }
>  
>    ir::Register GenWriter::getRegister(Value *value, uint32_t elemID) {
> +    if (dyn_cast<ConstantExpr>(value)) {
> +      ConstantExpr *ce = dyn_cast<ConstantExpr>(value);
> +      if(ce->isCast()) {
> +        GBE_ASSERT(ce->getOpcode() == Instruction::PtrToInt);
> +        const Value *pointer = ce->getOperand(0);
> +        GBE_ASSERT(pointer->hasName());
> +        auto name = pointer->getName().str();
> +        uint16_t reg = unit.getConstantSet().getConstant(name).getReg();
> +        return ir::Register(reg);
> +      }
> +    }
>      Constant *CPV = dyn_cast<Constant>(value);
>      if (CPV) {
>        GBE_ASSERT(isa<GlobalValue>(CPV) == false);
> @@ -1075,6 +1137,26 @@ namespace gbe
>      this->regTranslator.clear();
>      this->labelMap.clear();
>      this->emitFunctionPrototype(F);
> +    
> +    // Allocate a virtual register for each global constant array
> +    const Module::GlobalListType &globalList = TheModule->getGlobalList();
> +    size_t j = 0;
> +    for(auto i = globalList.begin(); i != globalList.end(); i ++) {
> +      const GlobalVariable &v = *i;
> +      unsigned addrSpace = v.getType()->getAddressSpace();
> +      if(addrSpace != ir::AddressSpace::MEM_CONSTANT)
> +        continue;
> +      GBE_ASSERT(v.hasInitializer());
> +      const Constant *c = v.getInitializer();
> +      GBE_ASSERT(c->getType()->getTypeID() == Type::ArrayTyID);
> +      const ConstantDataArray *cda = dyn_cast<ConstantDataArray>(c);
> +      GBE_ASSERT(cda);
> +      ir::Register reg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
> +      ir::Constant &con = unit.getConstantSet().getConstant(j ++);
> +      con.setReg(reg.value());
> +      ctx.LOADI(ir::TYPE_S32, reg, ctx.newIntegerImmediate(con.getOffset(), ir::TYPE_S32));
> +      ctx.ADD(ir::TYPE_S32, reg, ir::ocl::constoffst, reg);
Only one comment here, if the con.getOffset() is zero, then we can save one instruction.
And furthermore, if the offset is zero, can we just set the reg as ir::ocl::constoffst's proxy register,
then we can save two instructions here? I'm not very sure of this. But at least, you can save at least
one instruction if the offset is zero. For all the rest of the patch, LGTM. Thanks for the patch.

> +    }
>  
>      // Visit all the instructions and emit the IR registers or the value to
>      // value mapping when a new register is not needed
> diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
> index 9402549..108684f 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -120,6 +120,7 @@ cl_curbe_fill(cl_kernel ker,
>    UPLOAD(GBE_CURBE_GROUP_NUM_Y, global_wk_sz[1]/local_wk_sz[1]);
>    UPLOAD(GBE_CURBE_GROUP_NUM_Z, global_wk_sz[2]/local_wk_sz[2]);
>    UPLOAD(GBE_CURBE_THREAD_NUM, thread_n);
> +  UPLOAD(GBE_CURBE_GLOBAL_CONSTANT_OFFSET, gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_GLOBAL_CONSTANT_DATA, 0) + 32);
>  #undef UPLOAD
>  
>    /* Write identity for the stack pointer. This is required by the stack pointer
> @@ -132,6 +133,13 @@ cl_curbe_fill(cl_kernel ker,
>      for (i = 0; i < (int32_t) simd_sz; ++i) stackptr[i] = i;
>    }
>  
> +  /* Write global constant arrays */
> +  if ((offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_GLOBAL_CONSTANT_DATA, 0)) >= 0) {
> +    /* Write the global constant arrays */
> +    gbe_program prog = ker->program->opaque;
> +    gbe_program_get_global_constant_data(prog, ker->curbe + offset);
> +  }
> +
>    /* Handle the various offsets to SLM */
>    const int32_t arg_n = gbe_kernel_get_arg_num(ker->opaque);
>    int32_t arg, slm_offset = 0;
> -- 
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list