[Beignet] [PATCH 1/2] GBE: Support local variable inside kernel function.

Zhigang Gong zhigang.gong at linux.intel.com
Mon Oct 14 02:41:33 PDT 2013


Pushed, thanks.

On Thu, Oct 10, 2013 at 03:13:50PM +0800, Ruiling Song wrote:
> As Clang treat local variable in similar way like global constant,
> (they are treated as Global variable in each own address space)
> we refine the previous constant implementation in order to
> share same code between local variable and global constant.
> 
> We will allocate an address register for each GlobalVariable
> (constant or local) through calling newRegister().
> In later step, through getRegister() we will get a proper
> register derived from the allocated address register.
> 
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
>  backend/src/backend/context.cpp       |    1 +
>  backend/src/backend/program.cpp       |   10 ++-
>  backend/src/backend/program.h         |    3 +
>  backend/src/backend/program.hpp       |    3 +
>  backend/src/ir/function.cpp           |    2 +-
>  backend/src/ir/function.hpp           |    5 ++
>  backend/src/llvm/llvm_gen_backend.cpp |  157 ++++++++++++++++++++++-----------
>  kernels/compiler_local_slm.cl         |   28 ++++--
>  src/cl_command_queue_gen7.c           |    3 +-
>  utests/CMakeLists.txt                 |    1 +
>  utests/compiler_local_slm.cpp         |   30 ++++++-
>  11 files changed, 179 insertions(+), 64 deletions(-)
> 
> diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
> index cbd38f1..bc15761 100644
> --- a/backend/src/backend/context.cpp
> +++ b/backend/src/backend/context.cpp
> @@ -632,6 +632,7 @@ namespace gbe
>    void Context::handleSLM(void) {
>      const bool useSLM = fn.getUseSLM();
>      kernel->useSLM = useSLM;
> +    kernel->slmSize = fn.getSLMSize();
>    }
>  
>    bool Context::isScalarReg(const ir::Register &reg) const {
> diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
> index 6550eac..24029c7 100644
> --- a/backend/src/backend/program.cpp
> +++ b/backend/src/backend/program.cpp
> @@ -75,7 +75,7 @@
>  namespace gbe {
>  
>    Kernel::Kernel(const std::string &name) :
> -    name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false), ctx(NULL), samplerSet(NULL), imageSet(NULL)
> +    name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false), slmSize(0), ctx(NULL), samplerSet(NULL), imageSet(NULL)
>    {}
>    Kernel::~Kernel(void) {
>      if(ctx) GBE_DELETE(ctx);
> @@ -709,6 +709,12 @@ namespace gbe {
>      return kernel->getUseSLM() ? 1 : 0;
>    }
>  
> +  static int32_t kernelGetSLMSize(gbe_kernel genKernel) {
> +    if (genKernel == NULL) return 0;
> +    const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
> +    return kernel->getSLMSize();
> +  }
> +
>    static int32_t kernelSetConstBufSize(gbe_kernel genKernel, uint32_t argID, size_t sz) {
>      if (genKernel == NULL) return -1;
>      gbe::Kernel *kernel = (gbe::Kernel*) genKernel;
> @@ -776,6 +782,7 @@ GBE_EXPORT_SYMBOL gbe_kernel_get_scratch_size_cb *gbe_kernel_get_scratch_size =
>  GBE_EXPORT_SYMBOL gbe_kernel_set_const_buffer_size_cb *gbe_kernel_set_const_buffer_size = NULL;
>  GBE_EXPORT_SYMBOL gbe_kernel_get_required_work_group_size_cb *gbe_kernel_get_required_work_group_size = NULL;
>  GBE_EXPORT_SYMBOL gbe_kernel_use_slm_cb *gbe_kernel_use_slm = NULL;
> +GBE_EXPORT_SYMBOL gbe_kernel_get_slm_size_cb *gbe_kernel_get_slm_size = NULL;
>  GBE_EXPORT_SYMBOL gbe_kernel_get_sampler_size_cb *gbe_kernel_get_sampler_size = NULL;
>  GBE_EXPORT_SYMBOL gbe_kernel_get_sampler_data_cb *gbe_kernel_get_sampler_data = NULL;
>  GBE_EXPORT_SYMBOL gbe_kernel_get_image_size_cb *gbe_kernel_get_image_size = NULL;
> @@ -810,6 +817,7 @@ namespace gbe
>        gbe_kernel_set_const_buffer_size = gbe::kernelSetConstBufSize;
>        gbe_kernel_get_required_work_group_size = gbe::kernelGetRequiredWorkGroupSize;
>        gbe_kernel_use_slm = gbe::kernelUseSLM;
> +      gbe_kernel_get_slm_size = gbe::kernelGetSLMSize;
>        gbe_kernel_get_sampler_size = gbe::kernelGetSamplerSize;
>        gbe_kernel_get_sampler_data = gbe::kernelGetSamplerData;
>        gbe_kernel_get_image_size = gbe::kernelGetImageSize;
> diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
> index 8774344..10fcc49 100644
> --- a/backend/src/backend/program.h
> +++ b/backend/src/backend/program.h
> @@ -218,6 +218,9 @@ extern gbe_kernel_get_required_work_group_size_cb *gbe_kernel_get_required_work_
>  /*! Says if SLM is used. Required to reconfigure the L3 complex */
>  typedef int32_t (gbe_kernel_use_slm_cb)(gbe_kernel);
>  extern gbe_kernel_use_slm_cb *gbe_kernel_use_slm;
> +/*! Get slm size needed for kernel local variables */
> +typedef int32_t (gbe_kernel_get_slm_size_cb)(gbe_kernel);
> +extern gbe_kernel_get_slm_size_cb *gbe_kernel_get_slm_size;
>  
>  #ifdef __cplusplus
>  }
> diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
> index 28a792d..895cd01 100644
> --- a/backend/src/backend/program.hpp
> +++ b/backend/src/backend/program.hpp
> @@ -104,6 +104,8 @@ namespace gbe {
>      INLINE uint32_t getSIMDWidth(void) const { return this->simdWidth; }
>      /*! Says if SLM is needed for it */
>      INLINE bool getUseSLM(void) const { return this->useSLM; }
> +    /*! get slm size for kernel local variable */
> +    INLINE uint32_t getSLMSize(void) const { return this->slmSize; }
>      /*! set constant buffer size and return the cb curbe offset */
>      int32_t setConstBufSize(uint32_t argID, size_t sz) {
>        if(argID >= argNum) return -1;
> @@ -169,6 +171,7 @@ namespace gbe {
>      uint32_t stackSize;        //!< Stack size (may be 0 if unused)
>      uint32_t scratchSize;      //!< Scratch memory size (may be 0 if unused)
>      bool useSLM;               //!< SLM requires a special HW config
> +    uint32_t slmSize;          //!< slm size for kernel variable
>      Context *ctx;              //!< Save context after compiler to alloc constant buffer curbe
>      ir::SamplerSet *samplerSet;//!< Copy from the corresponding function.
>      ir::ImageSet *imageSet;    //!< Copy from the corresponding function.
> diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp
> index 88aae08..c15c292 100644
> --- a/backend/src/ir/function.cpp
> +++ b/backend/src/ir/function.cpp
> @@ -43,7 +43,7 @@ namespace ir {
>    ///////////////////////////////////////////////////////////////////////////
>  
>    Function::Function(const std::string &name, const Unit &unit, Profile profile) :
> -    name(name), unit(unit), profile(profile), simdWidth(0), useSLM(false)
> +    name(name), unit(unit), profile(profile), simdWidth(0), useSLM(false), slmSize(0)
>    {
>      initProfile(*this);
>      samplerSet = GBE_NEW(SamplerSet);
> diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
> index 6e712cd..3d4733d 100644
> --- a/backend/src/ir/function.hpp
> +++ b/backend/src/ir/function.hpp
> @@ -301,6 +301,10 @@ namespace ir {
>      INLINE bool getUseSLM(void) const { return this->useSLM; }
>      /*! Change the SLM config for the function */
>      INLINE bool setUseSLM(bool useSLM) { return this->useSLM = useSLM; }
> +    /*! get SLM size needed for local variable inside kernel function */
> +    INLINE uint32_t getSLMSize(void) const { return this->slmSize; }
> +    /*! set slm size needed for local variable inside kernel function */
> +    INLINE void setSLMSize(uint32_t size) { this->slmSize = size; }
>      /*! Get sampler set in this function */
>      SamplerSet* getSamplerSet(void) const {return samplerSet; }
>      /*! Get image set in this function */
> @@ -320,6 +324,7 @@ namespace ir {
>      LocationMap locationMap;        //!< Pushed function arguments (loc->reg)
>      uint32_t simdWidth;             //!< 8 or 16 if forced, 0 otherwise
>      bool useSLM;                    //!< Is SLM required?
> +    uint32_t slmSize;               //!< local variable size inside kernel function
>      SamplerSet *samplerSet;          //!< samplers used in this function.
>      ImageSet* imageSet;              //!< Image set in this function's arguments..
>      GBE_CLASS(Function);            //!< Use custom allocator
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 5b6857d..7af5bb8 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -321,7 +321,9 @@ namespace gbe
>      /*! Allocate a new scalar register */
>      ir::Register newScalar(Value *value, Value *key = NULL, uint32_t index = 0u)
>      {
> -      GBE_ASSERT(dyn_cast<Constant>(value) == NULL);
> +      // we don't allow normal constant, but GlobalValue is a special case,
> +      // it needs a register to store its address
> +      GBE_ASSERT(! (isa<Constant>(value) && !isa<GlobalValue>(value)));
>        Type *type = value->getType();
>        auto typeID = type->getTypeID();
>        switch (typeID) {
> @@ -477,7 +479,8 @@ namespace gbe
>      }
>  
>      virtual bool doFinalization(Module &M) { return false; }
> -
> +    /*! handle global variable register allocation (local, constant space) */
> +    void allocateGlobalVariableRegister(Function &F);
>      /*! Emit the complete function code and declaration */
>      void emitFunction(Function &F);
>      /*! Handle input and output function parameters */
> @@ -488,6 +491,8 @@ namespace gbe
>      void emitMovForPHI(BasicBlock *curr, BasicBlock *succ);
>      /*! Alocate one or several registers (if vector) for the value */
>      INLINE void newRegister(Value *value, Value *key = NULL);
> +    /*! get the register for a llvm::Constant */
> +    ir::Register getConstantRegister(Constant *c, uint32_t index = 0);
>      /*! Return a valid register from an operand (can use LOADI to make one) */
>      INLINE ir::Register getRegister(Value *value, uint32_t index = 0);
>      /*! Create a new immediate from a constant */
> @@ -838,40 +843,46 @@ namespace gbe
>      };
>    }
>  
> -  ir::Register GenWriter::getRegister(Value *value, uint32_t elemID) {
> -    //the real value may be constant, so get real value before constant check
> -    regTranslator.getRealValue(value, elemID);
> +  ir::Register GenWriter::getConstantRegister(Constant *c, uint32_t elemID) {
> +    GBE_ASSERT(c != NULL);
>  
> -    if (dyn_cast<ConstantExpr>(value)) {
> -      ConstantExpr *ce = dyn_cast<ConstantExpr>(value);
> -      if(ce->isCast()) {
> -        GBE_ASSERT(ce->getOpcode() == Instruction::PtrToInt);
> -        const Value *pointer = ce->getOperand(0);
> -        GBE_ASSERT(pointer->hasName());
> -        auto name = pointer->getName().str();
> -        uint16_t reg = unit.getConstantSet().getConstant(name).getReg();
> -        return ir::Register(reg);
> -      }
> +    if(isa<GlobalValue>(c)) {
> +      return regTranslator.getScalar(c, elemID);
>      }
> -    Constant *CPV = dyn_cast<Constant>(value);
> -    if (CPV) {
> -      if (isa<GlobalValue>(CPV)) {
> -        auto name = CPV->getName().str();
> -        uint16_t reg = unit.getConstantSet().getConstant(name).getReg();
> -        return ir::Register(reg);
> -      }
> -      if (isa<ConstantExpr>(CPV)) {
> +
> +    if(isa<ConstantExpr>(c)) {
> +      ConstantExpr * ce = dyn_cast<ConstantExpr>(c);
> +
> +      if(ce->isCast()) {
> +        Value* op = ce->getOperand(0);
> +        ir::Register pointer_reg;
> +        if(isa<ConstantExpr>(op)) {
> +          // try to get the real pointer register, for case like:
> +          // store i64 ptrtoint (i8 addrspace(3)* getelementptr inbounds ...
> +          // in which ptrtoint and getelementptr are ConstantExpr.
> +          pointer_reg = getConstantRegister(dyn_cast<Constant>(op), elemID);
> +        } else {
> +          pointer_reg = regTranslator.getScalar(op, elemID);
> +        }
> +        // if ptrToInt request another type other than 32bit, convert as requested
> +        ir::Type dstType = getType(ctx, ce->getType());
> +        if(ce->getOpcode() == Instruction::PtrToInt && ir::TYPE_S32 != dstType) {
> +          ir::Register tmp = ctx.reg(getFamily(dstType));
> +          ctx.CVT(dstType, ir::TYPE_S32, tmp, pointer_reg);
> +          return tmp;
> +        }
> +        return pointer_reg;
> +      } else {
>          uint32_t TypeIndex;
>          uint32_t constantOffset = 0;
>          uint32_t offset = 0;
> -        ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV);
>  
>          // currently only GetElementPtr is handled
> -        GBE_ASSERT(CE->getOpcode() == Instruction::GetElementPtr);
> -        Value *pointer = CE->getOperand(0);
> +        GBE_ASSERT(ce->getOpcode() == Instruction::GetElementPtr);
> +        Value *pointer = ce->getOperand(0);
>          CompositeType* CompTy = cast<CompositeType>(pointer->getType());
> -        for(uint32_t op=1; op<CE->getNumOperands(); ++op) {
> -          ConstantInt* ConstOP = dyn_cast<ConstantInt>(CE->getOperand(op));
> +        for(uint32_t op=1; op<ce->getNumOperands(); ++op) {
> +          ConstantInt* ConstOP = dyn_cast<ConstantInt>(ce->getOperand(op));
>            GBE_ASSERT(ConstOP);
>            TypeIndex = ConstOP->getZExtValue();
>            for(uint32_t ty_i=0; ty_i<TypeIndex; ty_i++)
> @@ -889,21 +900,30 @@ namespace gbe
>            CompTy = dyn_cast<CompositeType>(CompTy->getTypeAtIndex(TypeIndex));
>          }
>  
> -        const std::string &pointer_name = pointer->getName().str();
> -        ir::Register pointer_reg = ir::Register(unit.getConstantSet().getConstant(pointer_name).getReg());
> +        ir::Register pointer_reg;
> +        pointer_reg = regTranslator.getScalar(pointer, elemID);
>          ir::Register offset_reg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
>          ctx.LOADI(ir::Type::TYPE_S32, offset_reg, ctx.newIntegerImmediate(constantOffset, ir::Type::TYPE_S32));
>          ir::Register reg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
>          ctx.ADD(ir::Type::TYPE_S32, reg, pointer_reg, offset_reg);
>          return reg;
>        }
> -      const ir::ImmediateIndex immIndex = this->newImmediate(CPV, elemID);
> -      const ir::Immediate imm = ctx.getImmediate(immIndex);
> -      const ir::Register reg = ctx.reg(getFamily(imm.type));
> -      ctx.LOADI(imm.type, reg, immIndex);
> -      return reg;
>      }
> -    else
> +
> +    const ir::ImmediateIndex immIndex = this->newImmediate(c, elemID);
> +    const ir::Immediate imm = ctx.getImmediate(immIndex);
> +    const ir::Register reg = ctx.reg(getFamily(imm.type));
> +    ctx.LOADI(imm.type, reg, immIndex);
> +    return reg;
> +  }
> +
> +  ir::Register GenWriter::getRegister(Value *value, uint32_t elemID) {
> +    //the real value may be constant, so get real value before constant check
> +    regTranslator.getRealValue(value, elemID);
> +    if(isa<Constant>(value)) {
> +      Constant *c = dyn_cast<Constant>(value);
> +      return getConstantRegister(c, elemID);
> +    } else
>        return regTranslator.getScalar(value, elemID);
>    }
>  
> @@ -1273,6 +1293,55 @@ namespace gbe
>    BVAR(OCL_OPTIMIZE_PHI_MOVES, true);
>    BVAR(OCL_OPTIMIZE_LOADI, true);
>  
> +  void GenWriter::allocateGlobalVariableRegister(Function &F)
> +  {
> +    // Allocate a address register for each global variable
> +    const Module::GlobalListType &globalList = TheModule->getGlobalList();
> +    size_t j = 0;
> +    for(auto i = globalList.begin(); i != globalList.end(); i ++) {
> +      const GlobalVariable &v = *i;
> +      if(!v.isConstantUsed()) continue;
> +
> +      ir::AddressSpace addrSpace = addressSpaceLLVMToGen(v.getType()->getAddressSpace());
> +      if(addrSpace == ir::MEM_LOCAL) {
> +        ir::Function &f = ctx.getFunction();
> +        f.setUseSLM(true);
> +        const Constant *c = v.getInitializer();
> +        Type *ty = c->getType();
> +        uint32_t oldSlm = f.getSLMSize();
> +        uint32_t align = 8 * getAlignmentByte(unit, ty);
> +        uint32_t padding = getPadding(oldSlm*8, align);
> +
> +        f.setSLMSize(oldSlm + padding/8 + getTypeByteSize(unit, ty));
> +        const Value * parent = cast<Value>(&v);
> +        // local variable can only be used in one kernel function. so, don't need to check its all uses.
> +        // loop through the Constant to find the instruction that use the global variable
> +        do {
> +          Value::const_use_iterator it = parent->use_begin();
> +          parent = cast<Value>(*it);
> +        } while(isa<Constant>(parent));
> +
> +        const Instruction * insn = cast<Instruction>(parent);
> +        const BasicBlock * bb = insn->getParent();
> +        const Function * func = bb->getParent();
> +        if(func != &F) continue;
> +
> +        this->newRegister(const_cast<GlobalVariable*>(&v));
> +        ir::Register reg = regTranslator.getScalar(const_cast<GlobalVariable*>(&v), 0);
> +        ctx.LOADI(ir::TYPE_S32, reg, ctx.newIntegerImmediate(oldSlm + padding/8, ir::TYPE_S32));
> +      } else if(addrSpace == ir::MEM_CONSTANT) {
> +        GBE_ASSERT(v.hasInitializer());
> +        this->newRegister(const_cast<GlobalVariable*>(&v));
> +        ir::Register reg = regTranslator.getScalar(const_cast<GlobalVariable*>(&v), 0);
> +        ir::Constant &con = unit.getConstantSet().getConstant(j ++);
> +        ctx.LOADI(ir::TYPE_S32, reg, ctx.newIntegerImmediate(con.getOffset(), ir::TYPE_S32));
> +      } else {
> +        GBE_ASSERT(0);
> +      }
> +    }
> +
> +  }
> +
>    void GenWriter::emitFunction(Function &F)
>    {
>      switch (F.getCallingConv()) {
> @@ -1293,21 +1362,7 @@ namespace gbe
>      this->labelMap.clear();
>      this->emitFunctionPrototype(F);
>  
> -    // Allocate a virtual register for each global constant array
> -    const Module::GlobalListType &globalList = TheModule->getGlobalList();
> -    size_t j = 0;
> -    for(auto i = globalList.begin(); i != globalList.end(); i ++) {
> -      const GlobalVariable &v = *i;
> -      unsigned addrSpace = v.getType()->getAddressSpace();
> -      if(addrSpace != ir::AddressSpace::MEM_CONSTANT)
> -        continue;
> -      GBE_ASSERT(v.hasInitializer());
> -      ir::Register reg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
> -      ir::Constant &con = unit.getConstantSet().getConstant(j ++);
> -      con.setReg(reg.value());
> -      ctx.LOADI(ir::TYPE_S32, reg, ctx.newIntegerImmediate(con.getOffset(), ir::TYPE_S32));
> -    }
> -
> +    this->allocateGlobalVariableRegister(F);
>      // Visit all the instructions and emit the IR registers or the value to
>      // value mapping when a new register is not needed
>      pass = PASS_EMIT_REGISTERS;
> diff --git a/kernels/compiler_local_slm.cl b/kernels/compiler_local_slm.cl
> index 1a4b175..52c078c 100644
> --- a/kernels/compiler_local_slm.cl
> +++ b/kernels/compiler_local_slm.cl
> @@ -1,10 +1,24 @@
> -#if 0
> -__kernel void compiler_local_slm(__global int *dst, __local int *hop) {
> -#else
> +struct Test{
> +  char t0;
> +  int t1;
> +};
> +
> +constant int two= 2;
> +
>  __kernel void compiler_local_slm(__global int *dst) {
> -  __local int hop[10];
> -#endif
> -  hop[get_global_id(0)] = get_local_id(1);
> -  dst[get_global_id(0)] = hop[get_local_id(0)];
> +  __local int hop[16];
> +  __local char a;
> +  __local struct Test c;
> +
> +  c.t1 = get_group_id(0);
> +  a = two;// seems clang currently has a bug if I write 'a=2;' so currently workaroud it.
> +  hop[get_local_id(0)] = get_local_id(0);
> +  barrier(CLK_LOCAL_MEM_FENCE);
> +  dst[get_global_id(0)] = hop[get_local_id(0)] + (int)a + hop[1] + c.t1;
>  }
>  
> +__kernel void compiler_local_slm1(__global ulong *dst) {
> +  __local int hop[16];
> +  dst[1] = (ulong)&hop[1];
> +  dst[0] = (ulong)&hop[0];
> +}
> diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
> index b85c0cd..be7bcef 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -200,7 +200,8 @@ cl_curbe_fill(cl_kernel ker,
>    }
>    /* Handle the various offsets to SLM */
>    const int32_t arg_n = gbe_kernel_get_arg_num(ker->opaque);
> -  int32_t arg, slm_offset = 0;
> +  /* align so that we kernel argument get good alignment */
> +  int32_t arg, slm_offset = ALIGN(gbe_kernel_get_slm_size(ker->opaque), 32);
>    for (arg = 0; arg < arg_n; ++arg) {
>      const enum gbe_arg_type type = gbe_kernel_get_arg_type(ker->opaque, arg);
>      if (type != GBE_ARG_LOCAL_PTR)
> diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
> index a24c490..daa4d6f 100644
> --- a/utests/CMakeLists.txt
> +++ b/utests/CMakeLists.txt
> @@ -96,6 +96,7 @@ set (utests_sources
>    compiler_local_memory_barrier.cpp
>    compiler_local_memory_barrier_wg64.cpp
>    compiler_local_memory_barrier_2.cpp
> +  compiler_local_slm.cpp
>    compiler_movforphi_undef.cpp
>    compiler_volatile.cpp
>    compiler_copy_image1.cpp
> diff --git a/utests/compiler_local_slm.cpp b/utests/compiler_local_slm.cpp
> index aa9a2fe..48a072f 100644
> --- a/utests/compiler_local_slm.cpp
> +++ b/utests/compiler_local_slm.cpp
> @@ -2,9 +2,33 @@
>  
>  void compiler_local_slm(void)
>  {
> -  // Setup kernel and buffers
> -  OCL_CREATE_KERNEL("compiler_local_slm");
> +  const size_t n = 32;
> +  OCL_CREATE_KERNEL_FROM_FILE("compiler_local_slm", "compiler_local_slm");
> +  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
> +  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
> +  globals[0] = n;
> +  locals[0] = 16;
> +  OCL_NDRANGE(1);
> +  OCL_MAP_BUFFER(0);
> +  for (uint32_t i = 0; i < n; ++i)
> +//    std::cout << ((int32_t*)buf_data[0])[i] << std::endl;
> +    OCL_ASSERT(((int32_t*)buf_data[0])[i] == (i%16 + 2 + 1+ i/16));
> +  OCL_UNMAP_BUFFER(0);
>  }
>  
> +void compiler_local_slm1(void)
> +{
> +  const size_t n = 2;
> +  OCL_CREATE_KERNEL_FROM_FILE("compiler_local_slm", "compiler_local_slm1");
> +  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint64_t), NULL);
> +  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
> +  globals[0] = 1;
> +  locals[0] = 1;
> +  OCL_NDRANGE(1);
> +  OCL_MAP_BUFFER(0);
> +  uint64_t * ptr = (uint64_t*)buf_data[0];
> +  OCL_ASSERT((ptr[1] -ptr[0])  == 4);
> +  OCL_UNMAP_BUFFER(0);
> +}
>  MAKE_UTEST_FROM_FUNCTION(compiler_local_slm);
> -
> +MAKE_UTEST_FROM_FUNCTION(compiler_local_slm1);
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list