[Beignet] [PATCH 2/3] GBE: Fix alignment according to OCL spec

Ruiling Song ruiling.song at intel.com
Thu Nov 7 22:20:15 PST 2013


The patch simply store a 'align' for each kernel argument.
Then the runtime could align the kernel argument address to 'align'.
This patch works for constant and local address space.

Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
 backend/src/backend/context.cpp       |    2 ++
 backend/src/backend/program.cpp       |   10 ++++++++
 backend/src/backend/program.h         |    4 +++
 backend/src/backend/program.hpp       |    4 +++
 backend/src/ir/context.cpp            |    4 +--
 backend/src/ir/context.hpp            |    2 +-
 backend/src/ir/function.hpp           |    5 ++--
 backend/src/llvm/llvm_gen_backend.cpp |   20 ++++++++-------
 src/cl_command_queue_gen7.c           |   43 +++++++++++++++++++++------------
 9 files changed, 65 insertions(+), 29 deletions(-)

diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index cbeef24..2609e7a 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -534,6 +534,8 @@ namespace gbe
       kernel->args = NULL;
     for (uint32_t argID = 0; argID < kernel->argNum; ++argID) {
       const auto &arg = fn.getArg(argID);
+
+      kernel->args[argID].align = arg.align;
       switch (arg.type) {
         case ir::FunctionArgument::VALUE:
         case ir::FunctionArgument::STRUCTURE:
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 25be484..e692186 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -229,6 +229,7 @@ namespace gbe {
       KernelArgument& arg = args[i];
       OUT_UPDATE_SZ(arg.type);
       OUT_UPDATE_SZ(arg.size);
+      OUT_UPDATE_SZ(arg.align);
       OUT_UPDATE_SZ(arg.bufSize);
     }
 
@@ -315,6 +316,7 @@ namespace gbe {
       KernelArgument& arg = args[i];
       IN_UPDATE_SZ(arg.type);
       IN_UPDATE_SZ(arg.size);
+      IN_UPDATE_SZ(arg.align);
       IN_UPDATE_SZ(arg.bufSize);
     }
 
@@ -422,6 +424,7 @@ namespace gbe {
       outs << spaces_nl << "  Arg " << i << ":\n";
       outs << spaces_nl << "      type value: "<< arg.type << "\n";
       outs << spaces_nl << "      size: "<< arg.size << "\n";
+      outs << spaces_nl << "      align: "<< arg.align << "\n";
       outs << spaces_nl << "      bufSize: "<< arg.bufSize << "\n";
     }
 
@@ -696,6 +699,11 @@ namespace gbe {
     return kernel->getArgSize(argID);
   }
 
+  static uint32_t kernelGetArgAlign(gbe_kernel genKernel, uint32_t argID) {
+    if (genKernel == NULL) return 0u;
+    const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
+    return kernel->getArgAlign(argID);
+  }
   static gbe_arg_type kernelGetArgType(gbe_kernel genKernel, uint32_t argID) {
     if (genKernel == NULL) return GBE_ARG_INVALID;
     const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
@@ -803,6 +811,7 @@ GBE_EXPORT_SYMBOL gbe_kernel_get_code_size_cb *gbe_kernel_get_code_size = NULL;
 GBE_EXPORT_SYMBOL gbe_kernel_get_arg_num_cb *gbe_kernel_get_arg_num = NULL;
 GBE_EXPORT_SYMBOL gbe_kernel_get_arg_size_cb *gbe_kernel_get_arg_size = NULL;
 GBE_EXPORT_SYMBOL gbe_kernel_get_arg_type_cb *gbe_kernel_get_arg_type = NULL;
+GBE_EXPORT_SYMBOL gbe_kernel_get_arg_align_cb *gbe_kernel_get_arg_align = NULL;
 GBE_EXPORT_SYMBOL gbe_kernel_get_simd_width_cb *gbe_kernel_get_simd_width = NULL;
 GBE_EXPORT_SYMBOL gbe_kernel_get_curbe_offset_cb *gbe_kernel_get_curbe_offset = NULL;
 GBE_EXPORT_SYMBOL gbe_kernel_get_curbe_size_cb *gbe_kernel_get_curbe_size = NULL;
@@ -838,6 +847,7 @@ namespace gbe
       gbe_kernel_get_arg_num = gbe::kernelGetArgNum;
       gbe_kernel_get_arg_size = gbe::kernelGetArgSize;
       gbe_kernel_get_arg_type = gbe::kernelGetArgType;
+      gbe_kernel_get_arg_align = gbe::kernelGetArgAlign;
       gbe_kernel_get_simd_width = gbe::kernelGetSIMDWidth;
       gbe_kernel_get_curbe_offset = gbe::kernelGetCurbeOffset;
       gbe_kernel_get_curbe_size = gbe::kernelGetCurbeSize;
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index 10fcc49..b0149da 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -185,6 +185,10 @@ extern gbe_kernel_get_arg_size_cb *gbe_kernel_get_arg_size;
 typedef enum gbe_arg_type (gbe_kernel_get_arg_type_cb)(gbe_kernel, uint32_t argID);
 extern gbe_kernel_get_arg_type_cb *gbe_kernel_get_arg_type;
 
+/*! Get the align of the given argument */
+typedef uint32_t (gbe_kernel_get_arg_align_cb)(gbe_kernel, uint32_t argID);
+extern gbe_kernel_get_arg_align_cb *gbe_kernel_get_arg_align;
+
 /*! Get the simd width for the kernel */
 typedef uint32_t (gbe_kernel_get_simd_width_cb)(gbe_kernel);
 extern gbe_kernel_get_simd_width_cb *gbe_kernel_get_simd_width;
diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
index 9b33b7c..dd76210 100644
--- a/backend/src/backend/program.hpp
+++ b/backend/src/backend/program.hpp
@@ -47,6 +47,7 @@ namespace gbe {
   struct KernelArgument {
     gbe_arg_type type; //!< Pointer, structure, image, regular value?
     uint32_t size;     //!< Size of the argument
+    uint32_t align;    //!< addr alignment of the argument
     uint32_t bufSize;  //!< Contant buffer size
   };
 
@@ -88,6 +89,9 @@ namespace gbe {
     INLINE uint32_t getArgSize(uint32_t argID) const {
       return argID >= argNum ? 0u : args[argID].size;
     }
+    INLINE uint32_t getArgAlign(uint32_t argID) const {
+      return argID >= argNum ? 0u : args[argID].align;
+    }
     /*! Return the type of the given argument */
     INLINE gbe_arg_type getArgType(uint32_t argID) const {
       return argID >= argNum ? GBE_ARG_INVALID : args[argID].type;
diff --git a/backend/src/ir/context.cpp b/backend/src/ir/context.cpp
index 400a2a0..d6815e1 100644
--- a/backend/src/ir/context.cpp
+++ b/backend/src/ir/context.cpp
@@ -105,10 +105,10 @@ namespace ir {
     return index;
   }
 
-  void Context::input(const std::string &name, FunctionArgument::Type type, Register reg, uint32_t elementSize) {
+  void Context::input(const std::string &name, FunctionArgument::Type type, Register reg, uint32_t elementSize, uint32_t align) {
     GBE_ASSERTM(fn != NULL, "No function currently defined");
     GBE_ASSERTM(reg < fn->file.regNum(), "Out-of-bound register");
-    FunctionArgument *arg = GBE_NEW(FunctionArgument, type, reg, elementSize, name);
+    FunctionArgument *arg = GBE_NEW(FunctionArgument, type, reg, elementSize, name, align);
     fn->args.push_back(arg);
   }
 
diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp
index 242beaa..adeaf6f 100644
--- a/backend/src/ir/context.hpp
+++ b/backend/src/ir/context.hpp
@@ -107,7 +107,7 @@ namespace ir {
     /*! Create a new label for the current function */
     LabelIndex label(void);
     /*! Append a new input register for the function */
-    void input(const std::string &name, FunctionArgument::Type type, Register reg, uint32_t elemSz = 0u);
+    void input(const std::string &name, FunctionArgument::Type type, Register reg, uint32_t elemSz = 0u, uint32_t align = 0);
     /*! Append a new output register for the function */
     void output(Register reg);
     /*! Get the immediate value */
diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
index 3d4733d..84d2504 100644
--- a/backend/src/ir/function.hpp
+++ b/backend/src/ir/function.hpp
@@ -106,11 +106,12 @@ namespace ir {
       SAMPLER           = 6
     };
     /*! Create a function input argument */
-    INLINE FunctionArgument(Type type, Register reg, uint32_t size, const std::string &name) :
-      type(type), reg(reg), size(size), name(name) {}
+    INLINE FunctionArgument(Type type, Register reg, uint32_t size, const std::string &name, uint32_t align) :
+      type(type), reg(reg), size(size), align(align), name(name) {}
     Type type;     //!< Gives the type of argument we have
     Register reg;  //!< Holds the argument
     uint32_t size; //!< == sizeof(void*) for ptr, sizeof(elem) for the rest
+    uint32_t align; //!< address alignment for the argument
     const std::string name; //!< Holds the function name for IR output
     GBE_STRUCT(FunctionArgument); // Use custom allocator
   };
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index a104df4..cb3511f 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -692,7 +692,8 @@ namespace gbe
         void* mem = malloc(size);
         uint32_t offset = 0;
         getConstantData(c, mem, offset);
-        unit.newConstant((char *)mem, name, size, sizeof(unsigned));
+        uint32_t alignment = getAlignmentByte(unit, type);
+        unit.newConstant((char *)mem, name, size, alignment);
         free(mem);
       }
     }
@@ -1079,7 +1080,7 @@ namespace gbe
           const uint32_t elemSize = getTypeByteSize(unit, elemType);
           const uint32_t elemNum = vectorType->getNumElements();
           //vector's elemType always scalar type
-          ctx.input(argName, ir::FunctionArgument::VALUE, reg, elemNum*elemSize);
+          ctx.input(argName, ir::FunctionArgument::VALUE, reg, elemNum*elemSize, getAlignmentByte(unit, type));
 
           ir::Function& fn = ctx.getFunction();
           for(uint32_t i=1; i < elemNum; i++) {
@@ -1094,37 +1095,38 @@ namespace gbe
                     "vector type in the function argument is not supported yet");
         const ir::Register reg = regTranslator.newScalar(I);
         if (type->isPointerTy() == false)
-          ctx.input(argName, ir::FunctionArgument::VALUE, reg, getTypeByteSize(unit, type));
+          ctx.input(argName, ir::FunctionArgument::VALUE, reg, getTypeByteSize(unit, type), getAlignmentByte(unit, type));
         else {
           PointerType *pointerType = dyn_cast<PointerType>(type);
+          Type *pointed = pointerType->getElementType();
           // By value structure
 #if LLVM_VERSION_MINOR <= 1
           if (PAL.paramHasAttr(argID+1, Attribute::ByVal)) {
 #else
           if (I->hasByValAttr()) {
 #endif /* LLVM_VERSION_MINOR <= 1 */
-            Type *pointed = pointerType->getElementType();
             const size_t structSize = getTypeByteSize(unit, pointed);
-            ctx.input(argName, ir::FunctionArgument::STRUCTURE, reg, structSize);
+            ctx.input(argName, ir::FunctionArgument::STRUCTURE, reg, structSize, getAlignmentByte(unit, type));
           }
           // Regular user provided pointer (global, local or constant)
           else {
             const uint32_t addr = pointerType->getAddressSpace();
             const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(addr);
             const uint32_t ptrSize = getTypeByteSize(unit, type);
+            const uint32_t align = getAlignmentByte(unit, pointed);
               switch (addrSpace) {
               case ir::MEM_GLOBAL:
-                ctx.input(argName, ir::FunctionArgument::GLOBAL_POINTER, reg, ptrSize);
+                ctx.input(argName, ir::FunctionArgument::GLOBAL_POINTER, reg, ptrSize, align);
               break;
               case ir::MEM_LOCAL:
-                ctx.input(argName, ir::FunctionArgument::LOCAL_POINTER, reg, ptrSize);
+                ctx.input(argName, ir::FunctionArgument::LOCAL_POINTER, reg, ptrSize, align);
                 ctx.getFunction().setUseSLM(true);
               break;
               case ir::MEM_CONSTANT:
-                ctx.input(argName, ir::FunctionArgument::CONSTANT_POINTER, reg, ptrSize);
+                ctx.input(argName, ir::FunctionArgument::CONSTANT_POINTER, reg, ptrSize, align);
               break;
               case ir::IMAGE:
-                ctx.input(argName, ir::FunctionArgument::IMAGE, reg, ptrSize);
+                ctx.input(argName, ir::FunctionArgument::IMAGE, reg, ptrSize, align);
                 ctx.getFunction().getImageSet()->append(reg, &ctx);
               break;
               default: GBE_ASSERT(addrSpace != ir::MEM_PRIVATE);
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index fb7a8a3..0788975 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -98,37 +98,47 @@ error:
 static void
 cl_upload_constant_buffer(cl_command_queue queue, cl_kernel ker)
 {
-  /* calculate constant buffer size */
+  /* calculate constant buffer size
+   * we need raw_size & aligned_size
+   */
   GET_QUEUE_THREAD_GPGPU(queue);
   int32_t arg;
-  size_t offset;
+  size_t offset = 0;
+  uint32_t raw_size = 0, aligned_size =0;
   gbe_program prog = ker->program->opaque;
   const int32_t arg_n = gbe_kernel_get_arg_num(ker->opaque);
   size_t global_const_size = gbe_program_get_global_constant_size(prog);
-  uint32_t constant_buf_size = 0;
+  aligned_size = raw_size = global_const_size;
+  /* Reserve 8 bytes to get rid of 0 address */
+  if(global_const_size == 0) aligned_size = 8;
+
   for (arg = 0; arg < arg_n; ++arg) {
     const enum gbe_arg_type type = gbe_kernel_get_arg_type(ker->opaque, arg);
     if (type == GBE_ARG_CONSTANT_PTR && ker->args[arg].mem) {
+      uint32_t alignment = gbe_kernel_get_arg_align(ker->opaque, arg);
+      assert(alignment != 0);
       cl_mem mem = ker->args[arg].mem;
-      constant_buf_size += ALIGN(mem->size, 4);
+      raw_size += mem->size;
+      aligned_size = ALIGN(aligned_size, alignment);
+      aligned_size += mem->size;
     }
   }
-  if(global_const_size == 0 && constant_buf_size == 0)
+  if(raw_size == 0)
      return;
 
-  cl_buffer bo = cl_gpgpu_alloc_constant_buffer(gpgpu, constant_buf_size + global_const_size + 4);
+  cl_buffer bo = cl_gpgpu_alloc_constant_buffer(gpgpu, aligned_size);
   cl_buffer_map(bo, 1);
   char * cst_addr = cl_buffer_get_virtual(bo);
-  offset = 0;
+
+  /* upload the global constant data */
   if (global_const_size > 0) {
-    /* Write the global constant arrays */
     gbe_program_get_global_constant_data(prog, (char*)(cst_addr+offset));
+    offset += global_const_size;
   }
-  offset += ALIGN(global_const_size, 4);
 
+  /* reserve 8 bytes to get rid of 0 address */
   if(global_const_size == 0) {
-    /* reserve 4 bytes to get rid of 0 address */
-    offset += 4;
+    offset = 8;
   }
 
   /* upload constant buffer argument */
@@ -137,7 +147,8 @@ cl_upload_constant_buffer(cl_command_queue queue, cl_kernel ker)
     const enum gbe_arg_type type = gbe_kernel_get_arg_type(ker->opaque, arg);
     if (type == GBE_ARG_CONSTANT_PTR && ker->args[arg].mem) {
       cl_mem mem = ker->args[arg].mem;
-
+      uint32_t alignment = gbe_kernel_get_arg_align(ker->opaque, arg);
+      offset = ALIGN(offset, alignment);
       curbe_offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_KERNEL_ARGUMENT, arg);
       assert(curbe_offset >= 0);
       *(uint32_t *) (ker->curbe + curbe_offset) = offset;
@@ -146,7 +157,7 @@ cl_upload_constant_buffer(cl_command_queue queue, cl_kernel ker)
       void * addr = cl_buffer_get_virtual(mem->bo);
       memcpy(cst_addr + offset, addr, mem->size);
       cl_buffer_unmap(mem->bo);
-      offset += ALIGN(mem->size, 4);
+      offset += mem->size;
     }
   }
   cl_buffer_unmap(bo);
@@ -201,12 +212,14 @@ cl_curbe_fill(cl_kernel ker,
   }
   /* Handle the various offsets to SLM */
   const int32_t arg_n = gbe_kernel_get_arg_num(ker->opaque);
-  /* align so that we kernel argument get good alignment */
-  int32_t arg, slm_offset = ALIGN(gbe_kernel_get_slm_size(ker->opaque), 32);
+  int32_t arg, slm_offset = gbe_kernel_get_slm_size(ker->opaque);
   for (arg = 0; arg < arg_n; ++arg) {
     const enum gbe_arg_type type = gbe_kernel_get_arg_type(ker->opaque, arg);
     if (type != GBE_ARG_LOCAL_PTR)
       continue;
+    uint32_t align = gbe_kernel_get_arg_align(ker->opaque, arg);
+    assert(align != 0);
+    slm_offset = ALIGN(slm_offset, align);
     offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_KERNEL_ARGUMENT, arg);
     assert(offset >= 0);
     uint32_t *slmptr = (uint32_t *) (ker->curbe + offset);
-- 
1.7.9.5



More information about the Beignet mailing list