[Beignet] [PATCH 1/2] loosen the alignment limitation for host_ptr of CL_MEM_USE_HOST_PTR

Guo Yejun yejun.guo at intel.com
Wed Jan 21 21:31:39 PST 2015


The current limitation is that both host_ptr and the buffer size must be
page aligned. Loosen this so that host_ptr only needs to be cache-line-size
(64-byte) aligned, and remove the limitation on the size entirely.

Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 src/cl_command_queue.c |  8 ++++++--
 src/cl_mem.c           | 17 +++++++++++++++--
 src/cl_mem.h           |  1 +
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 89afa07..f843548 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -133,6 +133,10 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k)
     int id = k->images[i].arg_idx;
     struct _cl_mem_image *image;
     assert(interp_kernel_get_arg_type(k->opaque, id) == GBE_ARG_IMAGE);
+
+    //currently, user ptr is not supported for cl image, so offset should be always zero
+    assert(k->args[id].mem->offset == 0);
+
     image = cl_mem_image(k->args[id].mem);
     set_image_info(k->curbe, &k->images[i], image);
     cl_gpgpu_bind_image(gpgpu, k->images[i].idx, image->base.bo, image->offset,
@@ -166,9 +170,9 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
     offset = interp_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i);
     if (k->args[i].mem->type == CL_MEM_SUBBUFFER_TYPE) {
       struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)k->args[i].mem;
-      cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, buffer->sub_offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
+      cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, k->args[i].mem->offset + buffer->sub_offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
     } else {
-      cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, 0, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
+      cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, k->args[i].mem->offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
     }
   }
 
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 3225fd2..36ef2ce 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -266,6 +266,7 @@ cl_mem_allocate(enum cl_mem_type type,
   mem->magic = CL_MAGIC_MEM_HEADER;
   mem->flags = flags;
   mem->is_userptr = 0;
+  mem->offset = 0;
 
   if (sz != 0) {
     /* Pinning will require stricter alignment rules */
@@ -279,15 +280,21 @@ cl_mem_allocate(enum cl_mem_type type,
 #ifdef HAS_USERPTR
     if (ctx->device->host_unified_memory) {
       int page_size = getpagesize();
+      int cacheline_size = 0;
+      cl_get_device_info(ctx->device, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cacheline_size), &cacheline_size, NULL);
+
       /* currently only cl buf is supported, will add cl image support later */
       if (type == CL_MEM_BUFFER_TYPE) {
         if (flags & CL_MEM_USE_HOST_PTR) {
           assert(host_ptr != NULL);
           /* userptr not support tiling */
           if (!is_tiled) {
-            if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) {
+            if (ALIGN((unsigned long)host_ptr, cacheline_size) == (unsigned long)host_ptr) {
+              void* aligned_host_ptr = (void*)(((unsigned long)host_ptr) & (~(page_size - 1)));
+              mem->offset = host_ptr - aligned_host_ptr;
               mem->is_userptr = 1;
-              mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", host_ptr, sz, 0);
+              size_t aligned_sz = ALIGN((mem->offset + sz), page_size);
+              mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", aligned_host_ptr, aligned_sz, 0);
             }
           }
         }
@@ -514,6 +521,8 @@ cl_mem_new_sub_buffer(cl_mem buffer,
   mem->ref_n = 1;
   mem->magic = CL_MAGIC_MEM_HEADER;
   mem->flags = flags;
+  mem->offset = buffer->offset;
+  mem->is_userptr = buffer->is_userptr;
   sub_buf->parent = (struct _cl_mem_buffer*)buffer;
 
   cl_mem_add_ref(buffer);
@@ -1853,6 +1862,10 @@ cl_mem_unmap_gtt(cl_mem mem)
 LOCAL void*
 cl_mem_map_auto(cl_mem mem, int write)
 {
+  //if mem is not created from userptr, the offset should be always zero.
+  if (!mem->is_userptr)
+    assert(mem->offset == 0);
+
   if (IS_IMAGE(mem) && cl_mem_image(mem)->tiling != CL_NO_TILE)
     return cl_mem_map_gtt(mem);
   else {
diff --git a/src/cl_mem.h b/src/cl_mem.h
index fd50220..e027f15 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -94,6 +94,7 @@ typedef  struct _cl_mem {
   uint8_t mapped_gtt;       /* This object has mapped gtt, for unmap. */
   cl_mem_dstr_cb *dstr_cb;  /* The destroy callback. */
   uint8_t is_userptr;       /* CL_MEM_USE_HOST_PTR is enabled*/
+  size_t offset;            /* offset of host_ptr to the page beginning, only for CL_MEM_USE_HOST_PTR*/
 } _cl_mem;
 
 struct _cl_mem_image {
-- 
1.9.1



More information about the Beignet mailing list