[Beignet] [PATCH V2 1/2] remove the page align limitation for host_ptr of CL_MEM_USE_HOST_PTR

Guo Yejun yejun.guo at intel.com
Mon Dec 22 00:54:59 PST 2014


the current limitation is both value and size of host_ptr should
be page aligned, remove the limitation by recording the offset to
the page starting address inside the driver.

v2: add assert for cl image path since only cl buffer is supported now.
    add assert to ensure the offset is zero for the case user ptr not used.
    separate the changes for driver and utest.

Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 src/cl_command_queue.c |  8 ++++++--
 src/cl_mem.c           | 16 ++++++++++++----
 src/cl_mem.h           |  1 +
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 12530d7..28a4abd 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -137,6 +137,10 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k)
     int id = k->images[i].arg_idx;
     struct _cl_mem_image *image;
     assert(interp_kernel_get_arg_type(k->opaque, id) == GBE_ARG_IMAGE);
+
+    //currently, user ptr is not supported for cl image, so offset should be always zero
+    assert(k->args[id].mem->offset == 0);
+
     image = cl_mem_image(k->args[id].mem);
     set_image_info(k->curbe, &k->images[i], image);
     cl_gpgpu_bind_image(gpgpu, k->images[i].idx, image->base.bo, image->offset,
@@ -170,9 +174,9 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
     offset = interp_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i);
     if (k->args[i].mem->type == CL_MEM_SUBBUFFER_TYPE) {
       struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)k->args[i].mem;
-      cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, buffer->sub_offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
+      cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, k->args[i].mem->offset + buffer->sub_offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
     } else {
-      cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, 0, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
+      cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, k->args[i].mem->offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
     }
   }
 
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 3225fd2..89f230e 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -266,6 +266,7 @@ cl_mem_allocate(enum cl_mem_type type,
   mem->magic = CL_MAGIC_MEM_HEADER;
   mem->flags = flags;
   mem->is_userptr = 0;
+  mem->offset = 0;
 
   if (sz != 0) {
     /* Pinning will require stricter alignment rules */
@@ -285,10 +286,11 @@ cl_mem_allocate(enum cl_mem_type type,
           assert(host_ptr != NULL);
           /* userptr not support tiling */
           if (!is_tiled) {
-            if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) {
-              mem->is_userptr = 1;
-              mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", host_ptr, sz, 0);
-            }
+            void* aligned_host_ptr = (void*)(((unsigned long)host_ptr) & (~(page_size - 1)));
+            mem->offset = host_ptr - aligned_host_ptr;
+            mem->is_userptr = 1;
+            size_t aligned_sz = ALIGN((mem->offset + sz), page_size);
+            mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", aligned_host_ptr, aligned_sz, 0);
           }
         }
         else if (flags & CL_MEM_ALLOC_HOST_PTR) {
@@ -514,6 +516,8 @@ cl_mem_new_sub_buffer(cl_mem buffer,
   mem->ref_n = 1;
   mem->magic = CL_MAGIC_MEM_HEADER;
   mem->flags = flags;
+  mem->offset = buffer->offset;
+  mem->is_userptr = buffer->is_userptr;
   sub_buf->parent = (struct _cl_mem_buffer*)buffer;
 
   cl_mem_add_ref(buffer);
@@ -1853,6 +1857,10 @@ cl_mem_unmap_gtt(cl_mem mem)
 LOCAL void*
 cl_mem_map_auto(cl_mem mem, int write)
 {
+  //if mem is not created from userptr, the offset should be always zero.
+  if (!mem->is_userptr)
+    assert(mem->offset == 0);
+
   if (IS_IMAGE(mem) && cl_mem_image(mem)->tiling != CL_NO_TILE)
     return cl_mem_map_gtt(mem);
   else {
diff --git a/src/cl_mem.h b/src/cl_mem.h
index fd50220..e027f15 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -94,6 +94,7 @@ typedef  struct _cl_mem {
   uint8_t mapped_gtt;       /* This object has mapped gtt, for unmap. */
   cl_mem_dstr_cb *dstr_cb;  /* The destroy callback. */
   uint8_t is_userptr;       /* CL_MEM_USE_HOST_PTR is enabled*/
+  size_t offset;            /* offset of host_ptr to the page beginning, only for CL_MEM_USE_HOST_PTR*/
 } _cl_mem;
 
 struct _cl_mem_image {
-- 
1.9.1



More information about the Beignet mailing list