[Beignet] [PATCH] remove the page align limitation for host_ptr of CL_MEM_USE_HOST_PTR

Guo Yejun yejun.guo at intel.com
Wed Dec 10 18:26:02 PST 2014


Currently, both the address and the size of host_ptr must be page
aligned. Remove this limitation by recording, inside the driver, the
offset of host_ptr from the start of the page that contains it.

Tests verified: beignet/utest, beignet/benchmark, conformance/basic,
conformance/buffers, and conformance/mem_host_flags.

Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 benchmark/benchmark_use_host_ptr_buffer.cpp | 13 ++++++++++---
 src/cl_command_queue.c                      |  4 ++--
 src/cl_mem.c                                | 12 ++++++++----
 src/cl_mem.h                                |  1 +
 utests/runtime_use_host_ptr_buffer.cpp      | 15 +++++++++++----
 5 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/benchmark/benchmark_use_host_ptr_buffer.cpp b/benchmark/benchmark_use_host_ptr_buffer.cpp
index 7ede576..0021290 100644
--- a/benchmark/benchmark_use_host_ptr_buffer.cpp
+++ b/benchmark/benchmark_use_host_ptr_buffer.cpp
@@ -5,13 +5,20 @@ int benchmark_use_host_ptr_buffer(void)
 {
   struct timeval start,stop;
 
-  const size_t n = 4096*4096;
+  const size_t n = 4096*4096 + 256;
 
   // Setup kernel and buffers
   OCL_CREATE_KERNEL("runtime_use_host_ptr_buffer");
 
-  int ret = posix_memalign(&buf_data[0], 4096, sizeof(uint32_t) * n);
-  OCL_ASSERT(ret == 0);
+  buf_data[0] = malloc(sizeof(uint32_t) * n);
+
+  //it does not matter if buf_data[0] is page aligned or not,
+  //here, just to test the case that it is not page aligned.
+  while ((unsigned long)buf_data[0] % 4096 == 0)
+  {
+    free(buf_data[0]);
+    buf_data[0] = malloc(sizeof(uint32_t) * n);
+  }
 
   for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = i;
   OCL_CREATE_BUFFER(buf[0], CL_MEM_USE_HOST_PTR, n * sizeof(uint32_t), buf_data[0]);
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 12530d7..62fd810 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -170,9 +170,9 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
     offset = interp_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i);
     if (k->args[i].mem->type == CL_MEM_SUBBUFFER_TYPE) {
       struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)k->args[i].mem;
-      cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, buffer->sub_offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
+      cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, k->args[i].mem->offset + buffer->sub_offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
     } else {
-      cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, 0, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
+      cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, k->args[i].mem->offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
     }
   }
 
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 3055bea..3b3421c 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -254,6 +254,7 @@ cl_mem_allocate(enum cl_mem_type type,
   mem->magic = CL_MAGIC_MEM_HEADER;
   mem->flags = flags;
   mem->is_userptr = 0;
+  mem->offset = 0;
 
   if (sz != 0) {
     /* Pinning will require stricter alignment rules */
@@ -273,10 +274,11 @@ cl_mem_allocate(enum cl_mem_type type,
           assert(host_ptr != NULL);
           /* userptr not support tiling */
           if (!is_tiled) {
-            if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) {
-              mem->is_userptr = 1;
-              mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", host_ptr, sz, 0);
-            }
+            void* aligned_host_ptr = (void*)(((unsigned long)host_ptr) & (~(page_size - 1)));
+            mem->offset = host_ptr - aligned_host_ptr;
+            mem->is_userptr = 1;
+            size_t aligned_sz = ALIGN((mem->offset + sz), page_size);
+            mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", aligned_host_ptr, aligned_sz, 0);
           }
         }
         else if (flags & CL_MEM_ALLOC_HOST_PTR) {
@@ -502,6 +504,8 @@ cl_mem_new_sub_buffer(cl_mem buffer,
   mem->ref_n = 1;
   mem->magic = CL_MAGIC_MEM_HEADER;
   mem->flags = flags;
+  mem->offset = buffer->offset;
+  mem->is_userptr = buffer->is_userptr;
   sub_buf->parent = (struct _cl_mem_buffer*)buffer;
 
   cl_mem_add_ref(buffer);
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 1641dcc..ffe46a3 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -93,6 +93,7 @@ typedef  struct _cl_mem {
   uint8_t mapped_gtt;       /* This object has mapped gtt, for unmap. */
   cl_mem_dstr_cb *dstr_cb;  /* The destroy callback. */
   uint8_t is_userptr;       /* CL_MEM_USE_HOST_PTR is enabled*/
+  size_t offset;            /* offset of host_ptr to the page beginning, only for CL_MEM_USE_HOST_PTR*/
 } _cl_mem;
 
 struct _cl_mem_image {
diff --git a/utests/runtime_use_host_ptr_buffer.cpp b/utests/runtime_use_host_ptr_buffer.cpp
index 79273c3..4ae5379 100644
--- a/utests/runtime_use_host_ptr_buffer.cpp
+++ b/utests/runtime_use_host_ptr_buffer.cpp
@@ -2,13 +2,20 @@
 
 static void runtime_use_host_ptr_buffer(void)
 {
-  const size_t n = 4096*100;
+  const size_t n = 4096*10 + 1111;
 
   // Setup kernel and buffers
   OCL_CREATE_KERNEL("runtime_use_host_ptr_buffer");
 
-  int ret = posix_memalign(&buf_data[0], 4096, sizeof(uint32_t) * n);
-  OCL_ASSERT(ret == 0);
+  buf_data[0] = malloc(sizeof(uint32_t) * n);
+
+  //it does not matter if buf_data[0] is page aligned or not,
+  //here, just to test the case that it is not page aligned.
+  while ((unsigned long)buf_data[0] % 4096 == 0)
+  {
+    free(buf_data[0]);
+    buf_data[0] = malloc(sizeof(uint32_t) * n);
+  }
 
   for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = i;
   OCL_CREATE_BUFFER(buf[0], CL_MEM_USE_HOST_PTR, n * sizeof(uint32_t), buf_data[0]);
@@ -16,7 +23,7 @@ static void runtime_use_host_ptr_buffer(void)
   // Run the kernel
   OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
   globals[0] = n;
-  locals[0] = 256;
+  locals[0] = 1;
   OCL_NDRANGE(1);
 
   // Check result
-- 
1.9.1



More information about the Beignet mailing list