[Beignet] [PATCH 1/2] enable CL_MEM_ALLOC_HOST_PTR with user_ptr to avoid copy between GPU/CPU

Guo Yejun yejun.guo at intel.com
Mon Dec 1 17:31:01 PST 2014


When user ptr is enabled, allocate page-aligned system memory for
CL_MEM_ALLOC_HOST_PTR inside the driver and wrap it as GPU memory
to avoid the copy between GPU and CPU.

Also refine the related user_ptr code.

tests verified: beignet/utest, conformance/basic, buffers, mem_host_flags

Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 src/cl_device_id.c |  8 ++++----
 src/cl_mem.c       | 37 +++++++++++++++++++++++++++----------
 src/cl_mem.h       |  4 ++--
 3 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 5ef0bde..711f8ae 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -27,6 +27,7 @@
 #include "cl_thread.h"
 #include "CL/cl.h"
 #include "cl_gbe_loader.h"
+#include "cl_alloc.h"
 
 #include <assert.h>
 #include <stdio.h>
@@ -407,15 +408,14 @@ brw_gt3_break:
   cl_buffer_mgr bufmgr = cl_driver_get_bufmgr(dummy);
 
   const size_t sz = 4096;
-  void* host_ptr = NULL;
-  int err = posix_memalign(&host_ptr, 4096, sz);
-  if (err == 0) {
+  void* host_ptr = cl_aligned_malloc(sz, 4096);;
+  if (host_ptr != NULL) {
     cl_buffer bo = cl_buffer_alloc_userptr(bufmgr, "CL memory object", host_ptr, sz, 0);
     if (bo == NULL)
       ret->host_unified_memory = CL_FALSE;
     else
       cl_buffer_unreference(bo);
-    free(host_ptr);
+    cl_free(host_ptr);
   }
   else
     ret->host_unified_memory = CL_FALSE;
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 1f1b668..7528964 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -266,16 +266,26 @@ cl_mem_allocate(enum cl_mem_type type,
 
 #ifdef HAS_USERPTR
     if (ctx->device->host_unified_memory) {
+      int page_size = getpagesize();
       /* currently only cl buf is supported, will add cl image support later */
-      if ((flags & CL_MEM_USE_HOST_PTR) && host_ptr != NULL) {
-        /* userptr not support tiling */
-        if (!is_tiled) {
-          int page_size = getpagesize();
-          if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) {
-            mem->is_userptr = 1;
-            mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", host_ptr, sz, 0);
+      if (type == CL_MEM_BUFFER_TYPE) {
+        if (flags & CL_MEM_USE_HOST_PTR) {
+          assert(host_ptr != NULL);
+          /* userptr not support tiling */
+          if (!is_tiled) {
+            if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) {
+              mem->is_userptr = 1;
+              mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", host_ptr, sz, 0);
+            }
           }
         }
+        else if (flags & CL_MEM_ALLOC_HOST_PTR) {
+          const size_t alignedSZ = ALIGN(sz, page_size);
+          void* internal_host_ptr = cl_aligned_malloc(alignedSZ, page_size);
+          mem->host_ptr = internal_host_ptr;
+          mem->is_userptr = 1;
+          mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", internal_host_ptr, alignedSZ, 0);
+        }
       }
     }
 
@@ -416,13 +426,17 @@ cl_mem_new_buffer(cl_context ctx,
     goto error;
 
   /* Copy the data if required */
-  if (flags & CL_MEM_COPY_HOST_PTR)
-    cl_buffer_subdata(mem->bo, 0, sz, data);
+  if (flags & CL_MEM_COPY_HOST_PTR) {
+    if (mem->is_userptr)
+      memcpy(mem->host_ptr, data, sz);
+    else
+      cl_buffer_subdata(mem->bo, 0, sz, data);
+  }
 
   if ((flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr)
     cl_buffer_subdata(mem->bo, 0, sz, data);
 
-  if (flags & CL_MEM_USE_HOST_PTR || flags & CL_MEM_COPY_HOST_PTR)
+  if (flags & CL_MEM_USE_HOST_PTR)
     mem->host_ptr = data;
 
 exit:
@@ -1085,6 +1099,9 @@ cl_mem_delete(cl_mem mem)
     cl_buffer_unreference(mem->bo);
   }
 
+  if (mem->is_userptr && (mem->flags & CL_MEM_ALLOC_HOST_PTR))
+    cl_free(mem->host_ptr);
+
   cl_free(mem);
 }
 
diff --git a/src/cl_mem.h b/src/cl_mem.h
index ac1175d..1641dcc 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -86,13 +86,13 @@ typedef  struct _cl_mem {
   size_t size;              /* original request size, not alignment size, used in constant buffer */
   cl_context ctx;           /* Context it belongs to */
   cl_mem_flags flags;       /* Flags specified at the creation time */
-  void * host_ptr;          /* Pointer of the host mem specified by CL_MEM_ALLOC_HOST_PTR */
+  void * host_ptr;          /* Pointer of the host mem specified by CL_MEM_ALLOC_HOST_PTR, CL_MEM_USE_HOST_PTR */
   cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by caller. */
   int mapped_ptr_sz;        /* The array size of mapped_ptr. */
   int map_ref;              /* The mapped count. */
   uint8_t mapped_gtt;       /* This object has mapped gtt, for unmap. */
   cl_mem_dstr_cb *dstr_cb;  /* The destroy callback. */
-  uint8_t is_userptr;    /* CL_MEM_USE_HOST_PTR is enabled*/
+  uint8_t is_userptr;       /* CL_MEM_USE_HOST_PTR is enabled*/
 } _cl_mem;
 
 struct _cl_mem_image {
-- 
1.9.1



More information about the Beignet mailing list