[Beignet] [V2 PATCH] Improve the clEnqueueMapBuffer and clCreateBuffer API

junyan.he at inbox.com junyan.he at inbox.com
Thu Jul 11 23:31:14 PDT 2013


From: Junyan He <junyan.he at linux.intel.com>

In clCreateBuffer API, add the CL_MEM_ALLOC_HOST_PTR and
CL_MEM_USE_HOST_PTR flag support.
The CL_MEM_ALLOC_HOST_PTR flag seems to require nothing special.
The CL_MEM_USE_HOST_PTR flag requires the clEnqueueMapBuffer API to ensure:
1> The host_ptr specified in clCreateBuffer is guaranteed to
contain the latest bits in the region being mapped when the
clEnqueueMapBuffer command has completed.
2> The pointer value returned by clEnqueueMapBuffer will be
derived from the host_ptr specified when the buffer object is created.

We improve clEnqueueMapBuffer to record each mapped address, and
perform the data synchronization based on those recorded addresses
when the buffer is mapped and unmapped.

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 src/cl_api.c |  125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/cl_mem.c |   43 ++++++++++++++++----
 src/cl_mem.h |   10 +++++
 3 files changed, 169 insertions(+), 9 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 277f7a9..20cbc1e 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -1470,7 +1470,9 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
                    cl_int *          errcode_ret)
 {
   void *ptr = NULL;
+  void *mem_ptr = NULL;
   cl_int err = CL_SUCCESS;
+  int slot = -1;
 
   CHECK_QUEUE(command_queue);
   CHECK_MEM(buffer);
@@ -1503,10 +1505,66 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
 
   ptr = (char*)ptr + offset;
 
+  if(buffer->flags & CL_MEM_USE_HOST_PTR) {
+    assert(buffer->host_ptr);
+    memcpy(buffer->host_ptr + offset, ptr, size);
+    mem_ptr = buffer->host_ptr + offset;
+  } else {
+    mem_ptr = ptr;
+  }
+
+  /* Record the mapped address. */
+  if (!buffer->mapped_ptr_sz) {
+    buffer->mapped_ptr_sz = 16;
+    buffer->mapped_ptr = (cl_mapped_ptr *)malloc(
+          sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz);
+    if (!buffer->mapped_ptr) {
+      cl_mem_unmap_auto (buffer);
+      err = CL_OUT_OF_HOST_MEMORY;
+      ptr = NULL;
+      goto error;
+    }
+
+    memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
+    slot = 0;
+  } else {
+    int i = 0;
+    for (; i < buffer->mapped_ptr_sz; i++) {
+      if (buffer->mapped_ptr[i].ptr == NULL) {
+        slot = i;
+        break;
+      }
+    }
+
+    if (i == buffer->mapped_ptr_sz) {
+      cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
+          sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2);
+      if (!new_ptr) {
+        cl_mem_unmap_auto (buffer);
+        err = CL_OUT_OF_HOST_MEMORY;
+        ptr = NULL;
+        goto error;
+      }
+      memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
+      memcpy(new_ptr, buffer->mapped_ptr,
+             buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
+      slot = buffer->mapped_ptr_sz;
+      buffer->mapped_ptr_sz *= 2;
+      free(buffer->mapped_ptr);
+      buffer->mapped_ptr = new_ptr;
+    }
+  }
+
+  assert(slot != -1);
+  buffer->mapped_ptr[slot].ptr = mem_ptr;
+  buffer->mapped_ptr[slot].v_ptr = ptr;
+  buffer->mapped_ptr[slot].size = size;
+  buffer->map_ref++;
+
 error:
   if (errcode_ret)
     *errcode_ret = err;
-  return ptr;
+  return mem_ptr;
 }
 
 void *
@@ -1581,7 +1639,70 @@ clEnqueueUnmapMemObject(cl_command_queue  command_queue,
                         const cl_event *  event_wait_list,
                         cl_event *        event)
 {
-  return cl_mem_unmap_auto(memobj);
+  cl_int err = CL_SUCCESS;
+  int i;
+  size_t mapped_size = 0;
+  void * v_ptr = NULL;
+
+  CHECK_QUEUE(command_queue);
+  CHECK_MEM(memobj);
+  if (command_queue->ctx != memobj->ctx) {
+    err = CL_INVALID_CONTEXT;
+    goto error;
+  }
+
+  assert(memobj->mapped_ptr_sz >= memobj->map_ref);
+  INVALID_VALUE_IF(!mapped_ptr);
+  for (i = 0; i < memobj->mapped_ptr_sz; i++) {
+    if (memobj->mapped_ptr[i].ptr == mapped_ptr) {
+      memobj->mapped_ptr[i].ptr = NULL;
+      mapped_size = memobj->mapped_ptr[i].size;
+      v_ptr = memobj->mapped_ptr[i].v_ptr;
+      memobj->mapped_ptr[i].size = 0;
+      memobj->mapped_ptr[i].v_ptr = NULL;
+      memobj->map_ref--;
+      break;
+    }
+  }
+  /* can not find a mapped address? */
+  INVALID_VALUE_IF(i == memobj->mapped_ptr_sz);
+
+  if (memobj->flags & CL_MEM_USE_HOST_PTR) {
+    assert(mapped_ptr >= memobj->host_ptr &&
+      mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size);
+    /* Sync the data. */
+    memcpy(v_ptr, mapped_ptr, mapped_size);
+  } else {
+    assert(v_ptr == mapped_ptr);
+  }
+
+  cl_mem_unmap_auto(memobj);
+
+  /* shrink the mapped slot. */
+  if (memobj->mapped_ptr_sz/2 > memobj->map_ref) {
+    int j = 0;
+    cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
+	sizeof(cl_mapped_ptr) * (memobj->mapped_ptr_sz/2));
+    if (!new_ptr) {
+      /* Just do nothing. */
+      goto error;
+    }
+    memset(new_ptr, 0, (memobj->mapped_ptr_sz/2) * sizeof(cl_mapped_ptr));
+
+    for (i = 0; i < memobj->mapped_ptr_sz; i++) {
+      if (memobj->mapped_ptr[i].ptr) {
+        new_ptr[j] = memobj->mapped_ptr[i];
+        j++;
+        assert(j < memobj->mapped_ptr_sz/2);
+      }
+    }
+    memobj->mapped_ptr_sz = memobj->mapped_ptr_sz/2;
+    free(memobj->mapped_ptr);
+    memobj->mapped_ptr = new_ptr;
+  }
+
+error:
+  return err;
 }
 
 cl_int
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 064ecb3..9691ba3 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -157,10 +157,6 @@ cl_mem_allocate(cl_context ctx,
   cl_ulong max_mem_size;
 
   assert(ctx);
-  FATAL_IF (flags & CL_MEM_ALLOC_HOST_PTR,
-            "CL_MEM_ALLOC_HOST_PTR unsupported"); /* XXX */
-  FATAL_IF (flags & CL_MEM_USE_HOST_PTR,
-            "CL_MEM_USE_HOST_PTR unsupported");   /* XXX */
 
   if ((err = cl_get_device_info(ctx->device,
                                 CL_DEVICE_MAX_MEM_ALLOC_SIZE,
@@ -223,11 +219,35 @@ cl_mem_new(cl_context ctx,
            void *data,
            cl_int *errcode_ret)
 {
+  /* Possible mem type combination:
+       CL_MEM_ALLOC_HOST_PTR
+       CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR
+       CL_MEM_USE_HOST_PTR
+       CL_MEM_COPY_HOST_PTR   */
+
   cl_int err = CL_SUCCESS;
   cl_mem mem = NULL;
 
-  /* Check flags consistency */
-  if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR && data == NULL)) {
+  /* This flag is valid only if host_ptr is not NULL */
+  if (UNLIKELY((flags & CL_MEM_COPY_HOST_PTR ||
+                flags & CL_MEM_USE_HOST_PTR) &&
+                data == NULL)) {
+    err = CL_INVALID_HOST_PTR;
+    goto error;
+  }
+
+  /* CL_MEM_ALLOC_HOST_PTR and CL_MEM_USE_HOST_PTR
+     are mutually exclusive. */
+  if (UNLIKELY(flags & CL_MEM_ALLOC_HOST_PTR &&
+               flags & CL_MEM_USE_HOST_PTR)) {
+    err = CL_INVALID_HOST_PTR;
+    goto error;
+  }
+
+  /* CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR
+     are mutually exclusive. */
+  if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR &&
+               flags & CL_MEM_USE_HOST_PTR)) {
     err = CL_INVALID_HOST_PTR;
     goto error;
   }
@@ -238,9 +258,12 @@ cl_mem_new(cl_context ctx,
     goto error;
 
   /* Copy the data if required */
-  if (flags & CL_MEM_COPY_HOST_PTR) /* TODO check other flags too */
+  if (flags & CL_MEM_COPY_HOST_PTR || flags & CL_MEM_USE_HOST_PTR)
     cl_buffer_subdata(mem->bo, 0, sz, data);
 
+  if (flags & CL_MEM_USE_HOST_PTR)
+    mem->host_ptr = data;
+
 exit:
   if (errcode_ret)
     *errcode_ret = err;
@@ -469,6 +492,12 @@ cl_mem_delete(cl_mem mem)
   pthread_mutex_unlock(&mem->ctx->buffer_lock);
   cl_context_delete(mem->ctx);
 
+  /* Someone still mapped? */
+  assert(!mem->map_ref);
+
+  if (mem->mapped_ptr)
+    free(mem->mapped_ptr);
+
   cl_free(mem);
 }
 
diff --git a/src/cl_mem.h b/src/cl_mem.h
index c63bf6c..66518a6 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -49,6 +49,12 @@ typedef enum cl_image_tiling {
   CL_TILE_Y  = 2
 } cl_image_tiling_t;
 
+typedef struct _cl_mapped_ptr {
+  void * ptr;
+  void * v_ptr;
+  size_t size;
+}cl_mapped_ptr;
+
 /* Used for buffers and images */
 struct _cl_mem {
   DEFINE_ICD(dispatch)
@@ -68,6 +74,10 @@ struct _cl_mem {
   uint32_t intel_fmt;       /* format to provide in the surface state */
   uint32_t bpp;             /* number of bytes per pixel */
   cl_image_tiling_t tiling; /* only IVB+ supports TILE_[X,Y] (image only) */
+  void * host_ptr;          /* Pointer to the host mem specified by CL_MEM_USE_HOST_PTR */
+  cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by caller. */
+  int mapped_ptr_sz;        /* The array size of mapped_ptr. */
+  int map_ref;              /* The mapped count. */
 };
 
 /* Query information about a memory object */
-- 
1.7.9.5



More information about the Beignet mailing list