[Beignet] [V2 PATCH] Improve the clEnqueueMapBuffer and clCreateBuffer API

Zhigang Gong zhigang.gong at linux.intel.com
Mon Jul 15 23:10:02 PDT 2013


Tested OK, pushed, thanks.

On Fri, Jul 12, 2013 at 02:33:07PM +0800, He Junyan wrote:
> There were some small mistakes in the rebase.
> I am improving the enqueue read and enqueue write APIs
> and will send a unit test for all the enqueue mem related APIs.
> 
> On 07/12/2013 02:31 PM, junyan.he at inbox.com wrote:
> >From: Junyan He <junyan.he at linux.intel.com>
> >
> >In clCreateBuffer API, add the CL_MEM_ALLOC_HOST_PTR and
> >CL_MEM_USE_HOST_PTR flag support.
> >The CL_MEM_ALLOC_HOST_PTR flag seems to require nothing special.
> >The CL_MEM_USE_HOST_PTR flag imposes requirements on the clEnqueueMapBuffer API:
> >1> The host_ptr specified in clCreateBuffer is guaranteed to
> >contain the latest bits in the region being mapped when the
> >clEnqueueMapBuffer command has completed.
> >2> The pointer value returned by clEnqueueMapBuffer will be
> >derived from the host_ptr specified when the buffer object is created.
> >
> >We improve clEnqueueMapBuffer to record each mapped
> >address, and to synchronize the data based on that address
> >at map and unmap time.
> >
> >Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> >---
> >  src/cl_api.c |  125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> >  src/cl_mem.c |   43 ++++++++++++++++----
> >  src/cl_mem.h |   10 +++++
> >  3 files changed, 169 insertions(+), 9 deletions(-)
> >
> >diff --git a/src/cl_api.c b/src/cl_api.c
> >index 277f7a9..20cbc1e 100644
> >--- a/src/cl_api.c
> >+++ b/src/cl_api.c
> >@@ -1470,7 +1470,9 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
> >                     cl_int *          errcode_ret)
> >  {
> >    void *ptr = NULL;
> >+  void *mem_ptr = NULL;
> >    cl_int err = CL_SUCCESS;
> >+  int slot = -1;
> >    CHECK_QUEUE(command_queue);
> >    CHECK_MEM(buffer);
> >@@ -1503,10 +1505,66 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
> >    ptr = (char*)ptr + offset;
> >+  if(buffer->flags & CL_MEM_USE_HOST_PTR) {
> >+    assert(buffer->host_ptr);
> >+    memcpy(buffer->host_ptr + offset, ptr, size);
> >+    mem_ptr = buffer->host_ptr + offset;
> >+  } else {
> >+    mem_ptr = ptr;
> >+  }
> >+
> >+  /* Record the mapped address. */
> >+  if (!buffer->mapped_ptr_sz) {
> >+    buffer->mapped_ptr_sz = 16;
> >+    buffer->mapped_ptr = (cl_mapped_ptr *)malloc(
> >+          sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz);
> >+    if (!buffer->mapped_ptr) {
> >+      cl_mem_unmap_auto (buffer);
> >+      err = CL_OUT_OF_HOST_MEMORY;
> >+      ptr = NULL;
> >+      goto error;
> >+    }
> >+
> >+    memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> >+    slot = 0;
> >+  } else {
> >+    int i = 0;
> >+    for (; i < buffer->mapped_ptr_sz; i++) {
> >+      if (buffer->mapped_ptr[i].ptr == NULL) {
> >+        slot = i;
> >+        break;
> >+      }
> >+    }
> >+
> >+    if (i == buffer->mapped_ptr_sz) {
> >+      cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
> >+          sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2);
> >+      if (!new_ptr) {
> >+        cl_mem_unmap_auto (buffer);
> >+        err = CL_OUT_OF_HOST_MEMORY;
> >+        ptr = NULL;
> >+        goto error;
> >+      }
> >+      memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> >+      memcpy(new_ptr, buffer->mapped_ptr,
> >+             buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> >+      slot = buffer->mapped_ptr_sz;
> >+      buffer->mapped_ptr_sz *= 2;
> >+      free(buffer->mapped_ptr);
> >+      buffer->mapped_ptr = new_ptr;
> >+    }
> >+  }
> >+
> >+  assert(slot != -1);
> >+  buffer->mapped_ptr[slot].ptr = mem_ptr;
> >+  buffer->mapped_ptr[slot].v_ptr = ptr;
> >+  buffer->mapped_ptr[slot].size = size;
> >+  buffer->map_ref++;
> >+
> >  error:
> >    if (errcode_ret)
> >      *errcode_ret = err;
> >-  return ptr;
> >+  return mem_ptr;
> >  }
> >  void *
> >@@ -1581,7 +1639,70 @@ clEnqueueUnmapMemObject(cl_command_queue  command_queue,
> >                          const cl_event *  event_wait_list,
> >                          cl_event *        event)
> >  {
> >-  return cl_mem_unmap_auto(memobj);
> >+  cl_int err = CL_SUCCESS;
> >+  int i;
> >+  size_t mapped_size = 0;
> >+  void * v_ptr = NULL;
> >+
> >+  CHECK_QUEUE(command_queue);
> >+  CHECK_MEM(memobj);
> >+  if (command_queue->ctx != memobj->ctx) {
> >+    err = CL_INVALID_CONTEXT;
> >+    goto error;
> >+  }
> >+
> >+  assert(memobj->mapped_ptr_sz >= memobj->map_ref);
> >+  INVALID_VALUE_IF(!mapped_ptr);
> >+  for (i = 0; i < memobj->mapped_ptr_sz; i++) {
> >+    if (memobj->mapped_ptr[i].ptr == mapped_ptr) {
> >+      memobj->mapped_ptr[i].ptr = NULL;
> >+      mapped_size = memobj->mapped_ptr[i].size;
> >+      v_ptr = memobj->mapped_ptr[i].v_ptr;
> >+      memobj->mapped_ptr[i].size = 0;
> >+      memobj->mapped_ptr[i].v_ptr = NULL;
> >+      memobj->map_ref--;
> >+      break;
> >+    }
> >+  }
> >+  /* can not find a mapped address? */
> >+  INVALID_VALUE_IF(i == memobj->mapped_ptr_sz);
> >+
> >+  if (memobj->flags & CL_MEM_USE_HOST_PTR) {
> >+    assert(mapped_ptr >= memobj->host_ptr &&
> >+      mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size);
> >+    /* Sync the data. */
> >+    memcpy(v_ptr, mapped_ptr, mapped_size);
> >+  } else {
> >+    assert(v_ptr == mapped_ptr);
> >+  }
> >+
> >+  cl_mem_unmap_auto(memobj);
> >+
> >+  /* shrink the mapped slot. */
> >+  if (memobj->mapped_ptr_sz/2 > memobj->map_ref) {
> >+    int j = 0;
> >+    cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
> >+	sizeof(cl_mapped_ptr) * (memobj->mapped_ptr_sz/2));
> >+    if (!new_ptr) {
> >+      /* Just do nothing. */
> >+      goto error;
> >+    }
> >+    memset(new_ptr, 0, (memobj->mapped_ptr_sz/2) * sizeof(cl_mapped_ptr));
> >+
> >+    for (i = 0; i < memobj->mapped_ptr_sz; i++) {
> >+      if (memobj->mapped_ptr[i].ptr) {
> >+        new_ptr[j] = memobj->mapped_ptr[i];
> >+        j++;
> >+        assert(j < memobj->mapped_ptr_sz/2);
> >+      }
> >+    }
> >+    memobj->mapped_ptr_sz = memobj->mapped_ptr_sz/2;
> >+    free(memobj->mapped_ptr);
> >+    memobj->mapped_ptr = new_ptr;
> >+  }
> >+
> >+error:
> >+  return err;
> >  }
> >  cl_int
> >diff --git a/src/cl_mem.c b/src/cl_mem.c
> >index 064ecb3..9691ba3 100644
> >--- a/src/cl_mem.c
> >+++ b/src/cl_mem.c
> >@@ -157,10 +157,6 @@ cl_mem_allocate(cl_context ctx,
> >    cl_ulong max_mem_size;
> >    assert(ctx);
> >-  FATAL_IF (flags & CL_MEM_ALLOC_HOST_PTR,
> >-            "CL_MEM_ALLOC_HOST_PTR unsupported"); /* XXX */
> >-  FATAL_IF (flags & CL_MEM_USE_HOST_PTR,
> >-            "CL_MEM_USE_HOST_PTR unsupported");   /* XXX */
> >    if ((err = cl_get_device_info(ctx->device,
> >                                  CL_DEVICE_MAX_MEM_ALLOC_SIZE,
> >@@ -223,11 +219,35 @@ cl_mem_new(cl_context ctx,
> >             void *data,
> >             cl_int *errcode_ret)
> >  {
> >+  /* Possible mem type combination:
> >+       CL_MEM_ALLOC_HOST_PTR
> >+       CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR
> >+       CL_MEM_USE_HOST_PTR
> >+       CL_MEM_COPY_HOST_PTR   */
> >+
> >    cl_int err = CL_SUCCESS;
> >    cl_mem mem = NULL;
> >-  /* Check flags consistency */
> >-  if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR && data == NULL)) {
> >+  /* This flag is valid only if host_ptr is not NULL */
> >+  if (UNLIKELY((flags & CL_MEM_COPY_HOST_PTR ||
> >+                flags & CL_MEM_USE_HOST_PTR) &&
> >+                data == NULL)) {
> >+    err = CL_INVALID_HOST_PTR;
> >+    goto error;
> >+  }
> >+
> >+  /* CL_MEM_ALLOC_HOST_PTR and CL_MEM_USE_HOST_PTR
> >+     are mutually exclusive. */
> >+  if (UNLIKELY(flags & CL_MEM_ALLOC_HOST_PTR &&
> >+               flags & CL_MEM_USE_HOST_PTR)) {
> >+    err = CL_INVALID_HOST_PTR;
> >+    goto error;
> >+  }
> >+
> >+  /* CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR
> >+     are mutually exclusive. */
> >+  if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR &&
> >+               flags & CL_MEM_USE_HOST_PTR)) {
> >      err = CL_INVALID_HOST_PTR;
> >      goto error;
> >    }
> >@@ -238,9 +258,12 @@ cl_mem_new(cl_context ctx,
> >      goto error;
> >    /* Copy the data if required */
> >-  if (flags & CL_MEM_COPY_HOST_PTR) /* TODO check other flags too */
> >+  if (flags & CL_MEM_COPY_HOST_PTR || flags & CL_MEM_USE_HOST_PTR)
> >      cl_buffer_subdata(mem->bo, 0, sz, data);
> >+  if (flags & CL_MEM_USE_HOST_PTR)
> >+    mem->host_ptr = data;
> >+
> >  exit:
> >    if (errcode_ret)
> >      *errcode_ret = err;
> >@@ -469,6 +492,12 @@ cl_mem_delete(cl_mem mem)
> >    pthread_mutex_unlock(&mem->ctx->buffer_lock);
> >    cl_context_delete(mem->ctx);
> >+  /* Someone still mapped? */
> >+  assert(!mem->map_ref);
> >+
> >+  if (mem->mapped_ptr)
> >+    free(mem->mapped_ptr);
> >+
> >    cl_free(mem);
> >  }
> >diff --git a/src/cl_mem.h b/src/cl_mem.h
> >index c63bf6c..66518a6 100644
> >--- a/src/cl_mem.h
> >+++ b/src/cl_mem.h
> >@@ -49,6 +49,12 @@ typedef enum cl_image_tiling {
> >    CL_TILE_Y  = 2
> >  } cl_image_tiling_t;
> >+typedef struct _cl_mapped_ptr {
> >+  void * ptr;
> >+  void * v_ptr;
> >+  size_t size;
> >+}cl_mapped_ptr;
> >+
> >  /* Used for buffers and images */
> >  struct _cl_mem {
> >    DEFINE_ICD(dispatch)
> >@@ -68,6 +74,10 @@ struct _cl_mem {
> >    uint32_t intel_fmt;       /* format to provide in the surface state */
> >    uint32_t bpp;             /* number of bytes per pixel */
> >    cl_image_tiling_t tiling; /* only IVB+ supports TILE_[X,Y] (image only) */
> >+  void * host_ptr;          /* Pointer of the host mem specified by CL_MEM_ALLOC_HOST_PTR */
> >+  cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by caller. */
> >+  int mapped_ptr_sz;        /* The array size of mapped_ptr. */
> >+  int map_ref;              /* The mapped count. */
> >  };
> >  /* Query information about a memory object */
> 
> 
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list