[Beignet] [V2 PATCH] Improve the clEnqueueMapBuffer and clCreateBuffer API

He Junyan junyan.he at inbox.com
Thu Jul 11 23:33:07 PDT 2013


There were some small mistakes in the rebase.
I am improving the enqueue read and enqueue write APIs
and will send a unit test for all the enqueue-mem-related APIs.

On 07/12/2013 02:31 PM, junyan.he at inbox.com wrote:
> From: Junyan He <junyan.he at linux.intel.com>
>
> In clCreateBuffer API, add the CL_MEM_ALLOC_HOST_PTR and
> CL_MEM_USE_HOST_PTR flag support.
> The CL_MEM_ALLOC_HOST_PTR flag seems to require nothing special to be done.
> The CL_MEM_USE_HOST_PTR flag places these requirements on the clEnqueueMapBuffer API:
> 1> The host_ptr specified in clCreateBuffer is guaranteed to
> contain the latest bits in the region being mapped when the
> clEnqueueMapBuffer command has completed.
> 2> The pointer value returned by clEnqueueMapBuffer will be
> derived from the host_ptr specified when the buffer object is created.
>
> We improve the clEnqueueMapBuffer to setup a map for the mapped
> address and do the data sync problem based on the address when
> mapped and unmapped.
>
> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> ---
>   src/cl_api.c |  125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>   src/cl_mem.c |   43 ++++++++++++++++----
>   src/cl_mem.h |   10 +++++
>   3 files changed, 169 insertions(+), 9 deletions(-)
>
> diff --git a/src/cl_api.c b/src/cl_api.c
> index 277f7a9..20cbc1e 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -1470,7 +1470,9 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
>                      cl_int *          errcode_ret)
>   {
>     void *ptr = NULL;
> +  void *mem_ptr = NULL;
>     cl_int err = CL_SUCCESS;
> +  int slot = -1;
>   
>     CHECK_QUEUE(command_queue);
>     CHECK_MEM(buffer);
> @@ -1503,10 +1505,66 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
>   
>     ptr = (char*)ptr + offset;
>   
> +  if(buffer->flags & CL_MEM_USE_HOST_PTR) {
> +    assert(buffer->host_ptr);
> +    memcpy(buffer->host_ptr + offset, ptr, size);
> +    mem_ptr = buffer->host_ptr + offset;
> +  } else {
> +    mem_ptr = ptr;
> +  }
> +
> +  /* Record the mapped address. */
> +  if (!buffer->mapped_ptr_sz) {
> +    buffer->mapped_ptr_sz = 16;
> +    buffer->mapped_ptr = (cl_mapped_ptr *)malloc(
> +          sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz);
> +    if (!buffer->mapped_ptr) {
> +      cl_mem_unmap_auto (buffer);
> +      err = CL_OUT_OF_HOST_MEMORY;
> +      ptr = NULL;
> +      goto error;
> +    }
> +
> +    memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> +    slot = 0;
> +  } else {
> +    int i = 0;
> +    for (; i < buffer->mapped_ptr_sz; i++) {
> +      if (buffer->mapped_ptr[i].ptr == NULL) {
> +        slot = i;
> +        break;
> +      }
> +    }
> +
> +    if (i == buffer->mapped_ptr_sz) {
> +      cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
> +          sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2);
> +      if (!new_ptr) {
> +        cl_mem_unmap_auto (buffer);
> +        err = CL_OUT_OF_HOST_MEMORY;
> +        ptr = NULL;
> +        goto error;
> +      }
> +      memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> +      memcpy(new_ptr, buffer->mapped_ptr,
> +             buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> +      slot = buffer->mapped_ptr_sz;
> +      buffer->mapped_ptr_sz *= 2;
> +      free(buffer->mapped_ptr);
> +      buffer->mapped_ptr = new_ptr;
> +    }
> +  }
> +
> +  assert(slot != -1);
> +  buffer->mapped_ptr[slot].ptr = mem_ptr;
> +  buffer->mapped_ptr[slot].v_ptr = ptr;
> +  buffer->mapped_ptr[slot].size = size;
> +  buffer->map_ref++;
> +
>   error:
>     if (errcode_ret)
>       *errcode_ret = err;
> -  return ptr;
> +  return mem_ptr;
>   }
>   
>   void *
> @@ -1581,7 +1639,70 @@ clEnqueueUnmapMemObject(cl_command_queue  command_queue,
>                           const cl_event *  event_wait_list,
>                           cl_event *        event)
>   {
> -  return cl_mem_unmap_auto(memobj);
> +  cl_int err = CL_SUCCESS;
> +  int i;
> +  size_t mapped_size = 0;
> +  void * v_ptr = NULL;
> +
> +  CHECK_QUEUE(command_queue);
> +  CHECK_MEM(memobj);
> +  if (command_queue->ctx != memobj->ctx) {
> +    err = CL_INVALID_CONTEXT;
> +    goto error;
> +  }
> +
> +  assert(memobj->mapped_ptr_sz >= memobj->map_ref);
> +  INVALID_VALUE_IF(!mapped_ptr);
> +  for (i = 0; i < memobj->mapped_ptr_sz; i++) {
> +    if (memobj->mapped_ptr[i].ptr == mapped_ptr) {
> +      memobj->mapped_ptr[i].ptr = NULL;
> +      mapped_size = memobj->mapped_ptr[i].size;
> +      v_ptr = memobj->mapped_ptr[i].v_ptr;
> +      memobj->mapped_ptr[i].size = 0;
> +      memobj->mapped_ptr[i].v_ptr = NULL;
> +      memobj->map_ref--;
> +      break;
> +    }
> +  }
> +  /* can not find a mapped address? */
> +  INVALID_VALUE_IF(i == memobj->mapped_ptr_sz);
> +
> +  if (memobj->flags & CL_MEM_USE_HOST_PTR) {
> +    assert(mapped_ptr >= memobj->host_ptr &&
> +      mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size);
> +    /* Sync the data. */
> +    memcpy(v_ptr, mapped_ptr, mapped_size);
> +  } else {
> +    assert(v_ptr == mapped_ptr);
> +  }
> +
> +  cl_mem_unmap_auto(memobj);
> +
> +  /* shrink the mapped slot. */
> +  if (memobj->mapped_ptr_sz/2 > memobj->map_ref) {
> +    int j = 0;
> +    cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
> +	sizeof(cl_mapped_ptr) * (memobj->mapped_ptr_sz/2));
> +    if (!new_ptr) {
> +      /* Just do nothing. */
> +      goto error;
> +    }
> +    memset(new_ptr, 0, (memobj->mapped_ptr_sz/2) * sizeof(cl_mapped_ptr));
> +
> +    for (i = 0; i < memobj->mapped_ptr_sz; i++) {
> +      if (memobj->mapped_ptr[i].ptr) {
> +        new_ptr[j] = memobj->mapped_ptr[i];
> +        j++;
> +        assert(j < memobj->mapped_ptr_sz/2);
> +      }
> +    }
> +    memobj->mapped_ptr_sz = memobj->mapped_ptr_sz/2;
> +    free(memobj->mapped_ptr);
> +    memobj->mapped_ptr = new_ptr;
> +  }
> +
> +error:
> +  return err;
>   }
>   
>   cl_int
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index 064ecb3..9691ba3 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -157,10 +157,6 @@ cl_mem_allocate(cl_context ctx,
>     cl_ulong max_mem_size;
>   
>     assert(ctx);
> -  FATAL_IF (flags & CL_MEM_ALLOC_HOST_PTR,
> -            "CL_MEM_ALLOC_HOST_PTR unsupported"); /* XXX */
> -  FATAL_IF (flags & CL_MEM_USE_HOST_PTR,
> -            "CL_MEM_USE_HOST_PTR unsupported");   /* XXX */
>   
>     if ((err = cl_get_device_info(ctx->device,
>                                   CL_DEVICE_MAX_MEM_ALLOC_SIZE,
> @@ -223,11 +219,35 @@ cl_mem_new(cl_context ctx,
>              void *data,
>              cl_int *errcode_ret)
>   {
> +  /* Possible mem type combination:
> +       CL_MEM_ALLOC_HOST_PTR
> +       CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR
> +       CL_MEM_USE_HOST_PTR
> +       CL_MEM_COPY_HOST_PTR   */
> +
>     cl_int err = CL_SUCCESS;
>     cl_mem mem = NULL;
>   
> -  /* Check flags consistency */
> -  if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR && data == NULL)) {
> +  /* This flag is valid only if host_ptr is not NULL */
> +  if (UNLIKELY((flags & CL_MEM_COPY_HOST_PTR ||
> +                flags & CL_MEM_USE_HOST_PTR) &&
> +                data == NULL)) {
> +    err = CL_INVALID_HOST_PTR;
> +    goto error;
> +  }
> +
> +  /* CL_MEM_ALLOC_HOST_PTR and CL_MEM_USE_HOST_PTR
> +     are mutually exclusive. */
> +  if (UNLIKELY(flags & CL_MEM_ALLOC_HOST_PTR &&
> +               flags & CL_MEM_USE_HOST_PTR)) {
> +    err = CL_INVALID_HOST_PTR;
> +    goto error;
> +  }
> +
> +  /* CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR
> +     are mutually exclusive. */
> +  if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR &&
> +               flags & CL_MEM_USE_HOST_PTR)) {
>       err = CL_INVALID_HOST_PTR;
>       goto error;
>     }
> @@ -238,9 +258,12 @@ cl_mem_new(cl_context ctx,
>       goto error;
>   
>     /* Copy the data if required */
> -  if (flags & CL_MEM_COPY_HOST_PTR) /* TODO check other flags too */
> +  if (flags & CL_MEM_COPY_HOST_PTR || flags & CL_MEM_USE_HOST_PTR)
>       cl_buffer_subdata(mem->bo, 0, sz, data);
>   
> +  if (flags & CL_MEM_USE_HOST_PTR)
> +    mem->host_ptr = data;
> +
>   exit:
>     if (errcode_ret)
>       *errcode_ret = err;
> @@ -469,6 +492,12 @@ cl_mem_delete(cl_mem mem)
>     pthread_mutex_unlock(&mem->ctx->buffer_lock);
>     cl_context_delete(mem->ctx);
>   
> +  /* Someone still mapped? */
> +  assert(!mem->map_ref);
> +
> +  if (mem->mapped_ptr)
> +    free(mem->mapped_ptr);
> +
>     cl_free(mem);
>   }
>   
> diff --git a/src/cl_mem.h b/src/cl_mem.h
> index c63bf6c..66518a6 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -49,6 +49,12 @@ typedef enum cl_image_tiling {
>     CL_TILE_Y  = 2
>   } cl_image_tiling_t;
>   
> +typedef struct _cl_mapped_ptr {
> +  void * ptr;
> +  void * v_ptr;
> +  size_t size;
> +}cl_mapped_ptr;
> +
>   /* Used for buffers and images */
>   struct _cl_mem {
>     DEFINE_ICD(dispatch)
> @@ -68,6 +74,10 @@ struct _cl_mem {
>     uint32_t intel_fmt;       /* format to provide in the surface state */
>     uint32_t bpp;             /* number of bytes per pixel */
>     cl_image_tiling_t tiling; /* only IVB+ supports TILE_[X,Y] (image only) */
> +  void * host_ptr;          /* Pointer of the host mem specified by CL_MEM_USE_HOST_PTR */
> +  cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by caller. */
> +  int mapped_ptr_sz;        /* The array size of mapped_ptr. */
> +  int map_ref;              /* The mapped count. */
>   };
>   
>   /* Query information about a memory object */





More information about the Beignet mailing list