[Beignet] [V2 PATCH] Improve the clEnqueueMapBuffer and clCreateBuffer API
He Junyan
junyan.he at inbox.com
Thu Jul 11 23:33:07 PDT 2013
There were some small mistakes in the rebase.
I am improving the enqueue read and enqueue write APIs
and will send a unit test for all the enqueue-memory-related APIs.
On 07/12/2013 02:31 PM, junyan.he at inbox.com wrote:
> From: Junyan He <junyan.he at linux.intel.com>
>
> In clCreateBuffer API, add the CL_MEM_ALLOC_HOST_PTR and
> CL_MEM_USE_HOST_PTR flag support.
> The CL_MEM_ALLOC_HOST_PTR flag seems to require nothing special to be done.
> The CL_MEM_USE_HOST_PTR flag imposes the following requirements on the clEnqueueMapBuffer API:
> 1> The host_ptr specified in clCreateBuffer is guaranteed to
> contain the latest bits in the region being mapped when the
> clEnqueueMapBuffer command has completed.
> 2> The pointer value returned by clEnqueueMapBuffer will be
> derived from the host_ptr specified when the buffer object is created.
>
> We improve clEnqueueMapBuffer to record each mapped
> address, and we perform the data synchronization based on
> that recorded address at map and unmap time.
>
> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> ---
> src/cl_api.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> src/cl_mem.c | 43 ++++++++++++++++----
> src/cl_mem.h | 10 +++++
> 3 files changed, 169 insertions(+), 9 deletions(-)
>
> diff --git a/src/cl_api.c b/src/cl_api.c
> index 277f7a9..20cbc1e 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -1470,7 +1470,9 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
> cl_int * errcode_ret)
> {
> void *ptr = NULL;
> + void *mem_ptr = NULL;
> cl_int err = CL_SUCCESS;
> + int slot = -1;
>
> CHECK_QUEUE(command_queue);
> CHECK_MEM(buffer);
> @@ -1503,10 +1505,66 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
>
> ptr = (char*)ptr + offset;
>
> + if(buffer->flags & CL_MEM_USE_HOST_PTR) {
> + assert(buffer->host_ptr);
> + memcpy(buffer->host_ptr + offset, ptr, size);
> + mem_ptr = buffer->host_ptr + offset;
> + } else {
> + mem_ptr = ptr;
> + }
> +
> + /* Record the mapped address. */
> + if (!buffer->mapped_ptr_sz) {
> + buffer->mapped_ptr_sz = 16;
> + buffer->mapped_ptr = (cl_mapped_ptr *)malloc(
> + sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz);
> + if (!buffer->mapped_ptr) {
> + cl_mem_unmap_auto (buffer);
> + err = CL_OUT_OF_HOST_MEMORY;
> + ptr = NULL;
> + goto error;
> + }
> +
> + memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> + slot = 0;
> + } else {
> + int i = 0;
> + for (; i < buffer->mapped_ptr_sz; i++) {
> + if (buffer->mapped_ptr[i].ptr == NULL) {
> + slot = i;
> + break;
> + }
> + }
> +
> + if (i == buffer->mapped_ptr_sz) {
> + cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
> + sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2);
> + if (!new_ptr) {
> + cl_mem_unmap_auto (buffer);
> + err = CL_OUT_OF_HOST_MEMORY;
> + ptr = NULL;
> + goto error;
> + }
> + memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> + memcpy(new_ptr, buffer->mapped_ptr,
> + buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> + slot = buffer->mapped_ptr_sz;
> + buffer->mapped_ptr_sz *= 2;
> + free(buffer->mapped_ptr);
> + buffer->mapped_ptr = new_ptr;
> + }
> + }
> +
> + assert(slot != -1);
> + buffer->mapped_ptr[slot].ptr = mem_ptr;
> + buffer->mapped_ptr[slot].v_ptr = ptr;
> + buffer->mapped_ptr[slot].size = size;
> + buffer->map_ref++;
> +
> error:
> if (errcode_ret)
> *errcode_ret = err;
> - return ptr;
> + return mem_ptr;
> }
>
> void *
> @@ -1581,7 +1639,70 @@ clEnqueueUnmapMemObject(cl_command_queue command_queue,
> const cl_event * event_wait_list,
> cl_event * event)
> {
> - return cl_mem_unmap_auto(memobj);
> + cl_int err = CL_SUCCESS;
> + int i;
> + size_t mapped_size = 0;
> + void * v_ptr = NULL;
> +
> + CHECK_QUEUE(command_queue);
> + CHECK_MEM(memobj);
> + if (command_queue->ctx != memobj->ctx) {
> + err = CL_INVALID_CONTEXT;
> + goto error;
> + }
> +
> + assert(memobj->mapped_ptr_sz >= memobj->map_ref);
> + INVALID_VALUE_IF(!mapped_ptr);
> + for (i = 0; i < memobj->mapped_ptr_sz; i++) {
> + if (memobj->mapped_ptr[i].ptr == mapped_ptr) {
> + memobj->mapped_ptr[i].ptr = NULL;
> + mapped_size = memobj->mapped_ptr[i].size;
> + v_ptr = memobj->mapped_ptr[i].v_ptr;
> + memobj->mapped_ptr[i].size = 0;
> + memobj->mapped_ptr[i].v_ptr = NULL;
> + memobj->map_ref--;
> + break;
> + }
> + }
> + /* can not find a mapped address? */
> + INVALID_VALUE_IF(i == memobj->mapped_ptr_sz);
> +
> + if (memobj->flags & CL_MEM_USE_HOST_PTR) {
> + assert(mapped_ptr >= memobj->host_ptr &&
> + mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size);
> + /* Sync the data. */
> + memcpy(v_ptr, mapped_ptr, mapped_size);
> + } else {
> + assert(v_ptr == mapped_ptr);
> + }
> +
> + cl_mem_unmap_auto(memobj);
> +
> + /* shrink the mapped slot. */
> + if (memobj->mapped_ptr_sz/2 > memobj->map_ref) {
> + int j = 0;
> + cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
> + sizeof(cl_mapped_ptr) * (memobj->mapped_ptr_sz/2));
> + if (!new_ptr) {
> + /* Just do nothing. */
> + goto error;
> + }
> + memset(new_ptr, 0, (memobj->mapped_ptr_sz/2) * sizeof(cl_mapped_ptr));
> +
> + for (i = 0; i < memobj->mapped_ptr_sz; i++) {
> + if (memobj->mapped_ptr[i].ptr) {
> + new_ptr[j] = memobj->mapped_ptr[i];
> + j++;
> + assert(j < memobj->mapped_ptr_sz/2);
> + }
> + }
> + memobj->mapped_ptr_sz = memobj->mapped_ptr_sz/2;
> + free(memobj->mapped_ptr);
> + memobj->mapped_ptr = new_ptr;
> + }
> +
> +error:
> + return err;
> }
>
> cl_int
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index 064ecb3..9691ba3 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -157,10 +157,6 @@ cl_mem_allocate(cl_context ctx,
> cl_ulong max_mem_size;
>
> assert(ctx);
> - FATAL_IF (flags & CL_MEM_ALLOC_HOST_PTR,
> - "CL_MEM_ALLOC_HOST_PTR unsupported"); /* XXX */
> - FATAL_IF (flags & CL_MEM_USE_HOST_PTR,
> - "CL_MEM_USE_HOST_PTR unsupported"); /* XXX */
>
> if ((err = cl_get_device_info(ctx->device,
> CL_DEVICE_MAX_MEM_ALLOC_SIZE,
> @@ -223,11 +219,35 @@ cl_mem_new(cl_context ctx,
> void *data,
> cl_int *errcode_ret)
> {
> + /* Possible mem type combination:
> + CL_MEM_ALLOC_HOST_PTR
> + CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR
> + CL_MEM_USE_HOST_PTR
> + CL_MEM_COPY_HOST_PTR */
> +
> cl_int err = CL_SUCCESS;
> cl_mem mem = NULL;
>
> - /* Check flags consistency */
> - if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR && data == NULL)) {
> + /* This flag is valid only if host_ptr is not NULL */
> + if (UNLIKELY((flags & CL_MEM_COPY_HOST_PTR ||
> + flags & CL_MEM_USE_HOST_PTR) &&
> + data == NULL)) {
> + err = CL_INVALID_HOST_PTR;
> + goto error;
> + }
> +
> + /* CL_MEM_ALLOC_HOST_PTR and CL_MEM_USE_HOST_PTR
> + are mutually exclusive. */
> + if (UNLIKELY(flags & CL_MEM_ALLOC_HOST_PTR &&
> + flags & CL_MEM_USE_HOST_PTR)) {
> + err = CL_INVALID_HOST_PTR;
> + goto error;
> + }
> +
> + /* CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR
> + are mutually exclusive. */
> + if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR &&
> + flags & CL_MEM_USE_HOST_PTR)) {
> err = CL_INVALID_HOST_PTR;
> goto error;
> }
> @@ -238,9 +258,12 @@ cl_mem_new(cl_context ctx,
> goto error;
>
> /* Copy the data if required */
> - if (flags & CL_MEM_COPY_HOST_PTR) /* TODO check other flags too */
> + if (flags & CL_MEM_COPY_HOST_PTR || flags & CL_MEM_USE_HOST_PTR)
> cl_buffer_subdata(mem->bo, 0, sz, data);
>
> + if (flags & CL_MEM_USE_HOST_PTR)
> + mem->host_ptr = data;
> +
> exit:
> if (errcode_ret)
> *errcode_ret = err;
> @@ -469,6 +492,12 @@ cl_mem_delete(cl_mem mem)
> pthread_mutex_unlock(&mem->ctx->buffer_lock);
> cl_context_delete(mem->ctx);
>
> + /* Someone still mapped? */
> + assert(!mem->map_ref);
> +
> + if (mem->mapped_ptr)
> + free(mem->mapped_ptr);
> +
> cl_free(mem);
> }
>
> diff --git a/src/cl_mem.h b/src/cl_mem.h
> index c63bf6c..66518a6 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -49,6 +49,12 @@ typedef enum cl_image_tiling {
> CL_TILE_Y = 2
> } cl_image_tiling_t;
>
> +typedef struct _cl_mapped_ptr {
> + void * ptr;
> + void * v_ptr;
> + size_t size;
> +}cl_mapped_ptr;
> +
> /* Used for buffers and images */
> struct _cl_mem {
> DEFINE_ICD(dispatch)
> @@ -68,6 +74,10 @@ struct _cl_mem {
> uint32_t intel_fmt; /* format to provide in the surface state */
> uint32_t bpp; /* number of bytes per pixel */
> cl_image_tiling_t tiling; /* only IVB+ supports TILE_[X,Y] (image only) */
> + void * host_ptr; /* Pointer of the host mem specified by CL_MEM_ALLOC_HOST_PTR */
> + cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by caller. */
> + int mapped_ptr_sz; /* The array size of mapped_ptr. */
> + int map_ref; /* The mapped count. */
> };
>
> /* Query information about a memory object */
More information about the Beignet
mailing list