[Beignet] [V2 PATCH] Improve the clEnqueueMapBuffer and clCreateBuffer API
junyan.he at inbox.com
junyan.he at inbox.com
Thu Jul 11 23:31:14 PDT 2013
From: Junyan He <junyan.he at linux.intel.com>
In clCreateBuffer API, add the CL_MEM_ALLOC_HOST_PTR and
CL_MEM_USE_HOST_PTR flag support.
CL_MEM_ALLOC_HOST_PTR flag seem nothings special to do.
CL_MEM_USE_HOST_PTR flag will request clEnqueueMapBuffer API:
1> The host_ptr specified in clCreateBuffer is guaranteed to
contain the latest bits in the region being mapped when the
clEnqueueMapBuffer command has completed.
2> The pointer value returned by clEnqueueMapBuffer will be
derived from the host_ptr specified when the buffer object is created.
We improve the clEnqueueMapBuffer to setup a map for the mapped
address and do the data sync problem based on the address when
mapped and unmapped.
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
src/cl_api.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
src/cl_mem.c | 43 ++++++++++++++++----
src/cl_mem.h | 10 +++++
3 files changed, 169 insertions(+), 9 deletions(-)
diff --git a/src/cl_api.c b/src/cl_api.c
index 277f7a9..20cbc1e 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -1470,7 +1470,9 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
cl_int * errcode_ret)
{
void *ptr = NULL;
+ void *mem_ptr = NULL;
cl_int err = CL_SUCCESS;
+ int slot = -1;
CHECK_QUEUE(command_queue);
CHECK_MEM(buffer);
@@ -1503,10 +1505,66 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
ptr = (char*)ptr + offset;
+ if(buffer->flags & CL_MEM_USE_HOST_PTR) {
+ assert(buffer->host_ptr);
+ memcpy(buffer->host_ptr + offset, ptr, size);
+ mem_ptr = buffer->host_ptr + offset;
+ } else {
+ mem_ptr = ptr;
+ }
+
+ /* Record the mapped address. */
+ if (!buffer->mapped_ptr_sz) {
+ buffer->mapped_ptr_sz = 16;
+ buffer->mapped_ptr = (cl_mapped_ptr *)malloc(
+ sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz);
+ if (!buffer->mapped_ptr) {
+ cl_mem_unmap_auto (buffer);
+ err = CL_OUT_OF_HOST_MEMORY;
+ ptr = NULL;
+ goto error;
+ }
+
+ memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
+ slot = 0;
+ } else {
+ int i = 0;
+ for (; i < buffer->mapped_ptr_sz; i++) {
+ if (buffer->mapped_ptr[i].ptr == NULL) {
+ slot = i;
+ break;
+ }
+ }
+
+ if (i == buffer->mapped_ptr_sz) {
+ cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
+ sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2);
+ if (!new_ptr) {
+ cl_mem_unmap_auto (buffer);
+ err = CL_OUT_OF_HOST_MEMORY;
+ ptr = NULL;
+ goto error;
+ }
+ memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
+ memcpy(new_ptr, buffer->mapped_ptr,
+ buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
+ slot = buffer->mapped_ptr_sz;
+ buffer->mapped_ptr_sz *= 2;
+ free(buffer->mapped_ptr);
+ buffer->mapped_ptr = new_ptr;
+ }
+ }
+
+ assert(slot != -1);
+ buffer->mapped_ptr[slot].ptr = mem_ptr;
+ buffer->mapped_ptr[slot].v_ptr = ptr;
+ buffer->mapped_ptr[slot].size = size;
+ buffer->map_ref++;
+
error:
if (errcode_ret)
*errcode_ret = err;
- return ptr;
+ return mem_ptr;
}
void *
@@ -1581,7 +1639,70 @@ clEnqueueUnmapMemObject(cl_command_queue command_queue,
const cl_event * event_wait_list,
cl_event * event)
{
- return cl_mem_unmap_auto(memobj);
+ cl_int err = CL_SUCCESS;
+ int i;
+ size_t mapped_size = 0;
+ void * v_ptr = NULL;
+
+ CHECK_QUEUE(command_queue);
+ CHECK_MEM(memobj);
+ if (command_queue->ctx != memobj->ctx) {
+ err = CL_INVALID_CONTEXT;
+ goto error;
+ }
+
+ assert(memobj->mapped_ptr_sz >= memobj->map_ref);
+ INVALID_VALUE_IF(!mapped_ptr);
+ for (i = 0; i < memobj->mapped_ptr_sz; i++) {
+ if (memobj->mapped_ptr[i].ptr == mapped_ptr) {
+ memobj->mapped_ptr[i].ptr = NULL;
+ mapped_size = memobj->mapped_ptr[i].size;
+ v_ptr = memobj->mapped_ptr[i].v_ptr;
+ memobj->mapped_ptr[i].size = 0;
+ memobj->mapped_ptr[i].v_ptr = NULL;
+ memobj->map_ref--;
+ break;
+ }
+ }
+ /* can not find a mapped address? */
+ INVALID_VALUE_IF(i == memobj->mapped_ptr_sz);
+
+ if (memobj->flags & CL_MEM_USE_HOST_PTR) {
+ assert(mapped_ptr >= memobj->host_ptr &&
+ mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size);
+ /* Sync the data. */
+ memcpy(v_ptr, mapped_ptr, mapped_size);
+ } else {
+ assert(v_ptr == mapped_ptr);
+ }
+
+ cl_mem_unmap_auto(memobj);
+
+ /* shrink the mapped slot. */
+ if (memobj->mapped_ptr_sz/2 > memobj->map_ref) {
+ int j = 0;
+ cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
+ sizeof(cl_mapped_ptr) * (memobj->mapped_ptr_sz/2));
+ if (!new_ptr) {
+ /* Just do nothing. */
+ goto error;
+ }
+ memset(new_ptr, 0, (memobj->mapped_ptr_sz/2) * sizeof(cl_mapped_ptr));
+
+ for (i = 0; i < memobj->mapped_ptr_sz; i++) {
+ if (memobj->mapped_ptr[i].ptr) {
+ new_ptr[j] = memobj->mapped_ptr[i];
+ j++;
+ assert(j < memobj->mapped_ptr_sz/2);
+ }
+ }
+ memobj->mapped_ptr_sz = memobj->mapped_ptr_sz/2;
+ free(memobj->mapped_ptr);
+ memobj->mapped_ptr = new_ptr;
+ }
+
+error:
+ return err;
}
cl_int
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 064ecb3..9691ba3 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -157,10 +157,6 @@ cl_mem_allocate(cl_context ctx,
cl_ulong max_mem_size;
assert(ctx);
- FATAL_IF (flags & CL_MEM_ALLOC_HOST_PTR,
- "CL_MEM_ALLOC_HOST_PTR unsupported"); /* XXX */
- FATAL_IF (flags & CL_MEM_USE_HOST_PTR,
- "CL_MEM_USE_HOST_PTR unsupported"); /* XXX */
if ((err = cl_get_device_info(ctx->device,
CL_DEVICE_MAX_MEM_ALLOC_SIZE,
@@ -223,11 +219,35 @@ cl_mem_new(cl_context ctx,
void *data,
cl_int *errcode_ret)
{
+ /* Possible mem type combination:
+ CL_MEM_ALLOC_HOST_PTR
+ CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR
+ CL_MEM_USE_HOST_PTR
+ CL_MEM_COPY_HOST_PTR */
+
cl_int err = CL_SUCCESS;
cl_mem mem = NULL;
- /* Check flags consistency */
- if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR && data == NULL)) {
+ /* This flag is valid only if host_ptr is not NULL */
+ if (UNLIKELY((flags & CL_MEM_COPY_HOST_PTR ||
+ flags & CL_MEM_USE_HOST_PTR) &&
+ data == NULL)) {
+ err = CL_INVALID_HOST_PTR;
+ goto error;
+ }
+
+ /* CL_MEM_ALLOC_HOST_PTR and CL_MEM_USE_HOST_PTR
+ are mutually exclusive. */
+ if (UNLIKELY(flags & CL_MEM_ALLOC_HOST_PTR &&
+ flags & CL_MEM_USE_HOST_PTR)) {
+ err = CL_INVALID_HOST_PTR;
+ goto error;
+ }
+
+ /* CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR
+ are mutually exclusive. */
+ if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR &&
+ flags & CL_MEM_USE_HOST_PTR)) {
err = CL_INVALID_HOST_PTR;
goto error;
}
@@ -238,9 +258,12 @@ cl_mem_new(cl_context ctx,
goto error;
/* Copy the data if required */
- if (flags & CL_MEM_COPY_HOST_PTR) /* TODO check other flags too */
+ if (flags & CL_MEM_COPY_HOST_PTR || flags & CL_MEM_USE_HOST_PTR)
cl_buffer_subdata(mem->bo, 0, sz, data);
+ if (flags & CL_MEM_USE_HOST_PTR)
+ mem->host_ptr = data;
+
exit:
if (errcode_ret)
*errcode_ret = err;
@@ -469,6 +492,12 @@ cl_mem_delete(cl_mem mem)
pthread_mutex_unlock(&mem->ctx->buffer_lock);
cl_context_delete(mem->ctx);
+ /* Someone still mapped? */
+ assert(!mem->map_ref);
+
+ if (mem->mapped_ptr)
+ free(mem->mapped_ptr);
+
cl_free(mem);
}
diff --git a/src/cl_mem.h b/src/cl_mem.h
index c63bf6c..66518a6 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -49,6 +49,12 @@ typedef enum cl_image_tiling {
CL_TILE_Y = 2
} cl_image_tiling_t;
+typedef struct _cl_mapped_ptr {
+ void * ptr;
+ void * v_ptr;
+ size_t size;
+}cl_mapped_ptr;
+
/* Used for buffers and images */
struct _cl_mem {
DEFINE_ICD(dispatch)
@@ -68,6 +74,10 @@ struct _cl_mem {
uint32_t intel_fmt; /* format to provide in the surface state */
uint32_t bpp; /* number of bytes per pixel */
cl_image_tiling_t tiling; /* only IVB+ supports TILE_[X,Y] (image only) */
+ void * host_ptr; /* Pointer of the host mem specified by CL_MEM_ALLOC_HOST_PTR */
+ cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by caller. */
+ int mapped_ptr_sz; /* The array size of mapped_ptr. */
+ int map_ref; /* The mapped count. */
};
/* Query information about a memory object */
--
1.7.9.5
More information about the Beignet
mailing list