[Beignet] [PATCH 2/3] Implement the clCreateSubBuffer API
junyan.he at inbox.com
junyan.he at inbox.com
Mon Nov 4 20:16:03 PST 2013
From: Junyan He <junyan.he at linux.intel.com>
---
src/cl_api.c | 44 ++++++++++++++----
src/cl_command_queue.c | 7 ++-
src/cl_driver.h | 2 +-
src/cl_enqueue.c | 29 ++++++++----
src/cl_mem.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++--
src/cl_mem.h | 10 +++-
6 files changed, 189 insertions(+), 23 deletions(-)
diff --git a/src/cl_api.c b/src/cl_api.c
index 70a8d09..acafd09 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -482,13 +482,17 @@ clCreateSubBuffer(cl_mem buffer,
const void * buffer_create_info,
cl_int * errcode_ret)
{
-#if 0
+ cl_mem mem = NULL;
cl_int err = CL_SUCCESS;
- CHECK_MEM (buffer);
- NOT_IMPLEMENTED;
+
+ CHECK_MEM(buffer);
+
+ mem = cl_mem_new_sub_buffer(buffer, flags, buffer_create_type,
+ buffer_create_info, &err);
error:
-#endif
- return NULL;
+ if (errcode_ret)
+ *errcode_ret = err;
+ return mem;
}
cl_mem
@@ -1575,7 +1579,7 @@ clEnqueueCopyBuffer(cl_command_queue command_queue,
err = CL_INVALID_VALUE;
goto error;
}
- if (dst_offset < 0 || dst_offset + cb > src_buffer->size) {
+ if (dst_offset < 0 || dst_offset + cb > dst_buffer->size) {
err = CL_INVALID_VALUE;
goto error;
}
@@ -1588,7 +1592,22 @@ clEnqueueCopyBuffer(cl_command_queue command_queue,
goto error;
}
- // TODO: Need to check the sub buffer cases.
+ /* Check sub overlap */
+ if (src_buffer->type == CL_MEM_SUBBUFFER_TYPE && dst_buffer->type == CL_MEM_SUBBUFFER_TYPE ) {
+ struct _cl_mem_buffer* src_b = (struct _cl_mem_buffer*)src_buffer;
+ struct _cl_mem_buffer* dst_b = (struct _cl_mem_buffer*)dst_buffer;
+ size_t src_sub_offset = src_b->sub_offset;
+ size_t dst_sub_offset = dst_b->sub_offset;
+
+ if ((src_offset + src_sub_offset <= dst_offset + dst_sub_offset
+ && dst_offset + dst_sub_offset <= src_offset + src_sub_offset + cb - 1)
+ && (dst_offset + dst_sub_offset <= src_offset + src_sub_offset
+ && src_offset + src_sub_offset <= dst_offset + dst_sub_offset + cb - 1)) {
+ err = CL_MEM_COPY_OVERLAP;
+ goto error;
+ }
+ }
+
err = cl_mem_copy(command_queue, src_buffer, dst_buffer, src_offset, dst_offset, cb);
TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_buffer->ctx);
@@ -2047,15 +2066,22 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
{
cl_int slot = -1;
int err = CL_SUCCESS;
+ size_t sub_offset = 0;
+
+ if(mem->type == CL_MEM_SUBBUFFER_TYPE) {
+ struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
+ sub_offset = buffer->sub_offset;
+ }
+
if (!(*ptr = cl_mem_map_gtt_unsync(mem))) {
err = CL_MAP_FAILURE;
goto error;
}
- *ptr = (char*)(*ptr) + offset;
+ *ptr = (char*)(*ptr) + offset + sub_offset;
if(mem->flags & CL_MEM_USE_HOST_PTR) {
assert(mem->host_ptr);
//only calc ptr here, will do memcpy in enqueue
- *mem_ptr = mem->host_ptr + offset;
+ *mem_ptr = mem->host_ptr + offset + sub_offset;
} else {
*mem_ptr = *ptr;
}
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 768ef0c..3f9d95c 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -145,7 +145,12 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
if (arg_type != GBE_ARG_GLOBAL_PTR || !k->args[i].mem)
continue;
offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i);
- cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, cc_llc_l3);
+ if (k->args[i].mem->type == CL_MEM_SUBBUFFER_TYPE) {
+ struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)k->args[i].mem;
+ cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, buffer->sub_offset, cc_llc_l3);
+ } else {
+ cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, 0, cc_llc_l3);
+ }
}
return CL_SUCCESS;
diff --git a/src/cl_driver.h b/src/cl_driver.h
index 0e9b487..640bce7 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -99,7 +99,7 @@ typedef cl_gpgpu (cl_gpgpu_sync_cb)(cl_gpgpu);
extern cl_gpgpu_sync_cb *cl_gpgpu_sync;
/* Bind a regular unformatted buffer */
-typedef void (cl_gpgpu_bind_buf_cb)(cl_gpgpu, cl_buffer, uint32_t offset, uint32_t cchint);
+typedef void (cl_gpgpu_bind_buf_cb)(cl_gpgpu, cl_buffer, uint32_t offset, uint32_t internal_offset, uint32_t cchint);
extern cl_gpgpu_bind_buf_cb *cl_gpgpu_bind_buf;
/* bind samplers defined in both kernel and kernel args. */
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index 0330691..070fd98 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -30,14 +30,18 @@
cl_int cl_enqueue_read_buffer(enqueue_data* data)
{
cl_int err = CL_SUCCESS;
+ cl_mem mem = data->mem_obj;
+ assert(mem->type == CL_MEM_BUFFER_TYPE ||
+ mem->type == CL_MEM_SUBBUFFER_TYPE);
void* src_ptr;
+ struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
if (!(src_ptr = cl_mem_map_auto(data->mem_obj))) {
err = CL_MAP_FAILURE;
goto error;
}
- memcpy(data->ptr, (char*)src_ptr + data->offset, data->size);
+ memcpy(data->ptr, (char*)src_ptr + data->offset + buffer->sub_offset, data->size);
err = cl_mem_unmap_auto(data->mem_obj);
@@ -95,6 +99,10 @@ error:
cl_int cl_enqueue_write_buffer(enqueue_data *data)
{
cl_int err = CL_SUCCESS;
+ cl_mem mem = data->mem_obj;
+ assert(mem->type == CL_MEM_BUFFER_TYPE ||
+ mem->type == CL_MEM_SUBBUFFER_TYPE);
+ struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
void* dst_ptr;
if (!(dst_ptr = cl_mem_map_auto(data->mem_obj))) {
@@ -102,7 +110,7 @@ cl_int cl_enqueue_write_buffer(enqueue_data *data)
goto error;
}
- memcpy((char*)dst_ptr + data->offset, data->const_ptr, data->size);
+ memcpy((char*)dst_ptr + data->offset + buffer->sub_offset, data->const_ptr, data->size);
err = cl_mem_unmap_auto(data->mem_obj);
@@ -231,19 +239,23 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data)
{
void *ptr = NULL;
cl_int err = CL_SUCCESS;
- cl_mem buffer = data->mem_obj;
+ cl_mem mem = data->mem_obj;
+ assert(mem->type == CL_MEM_BUFFER_TYPE ||
+ mem->type == CL_MEM_SUBBUFFER_TYPE);
+ struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
+
//because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
- if (!(ptr = cl_mem_map_gtt(buffer))) {
+ if (!(ptr = cl_mem_map_gtt(mem))) {
err = CL_MAP_FAILURE;
goto error;
}
- ptr = (char*)ptr + data->offset;
+ ptr = (char*)ptr + data->offset + buffer->sub_offset;
assert(data->ptr == ptr);
- if(buffer->flags & CL_MEM_USE_HOST_PTR) {
- assert(buffer->host_ptr);
- memcpy(buffer->host_ptr + data->offset, ptr, data->size);
+ if(mem->flags & CL_MEM_USE_HOST_PTR) {
+ assert(mem->host_ptr);
+ memcpy(mem->host_ptr + data->offset, ptr, data->size);
}
error:
@@ -386,6 +398,7 @@ cl_int cl_enqueue_handle(enqueue_data* data)
case EnqueueUnmapMemObject:
return cl_enqueue_unmap_mem_object(data);
case EnqueueCopyBufferRect:
+ case EnqueueCopyBuffer:
case EnqueueCopyImage:
case EnqueueCopyBufferToImage:
case EnqueueCopyImageToBuffer:
diff --git a/src/cl_mem.c b/src/cl_mem.c
index b4c709d..00ef30d 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -52,7 +52,9 @@ static cl_mem_object_type
cl_get_mem_object_type(cl_mem mem)
{
switch (mem->type) {
- case CL_MEM_BUFFER_TYPE: return CL_MEM_OBJECT_BUFFER;
+ case CL_MEM_BUFFER_TYPE:
+ case CL_MEM_SUBBUFFER_TYPE:
+ return CL_MEM_OBJECT_BUFFER;
case CL_MEM_IMAGE_TYPE:
case CL_MEM_GL_IMAGE_TYPE:
{
@@ -329,6 +331,102 @@ error:
goto exit;
}
+/*
+ * Create a sub-buffer covering a region of an existing buffer object.
+ *
+ * buffer      - parent object; only CL_MEM_BUFFER_TYPE is accepted, so a
+ *               sub-buffer or an image cannot be used as a parent.
+ * flags       - flags stored on the new object. When non-zero they must not
+ *               conflict with a read-only / write-only parent and must not
+ *               contain any of the *_HOST_PTR flags.
+ * create_type - must be CL_BUFFER_CREATE_TYPE_REGION.
+ * create_info - points to a cl_buffer_region giving origin and size.
+ * errcode_ret - optional; receives CL_SUCCESS or the failure code.
+ *
+ * Returns the new object, or NULL on failure with CL_INVALID_MEM_OBJECT,
+ * CL_INVALID_VALUE, CL_INVALID_BUFFER_SIZE or
+ * CL_MISALIGNED_SUB_BUFFER_OFFSET. Allocation failure is handled by
+ * TRY_ALLOC (presumably sets err and jumps to error; confirm in its
+ * definition).
+ *
+ * The sub-buffer copies the parent's bo pointer without taking a bo
+ * reference of its own; it holds a reference on the parent and on the
+ * parent's context instead.
+ */
+LOCAL cl_mem
+cl_mem_new_sub_buffer(cl_mem buffer,
+                      cl_mem_flags flags,
+                      cl_buffer_create_type create_type,
+                      const void *create_info,
+                      cl_int *errcode_ret)
+{
+  cl_int err = CL_SUCCESS;
+  cl_mem mem = NULL;
+  struct _cl_mem_buffer *sub_buf = NULL;
+  struct _cl_mem_buffer *parent = (struct _cl_mem_buffer *)buffer;
+  const cl_buffer_region *info = (const cl_buffer_region *)create_info;
+
+  if (buffer->type != CL_MEM_BUFFER_TYPE) {
+    err = CL_INVALID_MEM_OBJECT;
+    goto error;
+  }
+
+  if (flags && (((buffer->flags & CL_MEM_WRITE_ONLY) && (flags & (CL_MEM_READ_WRITE|CL_MEM_READ_ONLY)))
+      || ((buffer->flags & CL_MEM_READ_ONLY) && (flags & (CL_MEM_READ_WRITE|CL_MEM_WRITE_ONLY)))
+      || (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR)))) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+
+  if (create_type != CL_BUFFER_CREATE_TYPE_REGION) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+
+  if (!info) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+
+  if (!info->size) {
+    err = CL_INVALID_BUFFER_SIZE;
+    goto error;
+  }
+
+  /* Compare against the space left after origin rather than computing
+   * origin + size, which can wrap around in size_t and let an
+   * out-of-range region through. */
+  if (info->origin > buffer->size || info->size > buffer->size - info->origin) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+
+  /* NOTE(review): OpenCL reports CL_DEVICE_MEM_BASE_ADDR_ALIGN in bits;
+   * confirm the unit of mem_base_addr_align before relying on this mask. */
+  if (info->origin & (buffer->ctx->device->mem_base_addr_align - 1)) {
+    err = CL_MISALIGNED_SUB_BUFFER_OFFSET;
+    goto error;
+  }
+
+  /* Now create the sub buffer and link it to the buffer. */
+  TRY_ALLOC (sub_buf, CALLOC(struct _cl_mem_buffer));
+  mem = &sub_buf->base;
+  mem->type = CL_MEM_SUBBUFFER_TYPE;
+  SET_ICD(mem->dispatch)
+  mem->ref_n = 1;
+  mem->magic = CL_MAGIC_MEM_HEADER;
+  mem->flags = flags;
+  sub_buf->parent = parent;
+
+  /* The parent must outlive the sub-buffer because the bo is shared. */
+  cl_mem_add_ref(buffer);
+  /* Push the sub-buffer at the head of the parent's sub-buffer list. */
+  pthread_mutex_lock(&parent->sub_lock);
+  sub_buf->sub_next = parent->subs;
+  if (parent->subs != NULL)
+    parent->subs->sub_prev = sub_buf;
+  parent->subs = sub_buf;
+  pthread_mutex_unlock(&parent->sub_lock);
+
+  mem->bo = buffer->bo;
+  mem->size = info->size;
+  sub_buf->sub_offset = info->origin;
+  if (buffer->flags & CL_MEM_USE_HOST_PTR || buffer->flags & CL_MEM_COPY_HOST_PTR) {
+    mem->host_ptr = buffer->host_ptr;
+  }
+
+  cl_context_add_ref(buffer->ctx);
+  mem->ctx = buffer->ctx;
+  /* Push the sub-buffer at the head of the context's buffer list. */
+  pthread_mutex_lock(&buffer->ctx->buffer_lock);
+  mem->next = buffer->ctx->buffers;
+  if (buffer->ctx->buffers != NULL)
+    buffer->ctx->buffers->prev = mem;
+  buffer->ctx->buffers = mem;
+  pthread_mutex_unlock(&buffer->ctx->buffer_lock);
+
+exit:
+  if (errcode_ret)
+    *errcode_ret = err;
+  return mem;
+error:
+  cl_mem_delete(mem);
+  mem = NULL;
+  goto exit;
+}
+
void
cl_mem_copy_image_region(const size_t *origin, const size_t *region,
void *dst, size_t dst_row_pitch, size_t dst_slice_pitch,
@@ -546,8 +644,6 @@ cl_mem_delete(cl_mem mem)
cl_mem_gl_delete(cl_mem_gl_image(mem));
}
#endif
- if (LIKELY(mem->bo != NULL))
- cl_buffer_unreference(mem->bo);
/* Remove it from the list */
assert(mem->ctx);
@@ -586,6 +682,24 @@ cl_mem_delete(cl_mem mem)
}
}
+ /* Iff we are sub, do nothing for bo release. */
+ if (mem->type == CL_MEM_SUBBUFFER_TYPE) {
+ struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
+ /* Remove it from the parent's list */
+ assert(buffer->parent);
+ pthread_mutex_lock(&buffer->parent->sub_lock);
+ if (buffer->sub_prev)
+ buffer->sub_prev->sub_next = buffer->sub_next;
+ if (buffer->sub_next)
+ buffer->sub_next->sub_prev = buffer->sub_prev;
+ if (buffer->parent->subs == buffer)
+ buffer->parent->subs = buffer->sub_next;
+ pthread_mutex_unlock(&buffer->parent->sub_lock);
+ cl_mem_delete((cl_mem )(buffer->parent));
+ } else if (LIKELY(mem->bo != NULL)) {
+ cl_buffer_unreference(mem->bo);
+ }
+
cl_free(mem);
}
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 75d5cf4..e325fa1 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -66,6 +66,7 @@ typedef struct _cl_mem_dstr_cb {
/* Used for buffers and images */
enum cl_mem_type {
CL_MEM_BUFFER_TYPE,
+ CL_MEM_SUBBUFFER_TYPE, /* Region of a CL_MEM_BUFFER_TYPE parent; set by cl_mem_new_sub_buffer. */
CL_MEM_IMAGE_TYPE,
CL_MEM_GL_IMAGE_TYPE,
};
@@ -137,7 +138,11 @@ cl_mem_image_init(struct _cl_mem_image *image, size_t w, size_t h,
struct _cl_mem_buffer {
_cl_mem base;
- size_t offset;
+ struct _cl_mem_buffer* subs; /* Head of the list of sub-buffers created from this buffer. */
+ size_t sub_offset; /* Byte offset of this sub-buffer inside the parent buffer (the region origin). */
+ struct _cl_mem_buffer* sub_prev, *sub_next;/* Links chaining this sub-buffer into its parent's subs list. */
+ pthread_mutex_t sub_lock; /* Taken on the parent while its subs list is modified. */
+ struct _cl_mem_buffer* parent; /* Parent buffer when this object is a sub-buffer. */
};
inline static struct _cl_mem_image *
@@ -170,6 +175,9 @@ extern cl_int cl_get_image_info(cl_mem, cl_image_info, size_t, void *, size_t *)
/* Create a new memory object and initialize it with possible user data */
extern cl_mem cl_mem_new_buffer(cl_context, cl_mem_flags, size_t, void*, cl_int*);
+/* Create a new sub memory object */
+extern cl_mem cl_mem_new_sub_buffer(cl_mem, cl_mem_flags, cl_buffer_create_type, const void *, cl_int *);
+
/* Idem but this is an image */
extern cl_mem
cl_mem_new_image(cl_context context,
--
1.7.9.5
More information about the Beignet
mailing list