[Beignet] [PATCH V2 3/4] Implement the clEnqueueCopyBuffer API using internal binary kernel

junyan.he at inbox.com junyan.he at inbox.com
Wed Oct 9 00:55:36 PDT 2013


From: Junyan He <junyan.he at linux.intel.com>

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 src/cl_api.c     |   51 +++++++++++++++++++++++++++++++++++++++++++++-
 src/cl_context.c |   26 ++++++++++++++++++++++++
 src/cl_context.h |   28 ++++++++++++++++----------
 src/cl_mem.c     |   59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/cl_mem.h     |    4 ++++
 5 files changed, 156 insertions(+), 12 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index ded0e0c..449c7ca 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -1521,8 +1521,57 @@ clEnqueueCopyBuffer(cl_command_queue     command_queue,
                     const cl_event *     event_wait_list,
                     cl_event *           event)
 {
-  NOT_IMPLEMENTED;
+  /* Validate the arguments, enqueue the internal copy kernel, then hand the command to the event layer. */
+  cl_int err = CL_SUCCESS;
+  enqueue_data *data, no_wait_data = { 0 };
+
+  CHECK_QUEUE(command_queue);
+  CHECK_MEM(src_buffer);
+  CHECK_MEM(dst_buffer);
+
+  /* The queue and both buffers must share one context. */
+  if (command_queue->ctx != src_buffer->ctx ||
+      command_queue->ctx != dst_buffer->ctx) {
+    err = CL_INVALID_CONTEXT;
+    goto error;
+  }
+
+  /* Overflow-safe bounds checks, each range against its own buffer's size. */
+  if (cb > src_buffer->size || src_offset > src_buffer->size - cb) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+  if (cb > dst_buffer->size || dst_offset > dst_buffer->size - cb) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+
+  /* Same buffer: overlap if either start offset lies inside the other region. */
+  if (src_buffer == dst_buffer
+      && ((src_offset <= dst_offset && dst_offset - src_offset < cb)
+          || (dst_offset <= src_offset && src_offset - dst_offset < cb))) {
+    err = CL_MEM_COPY_OVERLAP;
+    goto error;
+  }
+
+  // TODO: Need to check the sub buffer cases.
+  err = cl_mem_copy(command_queue, src_buffer, dst_buffer, src_offset, dst_offset, cb);
+  if (err != CL_SUCCESS)
+    goto error;
+
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_buffer->ctx);
+  data = &no_wait_data;
+  data->type = EnqueueCopyBuffer;
+  data->queue = command_queue;
+
+  if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
+                   event, data, CL_COMMAND_COPY_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
+    return cl_command_queue_flush(command_queue);
+  }
   return 0;
+
+error:
+  return err;
 }
 
 cl_int
diff --git a/src/cl_context.c b/src/cl_context.c
index 4f1c611..b62e946 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -268,3 +268,29 @@ cl_context_get_static_kernel(cl_context ctx, cl_int index, const char * str_kern
 
   return ctx->internel_kernels[index];
 }
+
+/* Return the kernel cached in slot `index`, first creating and building its program
+   from the binary `str_kernel` (`size` bytes) if needed; NULL on create/build failure. */
+cl_kernel
+cl_context_get_static_kernel_form_bin(cl_context ctx, cl_int index,
+                  const char * str_kernel, size_t size, const char * str_option)
+{
+  cl_int ret;
+  cl_int binary_status = CL_SUCCESS;
+
+  if (ctx->internal_prgs[index])
+    return ctx->internel_kernels[index];
+
+  ctx->internal_prgs[index] = cl_program_create_from_binary(ctx, 1, &ctx->device,
+      &size, (const unsigned char **)&str_kernel, &binary_status, &ret);
+  if (ctx->internal_prgs[index] == NULL)
+    return NULL;
+
+  ret = cl_program_build(ctx->internal_prgs[index], str_option);
+  if (ret != CL_SUCCESS)
+    return NULL;
+
+  ctx->internal_prgs[index]->is_built = 1;
+  ctx->internel_kernels[index] = cl_kernel_dup(ctx->internal_prgs[index]->ker[0]);
+  return ctx->internel_kernels[index];
+}
diff --git a/src/cl_context.h b/src/cl_context.h
index 7016733..29bcb9f 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -40,17 +40,19 @@ enum _cl_gl_context_type {
 };
 
 enum _cl_internal_ker_type {
-  CL_ENQUEUE_COPY_BUFFER = 0,
-  CL_ENQUEUE_COPY_BUFFER_RECT = 1,
-  CL_ENQUEUE_COPY_IMAGE_0 = 2,             //copy image 2d to image 2d
-  CL_ENQUEUE_COPY_IMAGE_1 = 3,             //copy image 3d to image 2d
-  CL_ENQUEUE_COPY_IMAGE_2 = 4,             //copy image 2d to image 3d
-  CL_ENQUEUE_COPY_IMAGE_3 = 5,             //copy image 3d to image 3d
-  CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0 = 6,   //copy image 2d to buffer
-  CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_1 = 7,   //copy image 3d tobuffer
-  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_0 = 8,   //copy buffer to image 2d
-  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_1 = 9,   //copy buffer to image 3d
-  CL_INTERNAL_KERNEL_MAX = 10
+  CL_ENQUEUE_COPY_BUFFER_ALIGN1 = 0,   //copy buffer, size/offsets not all 4-byte aligned
+  CL_ENQUEUE_COPY_BUFFER_ALIGN4,       //copy buffer, all 4-byte but not all 16-byte aligned
+  CL_ENQUEUE_COPY_BUFFER_ALIGN16,      //copy buffer, size/offsets all 16-byte aligned
+  CL_ENQUEUE_COPY_BUFFER_RECT,
+  CL_ENQUEUE_COPY_IMAGE_0,             //copy image 2d to image 2d
+  CL_ENQUEUE_COPY_IMAGE_1,             //copy image 3d to image 2d
+  CL_ENQUEUE_COPY_IMAGE_2,             //copy image 2d to image 3d
+  CL_ENQUEUE_COPY_IMAGE_3,             //copy image 3d to image 3d
+  CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0,   //copy image 2d to buffer
+  CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_1,   //copy image 3d to buffer
+  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_0,   //copy buffer to image 2d
+  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_1,   //copy buffer to image 3d
+  CL_INTERNAL_KERNEL_MAX
 };
 
 struct _cl_context_prop {
@@ -137,5 +139,9 @@ extern cl_buffer_mgr cl_context_get_bufmgr(cl_context ctx);
 /* Get the internal used kernel */
 extern cl_kernel cl_context_get_static_kernel(cl_context ctx, cl_int index, const char *str_kernel, const char * str_option);
 
+/* Get an internal kernel built from a pre-compiled binary (created on first use, then cached) */
+extern cl_kernel cl_context_get_static_kernel_form_bin(cl_context ctx, cl_int index,
+                  const char * str_kernel, size_t size, const char * str_option);
+
 #endif /* __CL_CONTEXT_H__ */
 
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 68753f1..a2dd545 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -601,6 +601,65 @@ cl_mem_add_ref(cl_mem mem)
 #define LOCAL_SZ_2   4
 
 LOCAL cl_int
+cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
+            size_t src_offset, size_t dst_offset, size_t cb)
+{
+  cl_int ret;
+  cl_kernel ker;
+  size_t global_off[] = {0,0,0};
+  size_t global_sz[] = {1,1,1};
+  size_t local_sz[] = {1,1,1};
+
+  /* We use one kernel to copy the data. The kernel is lazily created. */
+  assert(src_buf->ctx == dst_buf->ctx);
+
+  if ((cb % 4) || (src_offset % 4) || (dst_offset % 4)) {
+    extern char cl_internal_copy_buf_align1_str[];
+    extern int cl_internal_copy_buf_align1_str_size;
+
+    ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN1,
+             cl_internal_copy_buf_align1_str, (size_t)cl_internal_copy_buf_align1_str_size, NULL);
+  } else if ((cb % 16) || (src_offset % 16) || (dst_offset % 16)) {
+    extern char cl_internal_copy_buf_align4_str[];
+    extern int cl_internal_copy_buf_align4_str_size;
+
+    ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN4,
+             cl_internal_copy_buf_align4_str, (size_t)cl_internal_copy_buf_align4_str_size, NULL);
+    cb = cb/4;
+    src_offset = src_offset/4;
+    dst_offset = dst_offset/4;
+  } else {
+    extern char cl_internal_copy_buf_align16_str[];
+    extern int cl_internal_copy_buf_align16_str_size;
+
+    ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN16,
+             cl_internal_copy_buf_align16_str, (size_t)cl_internal_copy_buf_align16_str_size, NULL);
+    cb = cb/16;
+    src_offset = src_offset/4;
+    dst_offset = dst_offset/4;
+  }
+
+  if (!ker)
+    return CL_OUT_OF_RESOURCES;
+
+  if (cb < LOCAL_SZ_0) {
+    local_sz[0] = 1;
+  } else {
+    local_sz[0] = LOCAL_SZ_0;
+  }
+  global_sz[0] = cb;
+
+  cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_buf);
+  cl_kernel_set_arg(ker, 1, sizeof(int), &src_offset);
+  cl_kernel_set_arg(ker, 2, sizeof(cl_mem), &dst_buf);
+  cl_kernel_set_arg(ker, 3, sizeof(int), &dst_offset);
+
+  ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
+
+  return ret;
+}
+
+LOCAL cl_int
 cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
                        const size_t *src_origin, const size_t *dst_origin, const size_t *region,
                        size_t src_row_pitch, size_t src_slice_pitch,
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 3d72ed3..77a92ef 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -188,6 +188,10 @@ extern void cl_mem_gl_delete(struct _cl_mem_gl_image *);
 /* Add one more reference to this object */
 extern void cl_mem_add_ref(cl_mem);
 
+/* api clEnqueueCopyBuffer help function: enqueue a copy of cb bytes between two buffers */
+extern cl_int cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
+              size_t src_offset, size_t dst_offset, size_t cb);
+
 /* api clEnqueueCopyBufferRect help function */
 extern cl_int cl_mem_copy_buffer_rect(cl_command_queue, cl_mem, cl_mem,
                                      const size_t *, const size_t *, const size_t *,
-- 
1.7.9.5



More information about the Beignet mailing list