[Beignet] [PATCH 1/3] Implement the clEnqueueCopyBuffer API

junyan.he at inbox.com junyan.he at inbox.com
Mon Aug 5 03:40:23 PDT 2013


From: Junyan He <junyan.he at linux.intel.com>

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 src/cl_api.c     |   41 +++++++++++++++++++++++++++++++++++--
 src/cl_context.c |   14 +++++++++++++
 src/cl_context.h |    2 ++
 src/cl_mem.c     |   59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/cl_mem.h     |    4 ++++
 5 files changed, 118 insertions(+), 2 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 146c010..f0f7a42 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -1220,8 +1220,45 @@ clEnqueueCopyBuffer(cl_command_queue     command_queue,
                     const cl_event *     event_wait_list,
                     cl_event *           event)
 {
-  NOT_IMPLEMENTED;
-  return 0;
+  cl_int err = CL_SUCCESS;
+
+  CHECK_QUEUE(command_queue);
+  CHECK_MEM(src_buffer);
+  CHECK_MEM(dst_buffer);
+
+  if (command_queue->ctx != src_buffer->ctx) {
+    err = CL_INVALID_CONTEXT;
+    goto error;
+  }
+
+  if (command_queue->ctx != dst_buffer->ctx) {
+    err = CL_INVALID_CONTEXT;
+    goto error;
+  }
+
+  if (src_offset < 0 || src_offset + cb > src_buffer->size) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+  if (dst_offset < 0 || dst_offset + cb > src_buffer->size) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+
+  /* Check overlap */
+  if (src_buffer == dst_buffer
+	  && (src_offset <= dst_offset && dst_offset <= src_offset + cb - 1)
+	  && (dst_offset <= src_offset && src_offset <= dst_offset + cb - 1)) {
+    err = CL_MEM_COPY_OVERLAP;
+    goto error;
+  }
+
+  // TODO: Need to check the sub buffer cases.
+
+  err = cl_mem_copy(command_queue, src_buffer, dst_buffer, src_offset, dst_offset, cb);
+
+error:
+  return err;
 }
 
 cl_int
diff --git a/src/cl_context.c b/src/cl_context.c
index a48436c..e7e2908 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -21,6 +21,8 @@
 #include "cl_device_id.h"
 #include "cl_context.h"
 #include "cl_command_queue.h"
+#include "cl_program.h"
+#include "cl_kernel.h"
 #include "cl_mem.h"
 #include "cl_alloc.h"
 #include "cl_utils.h"
@@ -189,6 +191,18 @@ cl_context_delete(cl_context ctx)
   if (atomic_dec(&ctx->ref_n) > 1)
     return;
 
+  /* delete the copy kernel program. */
+  if (ctx->cpy_ker) {
+    assert(ctx->cpy_ker->ref_n == 1);
+    cl_kernel_delete(ctx->cpy_ker);
+    ctx->cpy_ker = NULL;
+  }
+  if (ctx->cpy_prg) {
+    assert(ctx->cpy_prg->ref_n == 1);
+    cl_program_delete(ctx->cpy_prg);
+    ctx->cpy_prg = NULL;
+  }
+
   /* All object lists should have been freed. Otherwise, the reference counter
    * of the context cannot be 0
    */
diff --git a/src/cl_context.h b/src/cl_context.h
index 80bf777..429a54d 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -60,6 +60,8 @@ struct _cl_context {
   cl_device_id device;              /* All information about the GPU device */
   cl_command_queue queues;          /* All command queues currently allocated */
   cl_program programs;              /* All programs currently allocated */
+  cl_program cpy_prg;               /* The programs that are used internal for memcpy */
+  cl_kernel cpy_ker;                /* The kernel that are used internal for memcpy */
   cl_mem buffers;                   /* All memory object currently allocated */
   cl_sampler samplers;              /* All sampler object currently allocated */
   pthread_mutex_t queue_lock;       /* To allocate and deallocate queues */
diff --git a/src/cl_mem.c b/src/cl_mem.c
index f794ce7..3021aa1 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -20,6 +20,9 @@
 #include "cl_mem.h"
 #include "cl_image.h"
 #include "cl_context.h"
+#include "cl_program.h"
+#include "cl_kernel.h"
+#include "cl_command_queue.h"
 #include "cl_utils.h"
 #include "cl_alloc.h"
 #include "cl_device_id.h"
@@ -529,6 +532,62 @@ cl_mem_add_ref(cl_mem mem)
   atomic_inc(&mem->ref_n);
 }
 
+LOCAL cl_int
+cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
+            size_t src_offset, size_t dst_offset, size_t cb)
+{
+  cl_int ret;
+  cl_kernel ker;
+  size_t global_off[] = {0,0,0};
+  size_t global_sz[] = {1,1,1};
+  size_t local_sz[] = {1,1,1};
+  global_sz[0] = cb;
+
+  static const char *cl_cpy_kernel_str =
+      "kernel void __cl_cpy_region ( \n"
+      "       global char* src, unsigned int src_offset, \n"
+      "       global char* dst, unsigned int dst_offset) { \n"
+      "  int i = get_global_id(0); \n"
+      "  dst[i+dst_offset] = src[i+src_offset]; \n"
+      "  dst[i+dst_offset+1] = src[i+src_offset+1]; \n"
+      "  dst[i+dst_offset+1] = src[i+src_offset+1]; \n"
+      "  dst[i+dst_offset+1] = src[i+src_offset+1]; \n"
+      "}";
+
+  /* We use one kernel to copy the data. The kernel is lazily created. */
+  assert(src_buf->ctx == dst_buf->ctx);
+  if (!src_buf->ctx->cpy_prg) {
+    size_t length = strlen(cl_cpy_kernel_str) + 1;
+    src_buf->ctx->cpy_prg = cl_program_create_from_source(src_buf->ctx, 1, &cl_cpy_kernel_str, &length, NULL);
+
+    if (!src_buf->ctx->cpy_prg)
+      return CL_OUT_OF_RESOURCES;
+
+    ret = cl_program_build(src_buf->ctx->cpy_prg, NULL);
+    if (ret != CL_SUCCESS)
+      return CL_OUT_OF_RESOURCES;
+
+    src_buf->ctx->cpy_prg->is_built = 1;
+
+    src_buf->ctx->cpy_ker = cl_kernel_dup(src_buf->ctx->cpy_prg->ker[0]);
+  }
+
+  /* setup the kernel and run. */
+  ker = src_buf->ctx->cpy_ker;
+  if (!ker)
+    return CL_OUT_OF_RESOURCES;
+
+  cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_buf);
+  cl_kernel_set_arg(ker, 1, sizeof(int), &src_offset);
+  cl_kernel_set_arg(ker, 2, sizeof(cl_mem), &dst_buf);
+  cl_kernel_set_arg(ker, 3, sizeof(int), &dst_offset);
+
+  ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
+  cl_command_queue_finish(queue);
+
+  return ret;
+}
+
 LOCAL void*
 cl_mem_map(cl_mem mem)
 {
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 1b1709a..fa33bae 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -114,6 +114,10 @@ extern void cl_mem_gl_delete(cl_mem);
 /* Add one more reference to this object */
 extern void cl_mem_add_ref(cl_mem);
 
+/* copy the buffer */
+extern cl_int cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
+            size_t src_offset, size_t dst_offset, size_t cb);
+
 /* Directly map a memory object */
 extern void *cl_mem_map(cl_mem);
 
-- 
1.7.9.5



More information about the Beignet mailing list