[Beignet] [PATCH 1/3] Implement the clEnqueueCopyBuffer API
junyan.he at inbox.com
junyan.he at inbox.com
Mon Aug 5 03:40:23 PDT 2013
From: Junyan He <junyan.he at linux.intel.com>
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
src/cl_api.c | 41 +++++++++++++++++++++++++++++++++++--
src/cl_context.c | 14 +++++++++++++
src/cl_context.h | 2 ++
src/cl_mem.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
src/cl_mem.h | 4 ++++
5 files changed, 118 insertions(+), 2 deletions(-)
diff --git a/src/cl_api.c b/src/cl_api.c
index 146c010..f0f7a42 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -1220,8 +1220,45 @@ clEnqueueCopyBuffer(cl_command_queue command_queue,
const cl_event * event_wait_list,
cl_event * event)
{
- NOT_IMPLEMENTED;
- return 0;
+ cl_int err = CL_SUCCESS;
+
+ CHECK_QUEUE(command_queue);
+ CHECK_MEM(src_buffer);
+ CHECK_MEM(dst_buffer);
+
+ if (command_queue->ctx != src_buffer->ctx) {
+ err = CL_INVALID_CONTEXT;
+ goto error;
+ }
+
+ if (command_queue->ctx != dst_buffer->ctx) {
+ err = CL_INVALID_CONTEXT;
+ goto error;
+ }
+
+ if (src_offset < 0 || src_offset + cb > src_buffer->size) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+ if (dst_offset < 0 || dst_offset + cb > src_buffer->size) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+
+ /* Check overlap */
+ if (src_buffer == dst_buffer
+ && (src_offset <= dst_offset && dst_offset <= src_offset + cb - 1)
+ && (dst_offset <= src_offset && src_offset <= dst_offset + cb - 1)) {
+ err = CL_MEM_COPY_OVERLAP;
+ goto error;
+ }
+
+ // TODO: Need to check the sub buffer cases.
+
+ err = cl_mem_copy(command_queue, src_buffer, dst_buffer, src_offset, dst_offset, cb);
+
+error:
+ return err;
}
cl_int
diff --git a/src/cl_context.c b/src/cl_context.c
index a48436c..e7e2908 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -21,6 +21,8 @@
#include "cl_device_id.h"
#include "cl_context.h"
#include "cl_command_queue.h"
+#include "cl_program.h"
+#include "cl_kernel.h"
#include "cl_mem.h"
#include "cl_alloc.h"
#include "cl_utils.h"
@@ -189,6 +191,18 @@ cl_context_delete(cl_context ctx)
if (atomic_dec(&ctx->ref_n) > 1)
return;
+ /* delete the copy kernel program. */
+ if (ctx->cpy_ker) {
+ assert(ctx->cpy_ker->ref_n == 1);
+ cl_kernel_delete(ctx->cpy_ker);
+ ctx->cpy_ker = NULL;
+ }
+ if (ctx->cpy_prg) {
+ assert(ctx->cpy_prg->ref_n == 1);
+ cl_program_delete(ctx->cpy_prg);
+ ctx->cpy_prg = NULL;
+ }
+
/* All object lists should have been freed. Otherwise, the reference counter
* of the context cannot be 0
*/
diff --git a/src/cl_context.h b/src/cl_context.h
index 80bf777..429a54d 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -60,6 +60,8 @@ struct _cl_context {
cl_device_id device; /* All information about the GPU device */
cl_command_queue queues; /* All command queues currently allocated */
cl_program programs; /* All programs currently allocated */
+ cl_program cpy_prg; /* The programs that are used internal for memcpy */
+ cl_kernel cpy_ker; /* The kernel that are used internal for memcpy */
cl_mem buffers; /* All memory object currently allocated */
cl_sampler samplers; /* All sampler object currently allocated */
pthread_mutex_t queue_lock; /* To allocate and deallocate queues */
diff --git a/src/cl_mem.c b/src/cl_mem.c
index f794ce7..3021aa1 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -20,6 +20,9 @@
#include "cl_mem.h"
#include "cl_image.h"
#include "cl_context.h"
+#include "cl_program.h"
+#include "cl_kernel.h"
+#include "cl_command_queue.h"
#include "cl_utils.h"
#include "cl_alloc.h"
#include "cl_device_id.h"
@@ -529,6 +532,62 @@ cl_mem_add_ref(cl_mem mem)
atomic_inc(&mem->ref_n);
}
+LOCAL cl_int
+cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
+ size_t src_offset, size_t dst_offset, size_t cb)
+{
+ cl_int ret;
+ cl_kernel ker;
+ size_t global_off[] = {0,0,0};
+ size_t global_sz[] = {1,1,1};
+ size_t local_sz[] = {1,1,1};
+ global_sz[0] = cb;
+
+ static const char *cl_cpy_kernel_str =
+ "kernel void __cl_cpy_region ( \n"
+ " global char* src, unsigned int src_offset, \n"
+ " global char* dst, unsigned int dst_offset) { \n"
+ " int i = get_global_id(0); \n"
+ " dst[i+dst_offset] = src[i+src_offset]; \n"
+ " dst[i+dst_offset+1] = src[i+src_offset+1]; \n"
+ " dst[i+dst_offset+1] = src[i+src_offset+1]; \n"
+ " dst[i+dst_offset+1] = src[i+src_offset+1]; \n"
+ "}";
+
+ /* We use one kernel to copy the data. The kernel is lazily created. */
+ assert(src_buf->ctx == dst_buf->ctx);
+ if (!src_buf->ctx->cpy_prg) {
+ size_t length = strlen(cl_cpy_kernel_str) + 1;
+ src_buf->ctx->cpy_prg = cl_program_create_from_source(src_buf->ctx, 1, &cl_cpy_kernel_str, &length, NULL);
+
+ if (!src_buf->ctx->cpy_prg)
+ return CL_OUT_OF_RESOURCES;
+
+ ret = cl_program_build(src_buf->ctx->cpy_prg, NULL);
+ if (ret != CL_SUCCESS)
+ return CL_OUT_OF_RESOURCES;
+
+ src_buf->ctx->cpy_prg->is_built = 1;
+
+ src_buf->ctx->cpy_ker = cl_kernel_dup(src_buf->ctx->cpy_prg->ker[0]);
+ }
+
+ /* setup the kernel and run. */
+ ker = src_buf->ctx->cpy_ker;
+ if (!ker)
+ return CL_OUT_OF_RESOURCES;
+
+ cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_buf);
+ cl_kernel_set_arg(ker, 1, sizeof(int), &src_offset);
+ cl_kernel_set_arg(ker, 2, sizeof(cl_mem), &dst_buf);
+ cl_kernel_set_arg(ker, 3, sizeof(int), &dst_offset);
+
+ ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
+ cl_command_queue_finish(queue);
+
+ return ret;
+}
+
LOCAL void*
cl_mem_map(cl_mem mem)
{
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 1b1709a..fa33bae 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -114,6 +114,10 @@ extern void cl_mem_gl_delete(cl_mem);
/* Add one more reference to this object */
extern void cl_mem_add_ref(cl_mem);
+/* copy the buffer */
+extern cl_int cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
+ size_t src_offset, size_t dst_offset, size_t cb);
+
/* Directly map a memory object */
extern void *cl_mem_map(cl_mem);
--
1.7.9.5
More information about the Beignet
mailing list