[Beignet] [PATCH 2/2] Add api clEnqueueCopyImage.

Yang Rong rong.r.yang at intel.com
Thu Sep 5 19:37:33 PDT 2013


Also do some mirror changes:
1. Add a image var name to macro CHECK_IMAGE.
2. Fix local size error in cl_mem_copy_buffer_rect.
3. Fix cl_enqueue_write_image typo.

Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 src/cl_api.c     |  75 +++++++++++++++++++++++++++++----
 src/cl_context.c |  25 +++++++++++
 src/cl_context.h |   9 ++--
 src/cl_enqueue.c |   9 ++--
 src/cl_mem.c     | 123 +++++++++++++++++++++++++++++++++++++++++++++----------
 src/cl_mem.h     |   6 ++-
 src/cl_utils.h   |   8 ++--
 7 files changed, 214 insertions(+), 41 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index fda5c11..6097886 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -1634,7 +1634,7 @@ clEnqueueReadImage(cl_command_queue      command_queue,
   enqueue_data *data, no_wait_data = { 0 };
 
   CHECK_QUEUE(command_queue);
-  CHECK_IMAGE(mem);
+  CHECK_IMAGE(mem, image);
   if (command_queue->ctx != mem->ctx) {
      err = CL_INVALID_CONTEXT;
      goto error;
@@ -1716,7 +1716,7 @@ clEnqueueWriteImage(cl_command_queue     command_queue,
   enqueue_data *data, no_wait_data = { 0 };
 
   CHECK_QUEUE(command_queue);
-  CHECK_IMAGE(mem);
+  CHECK_IMAGE(mem, image);
   if (command_queue->ctx != mem->ctx) {
     err = CL_INVALID_CONTEXT;
     goto error;
@@ -1783,8 +1783,8 @@ error:
 
 cl_int
 clEnqueueCopyImage(cl_command_queue      command_queue,
-                   cl_mem                src_image,
-                   cl_mem                dst_image,
+                   cl_mem                src_mem,
+                   cl_mem                dst_mem,
                    const size_t *        src_origin,
                    const size_t *        dst_origin,
                    const size_t *        region,
@@ -1792,8 +1792,69 @@ clEnqueueCopyImage(cl_command_queue      command_queue,
                    const cl_event *      event_wait_list,
                    cl_event *            event)
 {
-  NOT_IMPLEMENTED;
-  return 0;
+  cl_int err = CL_SUCCESS;
+  enqueue_data *data, no_wait_data = { 0 };
+  cl_bool overlap = CL_TRUE;
+  cl_int i = 0;
+
+  CHECK_QUEUE(command_queue);
+  CHECK_IMAGE(src_mem, src_image);
+  CHECK_IMAGE(dst_mem, dst_image);
+  if (command_queue->ctx != src_mem->ctx ||
+      command_queue->ctx != dst_mem->ctx) {
+    err = CL_INVALID_CONTEXT;
+    goto error;
+  }
+
+  if (src_image->fmt.image_channel_order != dst_image->fmt.image_channel_order ||
+      src_image->fmt.image_channel_data_type != dst_image->fmt.image_channel_data_type) {
+    err = CL_IMAGE_FORMAT_MISMATCH;
+    goto error;
+  }
+
+  if (!src_origin || !region || src_origin[0] + region[0] > src_image->w ||
+      src_origin[1] + region[1] > src_image->h || src_origin[2] + region[2] > src_image->depth) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+
+  if (!dst_origin || !region || dst_origin[0] + region[0] > dst_image->w ||
+      dst_origin[1] + region[1] > dst_image->h || dst_origin[2] + region[2] > dst_image->depth) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+
+  if ((src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (src_origin[2] != 0 || region[2] != 1)) ||
+      (dst_image->image_type == CL_MEM_OBJECT_IMAGE2D && (dst_origin[2] != 0 || region[2] != 1))) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+
+  if (src_image == dst_image) {
+    for(i = 0; i < 3; i++)
+      overlap = overlap && (src_origin[i] < dst_origin[i] + region[i])
+                        && (dst_origin[i] < src_origin[i] + region[i]);
+    if(overlap == CL_TRUE) {
+      err = CL_MEM_COPY_OVERLAP;
+      goto error;
+    }
+  }
+
+  cl_mem_kernel_copy_image(command_queue, src_image, dst_image, src_origin, dst_origin, region);
+
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_mem->ctx);
+
+  data = &no_wait_data;
+  data->type = EnqueueCopyImage;
+  data->queue = command_queue;
+
+  if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
+                   event, data, CL_COMMAND_COPY_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) {
+    err = cl_command_queue_flush(command_queue);
+  }
+
+error:
+  return err;
 }
 
 cl_int
@@ -1977,7 +2038,7 @@ clEnqueueMapImage(cl_command_queue   command_queue,
   enqueue_data *data, no_wait_data = { 0 };
 
   CHECK_QUEUE(command_queue);
-  CHECK_IMAGE(mem);
+  CHECK_IMAGE(mem, image);
   if (command_queue->ctx != mem->ctx) {
     err = CL_INVALID_CONTEXT;
     goto error;
diff --git a/src/cl_context.c b/src/cl_context.c
index 822fdf5..4f1c611 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -26,6 +26,8 @@
 #include "cl_utils.h"
 #include "cl_driver.h"
 #include "cl_khr_icd.h"
+#include "cl_kernel.h"
+#include "cl_program.h"
 
 #include "CL/cl.h"
 #include "CL/cl_gl.h"
@@ -243,3 +245,26 @@ cl_context_get_bufmgr(cl_context ctx)
   return cl_driver_get_bufmgr(ctx->drv);
 }
 
+cl_kernel
+cl_context_get_static_kernel(cl_context ctx, cl_int index, const char * str_kernel, const char * str_option)
+{
+  cl_int ret;
+  if (!ctx->internal_prgs[index])
+  {
+    size_t length = strlen(str_kernel) + 1;
+    ctx->internal_prgs[index] = cl_program_create_from_source(ctx, 1, &str_kernel, &length, NULL);
+
+    if (!ctx->internal_prgs[index])
+      return NULL;
+
+    ret = cl_program_build(ctx->internal_prgs[index], str_option);
+    if (ret != CL_SUCCESS)
+      return NULL;
+
+    ctx->internal_prgs[index]->is_built = 1;
+
+    ctx->internel_kernels[index] = cl_kernel_dup(ctx->internal_prgs[index]->ker[0]);
+  }
+
+  return ctx->internel_kernels[index];
+}
diff --git a/src/cl_context.h b/src/cl_context.h
index 8b63104..8d6e704 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -43,9 +43,9 @@ enum _cl_internal_ker_type {
   CL_ENQUEUE_COPY_BUFFER = 0,
   CL_ENQUEUE_COPY_BUFFER_RECT = 1,
   CL_ENQUEUE_COPY_IMAGE_0 = 2,             //copy image 2d to image 2d
-  CL_ENQUEUE_COPY_IMAGE_1 = 3,             //copy image 2d to image 2d
-  CL_ENQUEUE_COPY_IMAGE_2 = 4,             //copy image 2d to image 2d
-  CL_ENQUEUE_COPY_IMAGE_3 = 5,             //copy image 2d to image 2d
+  CL_ENQUEUE_COPY_IMAGE_1 = 3,             //copy image 3d to image 2d
+  CL_ENQUEUE_COPY_IMAGE_2 = 4,             //copy image 2d to image 3d
+  CL_ENQUEUE_COPY_IMAGE_3 = 5,             //copy image 3d to image 3d
   CL_ENQUEUE_COPY_IMAGE_TO_BUFFER = 6,
   CL_ENQUEUE_COPY_BUFFER_TO_IMAGE = 7,
   CL_INVALID = 8
@@ -127,5 +127,8 @@ extern cl_int cl_context_ND_kernel(cl_context,
 /* Used for allocation */
 extern cl_buffer_mgr cl_context_get_bufmgr(cl_context ctx);
 
+/* Get the internal used kernel */
+extern cl_kernel cl_context_get_static_kernel(cl_context ctx, cl_int index, const char *str_kernel, const char * str_option);
+
 #endif /* __CL_CONTEXT_H__ */
 
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index 3446ac3..989b044 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -164,7 +164,7 @@ cl_int cl_enqueue_read_image(enqueue_data *data)
   void* src_ptr;
 
   cl_mem mem = data->mem_obj;
-  CHECK_IMAGE(mem);
+  CHECK_IMAGE(mem, image);
   const size_t* origin = data->origin;
   const size_t* region = data->region;
 
@@ -209,7 +209,7 @@ cl_int cl_enqueue_write_image(enqueue_data *data)
   void* dst_ptr;
 
   cl_mem mem = data->mem_obj;
-  CHECK_IMAGE(mem);
+  CHECK_IMAGE(mem, image);
   const size_t *origin = data->origin;
   const size_t *region = data->region;
 
@@ -224,7 +224,7 @@ cl_int cl_enqueue_write_image(enqueue_data *data)
   if (!origin[0] && region[0] == image->w && data->row_pitch == image->row_pitch &&
       (region[2] == 1 || (!origin[1] && region[1] == image->h && data->slice_pitch == image->slice_pitch)))
   {
-    memcpy(dst_ptr, data->ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]);
+    memcpy(dst_ptr, data->const_ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]);
   }
   else {
     cl_uint y, z;
@@ -236,7 +236,7 @@ cl_int cl_enqueue_write_image(enqueue_data *data)
         src += data->row_pitch;
         dst += image->row_pitch;
       }
-      data->ptr = (char*)data->ptr + data->slice_pitch;
+      data->const_ptr = (char*)data->const_ptr + data->slice_pitch;
       dst_ptr = (char*)dst_ptr + image->slice_pitch;
     }
   }
@@ -373,6 +373,7 @@ cl_int cl_enqueue_handle(enqueue_data* data)
     case EnqueueUnmapMemObject:
       return cl_enqueue_unmap_mem_object(data);
     case EnqueueCopyBufferRect:
+    case EnqueueCopyImage:
     case EnqueueNDRangeKernel:
       cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr);   //goto default
     default:
diff --git a/src/cl_mem.c b/src/cl_mem.c
index fb6dc90..064fbc4 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -25,7 +25,6 @@
 #include "cl_device_id.h"
 #include "cl_driver.h"
 #include "cl_khr_icd.h"
-#include "cl_program.h"
 #include "cl_kernel.h"
 #include "cl_command_queue.h"
 
@@ -110,7 +109,7 @@ cl_get_image_info(cl_mem mem,
                   size_t *param_value_size_ret)
 {
   int err;
-  CHECK_IMAGE(mem);
+  CHECK_IMAGE(mem, image);
 
   switch(param_name)
   {
@@ -553,7 +552,7 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
   cl_kernel ker;
   size_t global_off[] = {0,0,0};
   size_t global_sz[] = {1,1,1};
-  size_t local_sz[] = {LOCAL_SZ_2,LOCAL_SZ_1,LOCAL_SZ_0};
+  size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_1};
   if(region[1] == 1) local_sz[1] = 1;
   if(region[2] == 1) local_sz[2] = 1;
   global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0];
@@ -563,7 +562,7 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
   cl_int src_offset = src_origin[2]*src_slice_pitch + src_origin[1]*src_row_pitch + src_origin[0];
   cl_int dst_offset = dst_origin[2]*dst_slice_pitch + dst_origin[1]*dst_row_pitch + dst_origin[0];
 
-  static const char *kernel_str =
+  static const char *str_kernel =
       "kernel void __cl_cpy_buffer_rect ( \n"
       "       global char* src, global char* dst, \n"
       "       unsigned int region0, unsigned int region1, unsigned int region2, \n"
@@ -583,25 +582,9 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
 
   /* We use one kernel to copy the data. The kernel is lazily created. */
   assert(src_buf->ctx == dst_buf->ctx);
-  if (!src_buf->ctx->internal_prgs[index])
-  {
-    size_t length = strlen(kernel_str) + 1;
-    src_buf->ctx->internal_prgs[index] = cl_program_create_from_source(src_buf->ctx, 1, &kernel_str, &length, NULL);
-
-    if (!src_buf->ctx->internal_prgs[index])
-      return CL_OUT_OF_RESOURCES;
-
-    ret = cl_program_build(src_buf->ctx->internal_prgs[index], NULL);
-    if (ret != CL_SUCCESS)
-      return CL_OUT_OF_RESOURCES;
-
-    src_buf->ctx->internal_prgs[index]->is_built = 1;
-
-    src_buf->ctx->internel_kernels[index] = cl_kernel_dup(src_buf->ctx->internal_prgs[index]->ker[0]);
-  }
 
   /* setup the kernel and run. */
-  ker = src_buf->ctx->internel_kernels[index];
+  ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, NULL);
   if (!ker)
     return CL_OUT_OF_RESOURCES;
 
@@ -623,6 +606,102 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
   return ret;
 }
 
+LOCAL cl_int
+cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image, struct _cl_mem_image* dst_image,
+                         const size_t *src_origin, const size_t *dst_origin, const size_t *region) {
+  cl_int ret;
+  cl_kernel ker;
+  size_t global_off[] = {0,0,0};
+  size_t global_sz[] = {1,1,1};
+  size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2};
+  cl_int index = CL_ENQUEUE_COPY_IMAGE_0;
+  char option[40] = "";
+
+  if(region[1] == 1) local_sz[1] = 1;
+  if(region[2] == 1) local_sz[2] = 1;
+  global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0];
+  global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1];
+  global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2];
+
+  if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
+    strcat(option, "-D SRC_IMAGE_3D");
+    index += 1;
+  }
+  if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
+    strcat(option, " -D DST_IMAGE_3D");
+    index += 2;
+  }
+
+  static const char *str_kernel =
+      "#ifdef SRC_IMAGE_3D \n"
+      "  #define SRC_IMAGE_TYPE image3d_t \n"
+      "  #define SRC_COORD_TYPE int3 \n"
+      "#else \n"
+      "  #define SRC_IMAGE_TYPE image2d_t \n"
+      "  #define SRC_COORD_TYPE int2 \n"
+      "#endif \n"
+      "#ifdef DST_IMAGE_3D \n"
+      "  #define DST_IMAGE_TYPE image3d_t \n"
+      "  #define DST_COORD_TYPE int3 \n"
+      "#else \n"
+      "  #define DST_IMAGE_TYPE image2d_t \n"
+      "  #define DST_COORD_TYPE int2 \n"
+      "#endif \n"
+      "kernel void __cl_copy_image ( \n"
+      "       __read_only SRC_IMAGE_TYPE src_image, __write_only DST_IMAGE_TYPE dst_image, \n"
+      "       unsigned int region0, unsigned int region1, unsigned int region2, \n"
+      "       unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, \n"
+      "       unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) { \n"
+      "  int i = get_global_id(0); \n"
+      "  int j = get_global_id(1); \n"
+      "  int k = get_global_id(2); \n"
+      "  int4 color; \n"
+      "  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; \n"
+      "  SRC_COORD_TYPE src_coord; \n"
+      "  DST_COORD_TYPE dst_coord; \n"
+      "  if((i >= region0) || (j>= region1) || (k>=region2)) \n"
+      "    return; \n"
+      "  src_coord.x = src_origin0 + i; \n"
+      "  src_coord.y = src_origin1 + j; \n"
+      "#ifdef SRC_IMAGE_3D \n"
+      "  src_coord.z = src_origin2 + k; \n"
+      "#endif \n"
+      "  dst_coord.x = dst_origin0 + i; \n"
+      "  dst_coord.y = dst_origin1 + j; \n"
+      "#ifdef SRC_IMAGE_3D \n"
+      "  dst_coord.z = dst_origin2 + k; \n"
+      "#endif \n"
+      "  color = read_imagei(src_image, sampler, src_coord); \n"
+      "  write_imagei(dst_image, src_coord, color); \n"
+      "}";
+
+
+  /* We use one kernel to copy the data. The kernel is lazily created. */
+  assert(src_image->base.ctx == dst_image->base.ctx);
+
+  /* setup the kernel and run. */
+  ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, option);
+  if (!ker)
+    return CL_OUT_OF_RESOURCES;
+
+  cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_image);
+  cl_kernel_set_arg(ker, 1, sizeof(cl_mem), &dst_image);
+  cl_kernel_set_arg(ker, 2, sizeof(cl_int), &region[0]);
+  cl_kernel_set_arg(ker, 3, sizeof(cl_int), &region[1]);
+  cl_kernel_set_arg(ker, 4, sizeof(cl_int), &region[2]);
+  cl_kernel_set_arg(ker, 5, sizeof(cl_int), &src_origin[0]);
+  cl_kernel_set_arg(ker, 6, sizeof(cl_int), &src_origin[1]);
+  cl_kernel_set_arg(ker, 7, sizeof(cl_int), &src_origin[2]);
+  cl_kernel_set_arg(ker, 8, sizeof(cl_int), &dst_origin[0]);
+  cl_kernel_set_arg(ker, 9, sizeof(cl_int), &dst_origin[1]);
+  cl_kernel_set_arg(ker, 10, sizeof(cl_int), &dst_origin[2]);
+
+  ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
+  cl_command_queue_finish(queue);
+
+  return ret;
+}
+
 LOCAL void*
 cl_mem_map(cl_mem mem)
 {
diff --git a/src/cl_mem.h b/src/cl_mem.h
index cf05252..0cb7fd7 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -166,10 +166,14 @@ extern void cl_mem_gl_delete(struct _cl_mem_image *);
 /* Add one more reference to this object */
 extern void cl_mem_add_ref(cl_mem);
 
-/* api clEnqueueCopy buffer rect help function */
+/* api clEnqueueCopyBufferRect help function */
 extern cl_int cl_mem_copy_buffer_rect(cl_command_queue, cl_mem, cl_mem,
                                      const size_t *, const size_t *, const size_t *,
                                      size_t, size_t, size_t, size_t);
+
+/* api clEnqueueCopyImage help function */
+extern cl_int cl_mem_kernel_copy_image(cl_command_queue, struct _cl_mem_image*, struct _cl_mem_image*,
+                                      const size_t *, const size_t *, const size_t *);
 /* Directly map a memory object */
 extern void *cl_mem_map(cl_mem);
 
diff --git a/src/cl_utils.h b/src/cl_utils.h
index 5c523b2..fa900a7 100644
--- a/src/cl_utils.h
+++ b/src/cl_utils.h
@@ -138,7 +138,7 @@ do {                                                        \
   }                                                         \
 } while (0)
 
-#define CHECK_IMAGE(MEM)                                    \
+#define CHECK_IMAGE(MEM, IMAGE)                             \
 CHECK_MEM(MEM);                                             \
 do {                                                        \
   if (UNLIKELY(!IS_IMAGE(MEM))) {                           \
@@ -146,13 +146,13 @@ do {                                                        \
     goto error;                                             \
   }                                                         \
 } while (0);                                                \
-struct _cl_mem_image *image;                                \
-image = cl_mem_image(MEM);                                  \
+struct _cl_mem_image *IMAGE;                                \
+IMAGE = cl_mem_image(MEM);                                  \
 
 #define CHECK_EVENT(EVENT)                                    \
   do {                                                        \
     if (UNLIKELY(EVENT == NULL)) {                            \
-      err = CL_INVALID_EVENT;                            \
+      err = CL_INVALID_EVENT;                                 \
       goto error;                                             \
     }                                                         \
     if (UNLIKELY(EVENT->magic != CL_MAGIC_EVENT_HEADER)) {    \
-- 
1.8.1.2



More information about the Beignet mailing list