[Beignet] [PATCH 2/3] Implement api clEnqueueCopyImageToBuffer.

Zhigang Gong zhigang.gong at linux.intel.com
Thu Sep 12 22:16:19 PDT 2013


Just found one typo, should set color.x to buffer rather than color.
I will fix it and no need to send new version.

will push it latter. Thanks.

On Mon, Sep 09, 2013 at 04:10:22PM +0800, Yang Rong wrote:
> Also fix the function cl_mem_kernel_copy_image 3D image error.
> 
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  src/cl_api.c     |  45 +++++++++++++++++++++++--
>  src/cl_context.h |   8 +++--
>  src/cl_enqueue.c |   1 +
>  src/cl_mem.c     | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
>  src/cl_mem.h     |   6 ++++
>  5 files changed, 150 insertions(+), 10 deletions(-)
> 
> diff --git a/src/cl_api.c b/src/cl_api.c
> index f014b41..ecc2f43 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -1869,8 +1869,49 @@ clEnqueueCopyImageToBuffer(cl_command_queue  command_queue,
>                             const cl_event *  event_wait_list,
>                             cl_event *        event)
>  {
> -  NOT_IMPLEMENTED;
> -  return 0;
> +  cl_int err = CL_SUCCESS;
> +  enqueue_data *data, no_wait_data = { 0 };
> +
> +  CHECK_QUEUE(command_queue);
> +  CHECK_IMAGE(src_mem, src_image);
> +  CHECK_MEM(dst_buffer);
> +  if (command_queue->ctx != src_mem->ctx ||
> +      command_queue->ctx != dst_buffer->ctx) {
> +    err = CL_INVALID_CONTEXT;
> +    goto error;
> +  }
> +
> +  if (dst_offset + region[0]*region[1]*region[2]*src_image->bpp > dst_buffer->size) {
> +    err = CL_INVALID_VALUE;
> +    goto error;
> +  }
> +
> +  if (!src_origin || !region || src_origin[0] + region[0] > src_image->w ||
> +      src_origin[1] + region[1] > src_image->h || src_origin[2] + region[2] > src_image->depth) {
> +    err = CL_INVALID_VALUE;
> +    goto error;
> +  }
> +
> +  if (src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (src_origin[2] != 0 || region[2] != 1)) {
> +    err = CL_INVALID_VALUE;
> +    goto error;
> +  }
> +
> +  cl_mem_copy_image_to_buffer(command_queue, src_image, dst_buffer, src_origin, dst_offset, region);
> +
> +  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_mem->ctx);
> +
> +  data = &no_wait_data;
> +  data->type = EnqueueCopyImageToBuffer;
> +  data->queue = command_queue;
> +
> +  if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
> +                   event, data, CL_COMMAND_COPY_IMAGE_TO_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
> +    err = cl_command_queue_flush(command_queue);
> +  }
> +
> +error:
> +  return err;
>  }
>  
>  cl_int
> diff --git a/src/cl_context.h b/src/cl_context.h
> index 0342ef4..7016733 100644
> --- a/src/cl_context.h
> +++ b/src/cl_context.h
> @@ -46,9 +46,11 @@ enum _cl_internal_ker_type {
>    CL_ENQUEUE_COPY_IMAGE_1 = 3,             //copy image 3d to image 2d
>    CL_ENQUEUE_COPY_IMAGE_2 = 4,             //copy image 2d to image 3d
>    CL_ENQUEUE_COPY_IMAGE_3 = 5,             //copy image 3d to image 3d
> -  CL_ENQUEUE_COPY_IMAGE_TO_BUFFER = 6,
> -  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE = 7,
> -  CL_INTERNAL_KERNEL_MAX = 8
> +  CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0 = 6,   //copy image 2d to buffer
> +  CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_1 = 7,   //copy image 3d tobuffer
> +  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_0 = 8,   //copy buffer to image 2d
> +  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_1 = 9,   //copy buffer to image 3d
> +  CL_INTERNAL_KERNEL_MAX = 10
>  };
>  
>  struct _cl_context_prop {
> diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
> index 3c069fe..ef1a33a 100644
> --- a/src/cl_enqueue.c
> +++ b/src/cl_enqueue.c
> @@ -400,6 +400,7 @@ cl_int cl_enqueue_handle(enqueue_data* data)
>        return cl_enqueue_unmap_mem_object(data);
>      case EnqueueCopyBufferRect:
>      case EnqueueCopyImage:
> +    case EnqueueCopyBufferToImage:
>      case EnqueueNDRangeKernel:
>        cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr);
>        return CL_SUCCESS;
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index 203f47e..7290370 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -629,7 +629,6 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
>    cl_kernel_set_arg(ker, 10, sizeof(cl_int), &dst_slice_pitch);
>  
>    ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
> -  cl_command_queue_finish(queue);
>  
>    return ret;
>  }
> @@ -663,14 +662,14 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image
>    static const char *str_kernel =
>        "#ifdef SRC_IMAGE_3D \n"
>        "  #define SRC_IMAGE_TYPE image3d_t \n"
> -      "  #define SRC_COORD_TYPE int3 \n"
> +      "  #define SRC_COORD_TYPE int4 \n"
>        "#else \n"
>        "  #define SRC_IMAGE_TYPE image2d_t \n"
>        "  #define SRC_COORD_TYPE int2 \n"
>        "#endif \n"
>        "#ifdef DST_IMAGE_3D \n"
>        "  #define DST_IMAGE_TYPE image3d_t \n"
> -      "  #define DST_COORD_TYPE int3 \n"
> +      "  #define DST_COORD_TYPE int4 \n"
>        "#else \n"
>        "  #define DST_IMAGE_TYPE image2d_t \n"
>        "  #define DST_COORD_TYPE int2 \n"
> @@ -703,7 +702,6 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image
>        "  write_imagei(dst_image, src_coord, color); \n"
>        "}";
>  
> -
>    /* We use one kernel to copy the data. The kernel is lazily created. */
>    assert(src_image->base.ctx == dst_image->base.ctx);
>  
> @@ -725,11 +723,103 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image
>    cl_kernel_set_arg(ker, 10, sizeof(cl_int), &dst_origin[2]);
>  
>    ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
> -  cl_command_queue_finish(queue);
>  
>    return ret;
>  }
>  
> +LOCAL cl_int
> +cl_mem_copy_image_to_buffer(cl_command_queue queue, struct _cl_mem_image* image, cl_mem buffer,
> +                         const size_t *src_origin, const size_t dst_offset, const size_t *region) {
> +  cl_int ret;
> +  cl_kernel ker;
> +  size_t global_off[] = {0,0,0};
> +  size_t global_sz[] = {1,1,1};
> +  size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2};
> +  cl_int index = CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0;
> +  char option[40] = "";
> +  uint32_t intel_fmt, bpp;
> +  cl_image_format fmt;
> +  size_t origin0, region0;
> +
> +  if(region[1] == 1) local_sz[1] = 1;
> +  if(region[2] == 1) local_sz[2] = 1;
> +  global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0];
> +  global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1];
> +  global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2];
> +
> +  if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {
> +    strcat(option, "-D IMAGE_3D");
> +    index += 1;
> +  }
> +
> +  static const char *str_kernel =
> +      "#ifdef IMAGE_3D \n"
> +      "  #define IMAGE_TYPE image3d_t \n"
> +      "  #define COORD_TYPE int4 \n"
> +      "#else \n"
> +      "  #define IMAGE_TYPE image2d_t \n"
> +      "  #define COORD_TYPE int2 \n"
> +      "#endif \n"
> +      "kernel void __cl_copy_image_to_buffer ( \n"
> +      "       __read_only IMAGE_TYPE image, global uchar* buffer, \n"
> +      "       unsigned int region0, unsigned int region1, unsigned int region2, \n"
> +      "       unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, \n"
> +      "       unsigned int dst_offset) { \n"
> +      "  int i = get_global_id(0); \n"
> +      "  int j = get_global_id(1); \n"
> +      "  int k = get_global_id(2); \n"
> +      "  uint4 color; \n"
> +      "  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; \n"
> +      "  COORD_TYPE src_coord; \n"
> +      "  if((i >= region0) || (j>= region1) || (k>=region2)) \n"
> +      "    return; \n"
> +      "  src_coord.x = src_origin0 + i; \n"
> +      "  src_coord.y = src_origin1 + j; \n"
> +      "#ifdef IMAGE_3D \n"
> +      "  src_coord.z = src_origin2 + k; \n"
> +      "#endif \n"
> +      "  color = read_imageui(image, sampler, src_coord); \n"
> +      "  dst_offset += (k * region1 + j) * region0 + i; \n"
> +      "  buffer[dst_offset] = color; \n"
> +      "}";
> +
> +  /* We use one kernel to copy the data. The kernel is lazily created. */
> +  assert(image->base.ctx == buffer->ctx);
> +
> +  fmt.image_channel_order = CL_R;
> +  fmt.image_channel_data_type = CL_UNSIGNED_INT8;
> +  intel_fmt = image->intel_fmt;
> +  bpp = image->bpp;
> +  image->intel_fmt = cl_image_get_intel_format(&fmt);
> +  image->w = image->w * image->bpp;
> +  image->bpp = 1;
> +  region0 = region[0] * bpp;
> +  origin0 = src_origin[0] * bpp;
> +  global_sz[0] = ((region0 + local_sz[0] - 1) / local_sz[0]) * local_sz[0];
> +
> +  /* setup the kernel and run. */
> +  ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, option);
> +  if (!ker)
> +    return CL_OUT_OF_RESOURCES;
> +
> +  cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &image);
> +  cl_kernel_set_arg(ker, 1, sizeof(cl_mem), &buffer);
> +  cl_kernel_set_arg(ker, 2, sizeof(cl_int), &region0);
> +  cl_kernel_set_arg(ker, 3, sizeof(cl_int), &region[1]);
> +  cl_kernel_set_arg(ker, 4, sizeof(cl_int), &region[2]);
> +  cl_kernel_set_arg(ker, 5, sizeof(cl_int), &origin0);
> +  cl_kernel_set_arg(ker, 6, sizeof(cl_int), &src_origin[1]);
> +  cl_kernel_set_arg(ker, 7, sizeof(cl_int), &src_origin[2]);
> +  cl_kernel_set_arg(ker, 8, sizeof(cl_int), &dst_offset);
> +
> +  ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
> +
> +  image->intel_fmt = intel_fmt;
> +  image->bpp = bpp;
> +  image->w = image->w / bpp;
> +
> +  return ret;
> +}
>  LOCAL void*
>  cl_mem_map(cl_mem mem)
>  {
> diff --git a/src/cl_mem.h b/src/cl_mem.h
> index 530fe79..0a8c723 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -194,6 +194,12 @@ extern cl_int cl_mem_copy_buffer_rect(cl_command_queue, cl_mem, cl_mem,
>  /* api clEnqueueCopyImage help function */
>  extern cl_int cl_mem_kernel_copy_image(cl_command_queue, struct _cl_mem_image*, struct _cl_mem_image*,
>                                        const size_t *, const size_t *, const size_t *);
> +
> +
> +/* api clEnqueueCopyImage help function */
> +extern cl_int cl_mem_copy_image_to_buffer(cl_command_queue, struct _cl_mem_image*, cl_mem,
> +                                          const size_t *, const size_t, const size_t *);
> +
>  /* Directly map a memory object */
>  extern void *cl_mem_map(cl_mem);
>  
> -- 
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list