[Beignet] [PATCH] Implement 1D/2D image array related cl_mem_kernel_copy_image in cl way instead of cpu way.

Zhigang Gong zhigang.gong at linux.intel.com
Thu Feb 5 21:37:07 PST 2015


The patch LGTM, will push latter, thanks.


On Fri, Feb 06, 2015 at 11:52:30AM +0800, Chuanbo Weng wrote:
> Before this patch, cl_mem_kernel_copy_image do cpu memory copy in order
> to copy image array objects. This is very slow for large image size.
> This patch implement image array copy in cl way, which dramatically
> accelerate image array related clEnqueueCopyImage.
> clCopyImage case in OpenCL conformance test will not be blocked anymore.
> 
> Signed-off-by: Chuanbo Weng <chuanbo.weng at intel.com>
> ---
>  src/CMakeLists.txt                                 |  3 ++
>  src/cl_context.h                                   |  6 +++
>  src/cl_mem.c                                       | 43 ++++++++++++++++------
>  .../cl_internal_copy_image_1d_array_to_1d_array.cl | 21 +++++++++++
>  .../cl_internal_copy_image_2d_array_to_2d.cl       | 21 +++++++++++
>  .../cl_internal_copy_image_2d_array_to_2d_array.cl | 23 ++++++++++++
>  .../cl_internal_copy_image_2d_array_to_3d.cl       | 23 ++++++++++++
>  .../cl_internal_copy_image_2d_to_2d_array.cl       | 21 +++++++++++
>  .../cl_internal_copy_image_3d_to_2d_array.cl       | 23 ++++++++++++
>  9 files changed, 172 insertions(+), 12 deletions(-)
>  create mode 100644 src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl
>  create mode 100644 src/kernels/cl_internal_copy_image_2d_array_to_2d.cl
>  create mode 100644 src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl
>  create mode 100644 src/kernels/cl_internal_copy_image_2d_array_to_3d.cl
>  create mode 100644 src/kernels/cl_internal_copy_image_2d_to_2d_array.cl
>  create mode 100644 src/kernels/cl_internal_copy_image_3d_to_2d_array.cl
> 
> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
> index a55f84d..939f58d 100644
> --- a/src/CMakeLists.txt
> +++ b/src/CMakeLists.txt
> @@ -46,6 +46,9 @@ cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset
>  cl_internal_copy_buf_rect cl_internal_copy_buf_rect_align4
>  cl_internal_copy_image_1d_to_1d cl_internal_copy_image_2d_to_2d
>  cl_internal_copy_image_3d_to_2d cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d
> +cl_internal_copy_image_2d_to_2d_array cl_internal_copy_image_1d_array_to_1d_array
> +cl_internal_copy_image_2d_array_to_2d_array cl_internal_copy_image_2d_array_to_2d
> +cl_internal_copy_image_2d_array_to_3d cl_internal_copy_image_3d_to_2d_array
>  cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer
>  cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d
>  cl_internal_fill_buf_align8 cl_internal_fill_buf_align4
> diff --git a/src/cl_context.h b/src/cl_context.h
> index 38ad2fd..2ea0a73 100644
> --- a/src/cl_context.h
> +++ b/src/cl_context.h
> @@ -53,6 +53,12 @@ enum _cl_internal_ker_type {
>    CL_ENQUEUE_COPY_IMAGE_3D_TO_2D,             //copy image 3d to image 2d
>    CL_ENQUEUE_COPY_IMAGE_2D_TO_3D,             //copy image 2d to image 3d
>    CL_ENQUEUE_COPY_IMAGE_3D_TO_3D,             //copy image 3d to image 3d
> +  CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY,       //copy image 2d to image 2d array
> +  CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY, //copy image 1d array to image 1d array
> +  CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D_ARRAY, //copy image 2d array to image 2d array
> +  CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D,       //copy image 2d array to image 2d
> +  CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_3D,       //copy image 2d array to image 3d
> +  CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY,       //copy image 3d to image 2d array
>    CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER,   //copy image 2d to buffer
>    CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,   //copy image 3d tobuffer
>    CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D,   //copy buffer to image 2d
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index 2ec89a4..2920bfe 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -1610,27 +1610,43 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image
>        ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_3D,
>            cl_internal_copy_image_2d_to_3d_str, (size_t)cl_internal_copy_image_2d_to_3d_str_size, NULL);
>      } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
> +      extern char cl_internal_copy_image_2d_to_2d_array_str[];
> +      extern size_t cl_internal_copy_image_2d_to_2d_array_str_size;
>  
> -      cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
> -      return CL_SUCCESS;
> +      ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY,
> +          cl_internal_copy_image_2d_to_2d_array_str, (size_t)cl_internal_copy_image_2d_to_2d_array_str_size, NULL);
>      }
>    } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
>      if(dst_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
> +      extern char cl_internal_copy_image_1d_array_to_1d_array_str[];
> +      extern size_t cl_internal_copy_image_1d_array_to_1d_array_str_size;
>  
> -      cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
> -      return CL_SUCCESS;
> +      ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY,
> +          cl_internal_copy_image_1d_array_to_1d_array_str,
> +          (size_t)cl_internal_copy_image_1d_array_to_1d_array_str_size, NULL);
>      }
>    } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
>      if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
> +      extern char cl_internal_copy_image_2d_array_to_2d_array_str[];
> +      extern size_t cl_internal_copy_image_2d_array_to_2d_array_str_size;
>  
> -      cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
> -      return CL_SUCCESS;
> +      ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D_ARRAY,
> +          cl_internal_copy_image_2d_array_to_2d_array_str,
> +          (size_t)cl_internal_copy_image_2d_array_to_2d_array_str_size, NULL);
>      } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
> -      cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
> -      return CL_SUCCESS;
> +      extern char cl_internal_copy_image_2d_array_to_2d_str[];
> +      extern size_t cl_internal_copy_image_2d_array_to_2d_str_size;
> +
> +      ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D,
> +          cl_internal_copy_image_2d_array_to_2d_str,
> +          (size_t)cl_internal_copy_image_2d_array_to_2d_str_size, NULL);
>      } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
> -      cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
> -      return CL_SUCCESS;
> +      extern char cl_internal_copy_image_2d_array_to_3d_str[];
> +      extern size_t cl_internal_copy_image_2d_array_to_3d_str_size;
> +
> +      ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_3D,
> +          cl_internal_copy_image_2d_array_to_3d_str,
> +          (size_t)cl_internal_copy_image_2d_array_to_3d_str_size, NULL);
>      }
>    } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
>      if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
> @@ -1646,8 +1662,11 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image
>        ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_3D,
>            cl_internal_copy_image_3d_to_3d_str, (size_t)cl_internal_copy_image_3d_to_3d_str_size, NULL);
>      } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
> -      cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
> -      return CL_SUCCESS;
> +      extern char cl_internal_copy_image_3d_to_2d_array_str[];
> +      extern size_t cl_internal_copy_image_3d_to_2d_array_str_size;
> +
> +      ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY,
> +          cl_internal_copy_image_3d_to_2d_array_str, (size_t)cl_internal_copy_image_3d_to_2d_array_str_size, NULL);
>      }
>    }
>  
> diff --git a/src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl b/src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl
> new file mode 100644
> index 0000000..0c7c6e2
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl
> @@ -0,0 +1,21 @@
> +kernel void __cl_copy_image_1d_array_to_1d_array(__read_only image1d_array_t src_image, __write_only image1d_array_t dst_image,
> +                             unsigned int region0, unsigned int region1, unsigned int region2,
> +                             unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
> +                             unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
> +{
> +  int i = get_global_id(0);
> +  int k = get_global_id(2);
> +  int4 color;
> +  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
> +  int2 src_coord;
> +  int2 dst_coord;
> +  if((i >= region0) || (k>=region2))
> +    return;
> +
> +  src_coord.x = src_origin0 + i;
> +  src_coord.y = src_origin2 + k;
> +  dst_coord.x = dst_origin0 + i;
> +  dst_coord.y = dst_origin2 + k;
> +  color = read_imagei(src_image, sampler, src_coord);
> +  write_imagei(dst_image, dst_coord, color);
> +}
> diff --git a/src/kernels/cl_internal_copy_image_2d_array_to_2d.cl b/src/kernels/cl_internal_copy_image_2d_array_to_2d.cl
> new file mode 100644
> index 0000000..89e36c0
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_image_2d_array_to_2d.cl
> @@ -0,0 +1,21 @@
> +kernel void __cl_copy_image_2d_array_to_2d(__read_only image2d_array_t src_image, __write_only image2d_t dst_image,
> +                             unsigned int region0, unsigned int region1, unsigned int region2,
> +                             unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
> +                             unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
> +{
> +  int i = get_global_id(0);
> +  int j = get_global_id(1);
> +  int4 color;
> +  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
> +  int4 src_coord;
> +  int2 dst_coord;
> +  if((i >= region0) || (j>= region1))
> +    return;
> +  src_coord.x = src_origin0 + i;
> +  src_coord.y = src_origin1 + j;
> +  src_coord.z = src_origin2;
> +  dst_coord.x = dst_origin0 + i;
> +  dst_coord.y = dst_origin1 + j;
> +  color = read_imagei(src_image, sampler, src_coord);
> +  write_imagei(dst_image, dst_coord, color);
> +}
> diff --git a/src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl b/src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl
> new file mode 100644
> index 0000000..3653660
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl
> @@ -0,0 +1,23 @@
> +kernel void __cl_copy_image_2d_array_to_2d_array(__read_only image2d_array_t src_image, __write_only image2d_array_t dst_image,
> +                             unsigned int region0, unsigned int region1, unsigned int region2,
> +                             unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
> +                             unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
> +{
> +  int i = get_global_id(0);
> +  int j = get_global_id(1);
> +  int k = get_global_id(2);
> +  int4 color;
> +  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
> +  int4 src_coord;
> +  int4 dst_coord;
> +  if((i >= region0) || (j>= region1) || (k>=region2))
> +    return;
> +  src_coord.x = src_origin0 + i;
> +  src_coord.y = src_origin1 + j;
> +  src_coord.z = src_origin2 + k;
> +  dst_coord.x = dst_origin0 + i;
> +  dst_coord.y = dst_origin1 + j;
> +  dst_coord.z = dst_origin2 + k;
> +  color = read_imagei(src_image, sampler, src_coord);
> +  write_imagei(dst_image, dst_coord, color);
> +}
> diff --git a/src/kernels/cl_internal_copy_image_2d_array_to_3d.cl b/src/kernels/cl_internal_copy_image_2d_array_to_3d.cl
> new file mode 100644
> index 0000000..424f6b5
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_image_2d_array_to_3d.cl
> @@ -0,0 +1,23 @@
> +kernel void __cl_copy_image_2d_array_to_3d(__read_only image2d_array_t src_image, __write_only image3d_t dst_image,
> +                             unsigned int region0, unsigned int region1, unsigned int region2,
> +                             unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
> +                             unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
> +{
> +  int i = get_global_id(0);
> +  int j = get_global_id(1);
> +  int k = get_global_id(2);
> +  int4 color;
> +  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
> +  int4 src_coord;
> +  int4 dst_coord;
> +  if((i >= region0) || (j>= region1) || (k>=region2))
> +    return;
> +  src_coord.x = src_origin0 + i;
> +  src_coord.y = src_origin1 + j;
> +  src_coord.z = src_origin2 + k;
> +  dst_coord.x = dst_origin0 + i;
> +  dst_coord.y = dst_origin1 + j;
> +  dst_coord.z = dst_origin2 + k;
> +  color = read_imagei(src_image, sampler, src_coord);
> +  write_imagei(dst_image, dst_coord, color);
> +}
> diff --git a/src/kernels/cl_internal_copy_image_2d_to_2d_array.cl b/src/kernels/cl_internal_copy_image_2d_to_2d_array.cl
> new file mode 100644
> index 0000000..4384f01
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_image_2d_to_2d_array.cl
> @@ -0,0 +1,21 @@
> +kernel void __cl_copy_image_2d_to_2d_array(__read_only image2d_t src_image, __write_only image2d_array_t dst_image,
> +                                          unsigned int region0, unsigned int region1, unsigned int region2,
> +                                          unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
> +                                          unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
> +{
> +  int i = get_global_id(0);
> +  int j = get_global_id(1);
> +  int4 color;
> +  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
> +  int2 src_coord;
> +  int4 dst_coord;
> +  if((i >= region0) || (j>= region1))
> +    return;
> +  src_coord.x = src_origin0 + i;
> +  src_coord.y = src_origin1 + j;
> +  dst_coord.x = dst_origin0 + i;
> +  dst_coord.y = dst_origin1 + j;
> +  dst_coord.z = dst_origin2;
> +  color = read_imagei(src_image, sampler, src_coord);
> +  write_imagei(dst_image, dst_coord, color);
> +}
> diff --git a/src/kernels/cl_internal_copy_image_3d_to_2d_array.cl b/src/kernels/cl_internal_copy_image_3d_to_2d_array.cl
> new file mode 100644
> index 0000000..8041a32
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_image_3d_to_2d_array.cl
> @@ -0,0 +1,23 @@
> +kernel void __cl_copy_image_3d_to_2d_array(__read_only image3d_t src_image, __write_only image2d_array_t dst_image,
> +                             unsigned int region0, unsigned int region1, unsigned int region2,
> +                             unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
> +                             unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
> +{
> +  int i = get_global_id(0);
> +  int j = get_global_id(1);
> +  int k = get_global_id(2);
> +  int4 color;
> +  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
> +  int4 src_coord;
> +  int4 dst_coord;
> +  if((i >= region0) || (j>= region1) || (k>=region2))
> +    return;
> +  src_coord.x = src_origin0 + i;
> +  src_coord.y = src_origin1 + j;
> +  src_coord.z = src_origin2 + k;
> +  dst_coord.x = dst_origin0 + i;
> +  dst_coord.y = dst_origin1 + j;
> +  dst_coord.z = dst_origin2 + k;
> +  color = read_imagei(src_image, sampler, src_coord);
> +  write_imagei(dst_image, dst_coord, color);
> +}
> -- 
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list