[Beignet] [PATCH] Implement 1D/2D image array related cl_mem_kernel_copy_image in cl way instead of cpu way.
Zhigang Gong
zhigang.gong at linux.intel.com
Thu Feb 5 21:37:07 PST 2015
The patch LGTM, will push latter, thanks.
On Fri, Feb 06, 2015 at 11:52:30AM +0800, Chuanbo Weng wrote:
> Before this patch, cl_mem_kernel_copy_image do cpu memory copy in order
> to copy image array objects. This is very slow for large image size.
> This patch implement image array copy in cl way, which dramatically
> accelerate image array related clEnqueueCopyImage.
> clCopyImage case in OpenCL conformance test will not be blocked anymore.
>
> Signed-off-by: Chuanbo Weng <chuanbo.weng at intel.com>
> ---
> src/CMakeLists.txt | 3 ++
> src/cl_context.h | 6 +++
> src/cl_mem.c | 43 ++++++++++++++++------
> .../cl_internal_copy_image_1d_array_to_1d_array.cl | 21 +++++++++++
> .../cl_internal_copy_image_2d_array_to_2d.cl | 21 +++++++++++
> .../cl_internal_copy_image_2d_array_to_2d_array.cl | 23 ++++++++++++
> .../cl_internal_copy_image_2d_array_to_3d.cl | 23 ++++++++++++
> .../cl_internal_copy_image_2d_to_2d_array.cl | 21 +++++++++++
> .../cl_internal_copy_image_3d_to_2d_array.cl | 23 ++++++++++++
> 9 files changed, 172 insertions(+), 12 deletions(-)
> create mode 100644 src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl
> create mode 100644 src/kernels/cl_internal_copy_image_2d_array_to_2d.cl
> create mode 100644 src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl
> create mode 100644 src/kernels/cl_internal_copy_image_2d_array_to_3d.cl
> create mode 100644 src/kernels/cl_internal_copy_image_2d_to_2d_array.cl
> create mode 100644 src/kernels/cl_internal_copy_image_3d_to_2d_array.cl
>
> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
> index a55f84d..939f58d 100644
> --- a/src/CMakeLists.txt
> +++ b/src/CMakeLists.txt
> @@ -46,6 +46,9 @@ cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset
> cl_internal_copy_buf_rect cl_internal_copy_buf_rect_align4
> cl_internal_copy_image_1d_to_1d cl_internal_copy_image_2d_to_2d
> cl_internal_copy_image_3d_to_2d cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d
> +cl_internal_copy_image_2d_to_2d_array cl_internal_copy_image_1d_array_to_1d_array
> +cl_internal_copy_image_2d_array_to_2d_array cl_internal_copy_image_2d_array_to_2d
> +cl_internal_copy_image_2d_array_to_3d cl_internal_copy_image_3d_to_2d_array
> cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer
> cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d
> cl_internal_fill_buf_align8 cl_internal_fill_buf_align4
> diff --git a/src/cl_context.h b/src/cl_context.h
> index 38ad2fd..2ea0a73 100644
> --- a/src/cl_context.h
> +++ b/src/cl_context.h
> @@ -53,6 +53,12 @@ enum _cl_internal_ker_type {
> CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d
> CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to image 3d
> CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, //copy image 3d to image 3d
> + CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY, //copy image 2d to image 2d array
> + CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY, //copy image 1d array to image 1d array
> + CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D_ARRAY, //copy image 2d array to image 2d array
> + CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D, //copy image 2d array to image 2d
> + CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_3D, //copy image 2d array to image 3d
> + CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY, //copy image 3d to image 2d array
> CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, //copy image 2d to buffer
> CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, //copy image 3d tobuffer
> CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, //copy buffer to image 2d
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index 2ec89a4..2920bfe 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -1610,27 +1610,43 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image
> ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_3D,
> cl_internal_copy_image_2d_to_3d_str, (size_t)cl_internal_copy_image_2d_to_3d_str_size, NULL);
> } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
> + extern char cl_internal_copy_image_2d_to_2d_array_str[];
> + extern size_t cl_internal_copy_image_2d_to_2d_array_str_size;
>
> - cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
> - return CL_SUCCESS;
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY,
> + cl_internal_copy_image_2d_to_2d_array_str, (size_t)cl_internal_copy_image_2d_to_2d_array_str_size, NULL);
> }
> } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
> if(dst_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
> + extern char cl_internal_copy_image_1d_array_to_1d_array_str[];
> + extern size_t cl_internal_copy_image_1d_array_to_1d_array_str_size;
>
> - cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
> - return CL_SUCCESS;
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY,
> + cl_internal_copy_image_1d_array_to_1d_array_str,
> + (size_t)cl_internal_copy_image_1d_array_to_1d_array_str_size, NULL);
> }
> } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
> if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
> + extern char cl_internal_copy_image_2d_array_to_2d_array_str[];
> + extern size_t cl_internal_copy_image_2d_array_to_2d_array_str_size;
>
> - cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
> - return CL_SUCCESS;
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D_ARRAY,
> + cl_internal_copy_image_2d_array_to_2d_array_str,
> + (size_t)cl_internal_copy_image_2d_array_to_2d_array_str_size, NULL);
> } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
> - cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
> - return CL_SUCCESS;
> + extern char cl_internal_copy_image_2d_array_to_2d_str[];
> + extern size_t cl_internal_copy_image_2d_array_to_2d_str_size;
> +
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D,
> + cl_internal_copy_image_2d_array_to_2d_str,
> + (size_t)cl_internal_copy_image_2d_array_to_2d_str_size, NULL);
> } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
> - cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
> - return CL_SUCCESS;
> + extern char cl_internal_copy_image_2d_array_to_3d_str[];
> + extern size_t cl_internal_copy_image_2d_array_to_3d_str_size;
> +
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_3D,
> + cl_internal_copy_image_2d_array_to_3d_str,
> + (size_t)cl_internal_copy_image_2d_array_to_3d_str_size, NULL);
> }
> } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
> if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
> @@ -1646,8 +1662,11 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image
> ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_3D,
> cl_internal_copy_image_3d_to_3d_str, (size_t)cl_internal_copy_image_3d_to_3d_str_size, NULL);
> } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
> - cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
> - return CL_SUCCESS;
> + extern char cl_internal_copy_image_3d_to_2d_array_str[];
> + extern size_t cl_internal_copy_image_3d_to_2d_array_str_size;
> +
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY,
> + cl_internal_copy_image_3d_to_2d_array_str, (size_t)cl_internal_copy_image_3d_to_2d_array_str_size, NULL);
> }
> }
>
> diff --git a/src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl b/src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl
> new file mode 100644
> index 0000000..0c7c6e2
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl
> @@ -0,0 +1,21 @@
> +kernel void __cl_copy_image_1d_array_to_1d_array(__read_only image1d_array_t src_image, __write_only image1d_array_t dst_image,
> + unsigned int region0, unsigned int region1, unsigned int region2,
> + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
> + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
> +{
> + int i = get_global_id(0);
> + int k = get_global_id(2);
> + int4 color;
> + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
> + int2 src_coord;
> + int2 dst_coord;
> + if((i >= region0) || (k>=region2))
> + return;
> +
> + src_coord.x = src_origin0 + i;
> + src_coord.y = src_origin2 + k;
> + dst_coord.x = dst_origin0 + i;
> + dst_coord.y = dst_origin2 + k;
> + color = read_imagei(src_image, sampler, src_coord);
> + write_imagei(dst_image, dst_coord, color);
> +}
> diff --git a/src/kernels/cl_internal_copy_image_2d_array_to_2d.cl b/src/kernels/cl_internal_copy_image_2d_array_to_2d.cl
> new file mode 100644
> index 0000000..89e36c0
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_image_2d_array_to_2d.cl
> @@ -0,0 +1,21 @@
> +kernel void __cl_copy_image_2d_array_to_2d(__read_only image2d_array_t src_image, __write_only image2d_t dst_image,
> + unsigned int region0, unsigned int region1, unsigned int region2,
> + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
> + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
> +{
> + int i = get_global_id(0);
> + int j = get_global_id(1);
> + int4 color;
> + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
> + int4 src_coord;
> + int2 dst_coord;
> + if((i >= region0) || (j>= region1))
> + return;
> + src_coord.x = src_origin0 + i;
> + src_coord.y = src_origin1 + j;
> + src_coord.z = src_origin2;
> + dst_coord.x = dst_origin0 + i;
> + dst_coord.y = dst_origin1 + j;
> + color = read_imagei(src_image, sampler, src_coord);
> + write_imagei(dst_image, dst_coord, color);
> +}
> diff --git a/src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl b/src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl
> new file mode 100644
> index 0000000..3653660
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl
> @@ -0,0 +1,23 @@
> +kernel void __cl_copy_image_2d_array_to_2d_array(__read_only image2d_array_t src_image, __write_only image2d_array_t dst_image,
> + unsigned int region0, unsigned int region1, unsigned int region2,
> + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
> + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
> +{
> + int i = get_global_id(0);
> + int j = get_global_id(1);
> + int k = get_global_id(2);
> + int4 color;
> + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
> + int4 src_coord;
> + int4 dst_coord;
> + if((i >= region0) || (j>= region1) || (k>=region2))
> + return;
> + src_coord.x = src_origin0 + i;
> + src_coord.y = src_origin1 + j;
> + src_coord.z = src_origin2 + k;
> + dst_coord.x = dst_origin0 + i;
> + dst_coord.y = dst_origin1 + j;
> + dst_coord.z = dst_origin2 + k;
> + color = read_imagei(src_image, sampler, src_coord);
> + write_imagei(dst_image, dst_coord, color);
> +}
> diff --git a/src/kernels/cl_internal_copy_image_2d_array_to_3d.cl b/src/kernels/cl_internal_copy_image_2d_array_to_3d.cl
> new file mode 100644
> index 0000000..424f6b5
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_image_2d_array_to_3d.cl
> @@ -0,0 +1,23 @@
> +kernel void __cl_copy_image_2d_array_to_3d(__read_only image2d_array_t src_image, __write_only image3d_t dst_image,
> + unsigned int region0, unsigned int region1, unsigned int region2,
> + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
> + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
> +{
> + int i = get_global_id(0);
> + int j = get_global_id(1);
> + int k = get_global_id(2);
> + int4 color;
> + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
> + int4 src_coord;
> + int4 dst_coord;
> + if((i >= region0) || (j>= region1) || (k>=region2))
> + return;
> + src_coord.x = src_origin0 + i;
> + src_coord.y = src_origin1 + j;
> + src_coord.z = src_origin2 + k;
> + dst_coord.x = dst_origin0 + i;
> + dst_coord.y = dst_origin1 + j;
> + dst_coord.z = dst_origin2 + k;
> + color = read_imagei(src_image, sampler, src_coord);
> + write_imagei(dst_image, dst_coord, color);
> +}
> diff --git a/src/kernels/cl_internal_copy_image_2d_to_2d_array.cl b/src/kernels/cl_internal_copy_image_2d_to_2d_array.cl
> new file mode 100644
> index 0000000..4384f01
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_image_2d_to_2d_array.cl
> @@ -0,0 +1,21 @@
> +kernel void __cl_copy_image_2d_to_2d_array(__read_only image2d_t src_image, __write_only image2d_array_t dst_image,
> + unsigned int region0, unsigned int region1, unsigned int region2,
> + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
> + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
> +{
> + int i = get_global_id(0);
> + int j = get_global_id(1);
> + int4 color;
> + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
> + int2 src_coord;
> + int4 dst_coord;
> + if((i >= region0) || (j>= region1))
> + return;
> + src_coord.x = src_origin0 + i;
> + src_coord.y = src_origin1 + j;
> + dst_coord.x = dst_origin0 + i;
> + dst_coord.y = dst_origin1 + j;
> + dst_coord.z = dst_origin2;
> + color = read_imagei(src_image, sampler, src_coord);
> + write_imagei(dst_image, dst_coord, color);
> +}
> diff --git a/src/kernels/cl_internal_copy_image_3d_to_2d_array.cl b/src/kernels/cl_internal_copy_image_3d_to_2d_array.cl
> new file mode 100644
> index 0000000..8041a32
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_image_3d_to_2d_array.cl
> @@ -0,0 +1,23 @@
> +kernel void __cl_copy_image_3d_to_2d_array(__read_only image3d_t src_image, __write_only image2d_array_t dst_image,
> + unsigned int region0, unsigned int region1, unsigned int region2,
> + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
> + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
> +{
> + int i = get_global_id(0);
> + int j = get_global_id(1);
> + int k = get_global_id(2);
> + int4 color;
> + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
> + int4 src_coord;
> + int4 dst_coord;
> + if((i >= region0) || (j>= region1) || (k>=region2))
> + return;
> + src_coord.x = src_origin0 + i;
> + src_coord.y = src_origin1 + j;
> + src_coord.z = src_origin2 + k;
> + dst_coord.x = dst_origin0 + i;
> + dst_coord.y = dst_origin1 + j;
> + dst_coord.z = dst_origin2 + k;
> + color = read_imagei(src_image, sampler, src_coord);
> + write_imagei(dst_image, dst_coord, color);
> +}
> --
> 1.9.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list