[Beignet] [PATCH 2/2] Use aligned16 and aligne4 kernel to copy for large 3D image with TILE_Y.
Yang, Rong R
rong.r.yang at intel.com
Wed Jun 14 07:36:49 UTC 2017
LGTM, except some format. I have run git clang-format by manual and pushed, thanks.
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> yan.wang at linux.intel.com
> Sent: Tuesday, June 13, 2017 16:32
> To: beignet at lists.freedesktop.org
> Cc: Yan Wang <yan.wang at linux.intel.com>
> Subject: [Beignet] [PATCH 2/2] Use aligned16 and aligne4 kernel to copy for
> large 3D image with TILE_Y.
>
> From: Yan Wang <yan.wang at linux.intel.com>
>
> It is similar with 2D image for avoiding extended image width truncated.
>
> Signed-off-by: Yan Wang <yan.wang at linux.intel.com>
> ---
> src/CMakeLists.txt | 2 +
> src/cl_context.h | 4 ++
> src/cl_mem.c | 46 +++++++++++++++++++---
> .../cl_internal_copy_buffer_to_image_3d_align16.cl | 19
> +++++++++ .../cl_internal_copy_buffer_to_image_3d_align4.cl | 19
> +++++++++ .../cl_internal_copy_image_3d_to_buffer_align16.cl | 20
> ++++++++++ .../cl_internal_copy_image_3d_to_buffer_align4.cl | 20
> ++++++++++
> 7 files changed, 125 insertions(+), 5 deletions(-) create mode 100644
> src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl
> create mode 100644
> src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl
> create mode 100644
> src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl
> create mode 100644
> src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl
>
> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 87ad48b..ecb98b9
> 100644
> --- a/src/CMakeLists.txt
> +++ b/src/CMakeLists.txt
> @@ -54,6 +54,8 @@ cl_internal_copy_image_2d_array_to_3d
> cl_internal_copy_image_3d_to_2d_array
> cl_internal_copy_image_2d_to_buffer
> cl_internal_copy_image_2d_to_buffer_align16
> cl_internal_copy_image_3d_to_buffer
> cl_internal_copy_buffer_to_image_2d
> cl_internal_copy_buffer_to_image_2d_align16
> cl_internal_copy_buffer_to_image_3d
> cl_internal_copy_buffer_to_image_2d_align4
> cl_internal_copy_image_2d_to_buffer_align4
> +cl_internal_copy_buffer_to_image_3d_align4
> +cl_internal_copy_image_3d_to_buffer_align4
> +cl_internal_copy_buffer_to_image_3d_align16
> +cl_internal_copy_image_3d_to_buffer_align16
> cl_internal_fill_buf_align8 cl_internal_fill_buf_align4
> cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign
> cl_internal_fill_buf_align128 cl_internal_fill_image_1d diff --git
> a/src/cl_context.h b/src/cl_context.h index 75bf895..b3a79bc 100644
> --- a/src/cl_context.h
> +++ b/src/cl_context.h
> @@ -64,10 +64,14 @@ enum _cl_internal_ker_type {
> CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN16,
> CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN4,
> CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, //copy image 3d tobuffer
> + CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN16,
> + CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN4,
> CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, //copy buffer to image 2d
> CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN16,
> CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN4,
> CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D, //copy buffer to image 3d
> + CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN16,
> + CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN4,
> CL_ENQUEUE_FILL_BUFFER_UNALIGN, //fill buffer with 1 aligne pattern,
> pattern size=1
> CL_ENQUEUE_FILL_BUFFER_ALIGN2, //fill buffer with 2 aligne pattern,
> pattern size=2
> CL_ENQUEUE_FILL_BUFFER_ALIGN4, //fill buffer with 4 aligne pattern,
> pattern size=4
> diff --git a/src/cl_mem.c b/src/cl_mem.c index b6dce3f..307db50 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -2162,13 +2162,13 @@ get_align_size_for_copy_kernel(struct
> _cl_mem_image* image, const size_t origin0
> const size_t offset, cl_image_format *fmt) {
> size_t align_size = 0;
>
> - if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w *
> image->bpp) % ALIGN16 == 0) &&
> + if(((image->w * image->bpp) % ALIGN16 == 0) &&
> ((origin0 * image->bpp) % ALIGN16 == 0) && (region0 % ALIGN16 == 0)
> && (offset % ALIGN16 == 0)){
> fmt->image_channel_order = CL_RGBA;
> fmt->image_channel_data_type = CL_UNSIGNED_INT32;
> align_size = ALIGN16;
> }
> - else if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image-
> >w * image->bpp) % ALIGN4 == 0) &&
> + else if(((image->w * image->bpp) % ALIGN4 == 0) &&
> ((origin0 * image->bpp) % ALIGN4 == 0) && (region0 % ALIGN4 == 0) &&
> (offset % ALIGN4 == 0)){
> fmt->image_channel_order = CL_R;
> fmt->image_channel_data_type = CL_UNSIGNED_INT32; @@ -2247,11
> +2247,29 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue,
> cl_event event, struct _cl_m
> cl_internal_copy_image_2d_to_buffer_str,
> (size_t)cl_internal_copy_image_2d_to_buffer_str_size, NULL);
> }
> }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {
> - extern char cl_internal_copy_image_3d_to_buffer_str[];
> - extern size_t cl_internal_copy_image_3d_to_buffer_str_size;
> + if(align_size == ALIGN16){
> + extern char cl_internal_copy_image_3d_to_buffer_align16_str[];
> + extern size_t
> + cl_internal_copy_image_3d_to_buffer_align16_str_size;
> +
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx,
> CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN16,
> + cl_internal_copy_image_3d_to_buffer_align16_str,
> + (size_t)cl_internal_copy_image_3d_to_buffer_align16_str_size,
> NULL);
> + }
> + else if(align_size == ALIGN4){
> + extern char cl_internal_copy_image_3d_to_buffer_align4_str[];
> + extern size_t
> + cl_internal_copy_image_3d_to_buffer_align4_str_size;
>
> - ker = cl_context_get_static_kernel_from_bin(queue->ctx,
> CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx,
> CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN4,
> + cl_internal_copy_image_3d_to_buffer_align4_str,
> + (size_t)cl_internal_copy_image_3d_to_buffer_align4_str_size,
> NULL);
> + }
> + else{
> + extern char cl_internal_copy_image_3d_to_buffer_str[];
> + extern size_t cl_internal_copy_image_3d_to_buffer_str_size;
> +
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx,
> + CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,
> cl_internal_copy_image_3d_to_buffer_str,
> (size_t)cl_internal_copy_image_3d_to_buffer_str_size, NULL);
> + }
> }
>
> if (!ker) {
> @@ -2347,11 +2365,29 @@
> cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event
> event, cl_mem buffe
> cl_internal_copy_buffer_to_image_2d_str,
> (size_t)cl_internal_copy_buffer_to_image_2d_str_size, NULL);
> }
> }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {
> + if(align_size == ALIGN16){
> + extern char cl_internal_copy_buffer_to_image_3d_align16_str[];
> + extern size_t
> + cl_internal_copy_buffer_to_image_3d_align16_str_size;
> +
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx,
> CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN16,
> + cl_internal_copy_buffer_to_image_3d_align16_str,
> + (size_t)cl_internal_copy_buffer_to_image_3d_align16_str_size,
> NULL);
> + }
> + else if(align_size == ALIGN4){
> + extern char cl_internal_copy_buffer_to_image_3d_align4_str[];
> + extern size_t
> + cl_internal_copy_buffer_to_image_3d_align4_str_size;
> +
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx,
> CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN4,
> + cl_internal_copy_buffer_to_image_3d_align4_str,
> + (size_t)cl_internal_copy_buffer_to_image_3d_align4_str_size,
> NULL);
> + }
> + else{
> extern char cl_internal_copy_buffer_to_image_3d_str[];
> extern size_t cl_internal_copy_buffer_to_image_3d_str_size;
>
> ker = cl_context_get_static_kernel_from_bin(queue->ctx,
> CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D,
> cl_internal_copy_buffer_to_image_3d_str,
> (size_t)cl_internal_copy_buffer_to_image_3d_str_size, NULL);
> + }
> }
> if (!ker)
> return CL_OUT_OF_RESOURCES;
> diff --git a/src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl
> b/src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl
> new file mode 100644
> index 0000000..32f1f63
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl
> @@ -0,0 +1,19 @@
> +kernel void __cl_copy_buffer_to_image_3d_align16(__write_only
> image3d_t image, global uint4* buffer,
> + unsigned int region0, unsigned int region1, unsigned int
> region2,
> + unsigned int dst_origin0, unsigned int dst_origin1,
> unsigned int dst_origin2,
> + unsigned int src_offset) {
> + int i = get_global_id(0);
> + int j = get_global_id(1);
> + int k = get_global_id(2);
> + uint4 color = (uint4)(0);
> + int4 dst_coord;
> + if((i >= region0) || (j>= region1) || (k>=region2))
> + return;
> + dst_coord.x = dst_origin0 + i;
> + dst_coord.y = dst_origin1 + j;
> + dst_coord.z = dst_origin2 + k;
> + src_offset += (k * region1 + j) * region0 + i;
> + color = buffer[src_offset];
> + write_imageui(image, dst_coord, color); }
> diff --git a/src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl
> b/src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl
> new file mode 100644
> index 0000000..2ccbcf1
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl
> @@ -0,0 +1,19 @@
> +kernel void __cl_copy_buffer_to_image_3d_align4(__write_only
> image3d_t image, global uint* buffer,
> + unsigned int region0, unsigned int region1, unsigned int
> region2,
> + unsigned int dst_origin0, unsigned int dst_origin1,
> unsigned int dst_origin2,
> + unsigned int src_offset) {
> + int i = get_global_id(0);
> + int j = get_global_id(1);
> + int k = get_global_id(2);
> + uint4 color = (uint4)(0);
> + int4 dst_coord;
> + if((i >= region0) || (j>= region1) || (k>=region2))
> + return;
> + dst_coord.x = dst_origin0 + i;
> + dst_coord.y = dst_origin1 + j;
> + dst_coord.z = dst_origin2 + k;
> + src_offset += (k * region1 + j) * region0 + i;
> + color.x = buffer[src_offset];
> + write_imageui(image, dst_coord, color); }
> diff --git a/src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl
> b/src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl
> new file mode 100644
> index 0000000..e116d47
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl
> @@ -0,0 +1,20 @@
> +kernel void __cl_copy_image_3d_to_buffer_align16 ( __read_only
> image3d_t image, global uint4* buffer,
> + unsigned int region0, unsigned int region1, unsigned int
> region2,
> + unsigned int src_origin0, unsigned int src_origin1,
> unsigned int src_origin2,
> + unsigned int dst_offset) {
> + int i = get_global_id(0);
> + int j = get_global_id(1);
> + int k = get_global_id(2);
> + uint4 color;
> + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
> +CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
> + int4 src_coord;
> + if((i >= region0) || (j>= region1) || (k>=region2))
> + return;
> + src_coord.x = src_origin0 + i;
> + src_coord.y = src_origin1 + j;
> + src_coord.z = src_origin2 + k;
> + color = read_imageui(image, sampler, src_coord);
> + dst_offset += (k * region1 + j) * region0 + i;
> + *(buffer + dst_offset) = color;
> +}
> diff --git a/src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl
> b/src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl
> new file mode 100644
> index 0000000..d5374c4
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl
> @@ -0,0 +1,20 @@
> +kernel void __cl_copy_image_3d_to_buffer_align4 ( __read_only
> image3d_t image, global uint* buffer,
> + unsigned int region0, unsigned int region1, unsigned int
> region2,
> + unsigned int src_origin0, unsigned int src_origin1,
> unsigned int src_origin2,
> + unsigned int dst_offset) {
> + int i = get_global_id(0);
> + int j = get_global_id(1);
> + int k = get_global_id(2);
> + uint4 color;
> + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
> +CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
> + int4 src_coord;
> + if((i >= region0) || (j>= region1) || (k>=region2))
> + return;
> + src_coord.x = src_origin0 + i;
> + src_coord.y = src_origin1 + j;
> + src_coord.z = src_origin2 + k;
> + color = read_imageui(image, sampler, src_coord);
> + dst_offset += (k * region1 + j) * region0 + i;
> + buffer[dst_offset] = color.x;
> +}
> --
> 2.7.4
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list