[Beignet] [PATCH v2 2/2] Fix bug of clEnqueueCopyBufferToImage and clEnqueueCopyImageToBuffer.

Yang, Rong R rong.r.yang at intel.com
Thu May 25 07:33:15 UTC 2017


The patchset LGTM, pushed, thanks.

BYW: should also support align2 later.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> yan.wang at linux.intel.com
> Sent: Thursday, May 25, 2017 15:10
> To: beignet at lists.freedesktop.org
> Cc: Yan Wang <yan.wang at linux.intel.com>
> Subject: [Beignet] [PATCH v2 2/2] Fix bug of clEnqueueCopyBufferToImage
> and clEnqueueCopyImageToBuffer.
> 
> From: Yan Wang <yan.wang at linux.intel.com>
> 
> "imagedim_non_pow_2" cases of  basic modudle of confrmance shows
> regression after use TILE_Y mode for large image by previous patch.
> This bug comes from the non-align16 kernel of
> clEnqueueCopyBufferToImage and clEnqueueCopyImageToBuffer.
> It will force CL_RGBA/CL_UNORM_INT8/8191x8192 image of conformance
> test to CL_R/CL_UNSIGNED_INT8/32764x8192 image for copying.
> So it makes width as 8191 x 4 = 32764 and its width will exceed the maximum
> width (16 x 1024 = 16384) of GEN surface state structure which only has 14
> bits.
> So use align4 copy kernel to avoid this bug.
> 
> Signed-off-by: Yan Wang <yan.wang at linux.intel.com>
> ---
>  src/CMakeLists.txt                                 |  1 +
>  src/cl_context.h                                   |  2 +
>  src/cl_mem.c                                       | 78 ++++++++++++++--------
>  .../cl_internal_copy_buffer_to_image_2d_align4.cl  | 18
> +++++  .../cl_internal_copy_image_2d_to_buffer_align4.cl  | 18 +++++
>  5 files changed, 89 insertions(+), 28 deletions(-)  create mode 100644
> src/kernels/cl_internal_copy_buffer_to_image_2d_align4.cl
>  create mode 100644
> src/kernels/cl_internal_copy_image_2d_to_buffer_align4.cl
> 
> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 77a1c87..6433566
> 100644
> --- a/src/CMakeLists.txt
> +++ b/src/CMakeLists.txt
> @@ -53,6 +53,7 @@ cl_internal_copy_image_2d_array_to_2d_array
> cl_internal_copy_image_2d_array_to_2
>  cl_internal_copy_image_2d_array_to_3d
> cl_internal_copy_image_3d_to_2d_array
>  cl_internal_copy_image_2d_to_buffer
> cl_internal_copy_image_2d_to_buffer_align16
> cl_internal_copy_image_3d_to_buffer
>  cl_internal_copy_buffer_to_image_2d
> cl_internal_copy_buffer_to_image_2d_align16
> cl_internal_copy_buffer_to_image_3d
> +cl_internal_copy_buffer_to_image_2d_align4
> +cl_internal_copy_image_2d_to_buffer_align4
>  cl_internal_fill_buf_align8 cl_internal_fill_buf_align4
>  cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign
>  cl_internal_fill_buf_align128 cl_internal_fill_image_1d diff --git
> a/src/cl_context.h b/src/cl_context.h index 8ba499f..75bf895 100644
> --- a/src/cl_context.h
> +++ b/src/cl_context.h
> @@ -62,9 +62,11 @@ enum _cl_internal_ker_type {
>    CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY,       //copy image 3d to
> image 2d array
>    CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER,   //copy image 2d to buffer
>    CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN16,
> +  CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN4,
>    CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,   //copy image 3d tobuffer
>    CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D,   //copy buffer to image 2d
>    CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN16,
> +  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN4,
>    CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D,   //copy buffer to image 3d
>    CL_ENQUEUE_FILL_BUFFER_UNALIGN,      //fill buffer with 1 aligne pattern,
> pattern size=1
>    CL_ENQUEUE_FILL_BUFFER_ALIGN2,       //fill buffer with 2 aligne pattern,
> pattern size=2
> diff --git a/src/cl_mem.c b/src/cl_mem.c index 0c49c3d..a8543c9 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -2146,6 +2146,36 @@ fail:
>    return ret;
>  }
> 
> +#define ALIGN16 16
> +#define ALIGN4 4
> +#define ALIGN1 1
> +
> +static size_t
> +get_align_size_for_copy_kernel(struct _cl_mem_image* image, const
> size_t origin0, const size_t region0,
> +                            const size_t offset, cl_image_format *fmt)
> +{
> +  size_t align_size = 0;
> +
> +  if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w *
> image->bpp) % ALIGN16 == 0) &&
> +      ((origin0 * image->bpp) % ALIGN16 == 0) && (region0 % ALIGN16 == 0)
> && (offset % ALIGN16 == 0)){
> +    fmt->image_channel_order = CL_RGBA;
> +    fmt->image_channel_data_type = CL_UNSIGNED_INT32;
> +    align_size = ALIGN16;
> +  }
> +  else if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image-
> >w * image->bpp) % ALIGN4 == 0) &&
> +      ((origin0 * image->bpp) % ALIGN4 == 0) && (region0 % ALIGN4 == 0) &&
> (offset % ALIGN4 == 0)){
> +    fmt->image_channel_order = CL_R;
> +    fmt->image_channel_data_type = CL_UNSIGNED_INT32;
> +    align_size = ALIGN4;
> +  }
> +  else{
> +    fmt->image_channel_order = CL_R;
> +    fmt->image_channel_data_type = CL_UNSIGNED_INT8;
> +    align_size = ALIGN1;
> +  }
> +
> +  return align_size;
> +}
> +
>  LOCAL cl_int
>  cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event
> event, struct _cl_mem_image* image, cl_mem buffer,
>                           const size_t *src_origin, const size_t dst_offset, const size_t
> *region) { @@ -2158,7 +2188,6 @@
> cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event
> event, struct _cl_m
>    cl_image_format fmt;
>    size_t origin0, region0;
>    size_t kn_dst_offset;
> -  int align16 = 0;
>    size_t align_size = 1;
>    size_t w_saved;
> 
> @@ -2176,18 +2205,7 @@
> cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event
> event, struct _cl_m
>    w_saved = image->w;
>    region0 = region[0] * bpp;
>    kn_dst_offset = dst_offset;
> -  if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w *
> image->bpp) % 16 == 0) &&
> -      ((src_origin[0] * bpp) % 16 == 0) && (region0 % 16 == 0) && (dst_offset %
> 16 == 0)){
> -    fmt.image_channel_order = CL_RGBA;
> -    fmt.image_channel_data_type = CL_UNSIGNED_INT32;
> -    align16 = 1;
> -    align_size = 16;
> -  }
> -  else{
> -    fmt.image_channel_order = CL_R;
> -    fmt.image_channel_data_type = CL_UNSIGNED_INT8;
> -    align_size = 1;
> -  }
> +  align_size = get_align_size_for_copy_kernel(image, src_origin[0],
> + region0, dst_offset, &fmt);
>    image->intel_fmt = cl_image_get_intel_format(&fmt);
>    image->w = (image->w * image->bpp) / align_size;
>    image->bpp = align_size;
> @@ -2198,7 +2216,7 @@
> cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event
> event, struct _cl_m
> 
>    /* setup the kernel and run. */
>    if(image->image_type == CL_MEM_OBJECT_IMAGE2D) {
> -    if(align16){
> +    if(align_size == ALIGN16){
>        extern char cl_internal_copy_image_2d_to_buffer_align16_str[];
>        extern size_t cl_internal_copy_image_2d_to_buffer_align16_str_size;
> 
> @@ -2206,6 +2224,14 @@
> cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event
> event, struct _cl_m
>                  cl_internal_copy_image_2d_to_buffer_align16_str,
>                  (size_t)cl_internal_copy_image_2d_to_buffer_align16_str_size,
> NULL);
>      }
> +    else if(align_size == ALIGN4){
> +      extern char cl_internal_copy_image_2d_to_buffer_align4_str[];
> +      extern size_t
> + cl_internal_copy_image_2d_to_buffer_align4_str_size;
> +
> +      ker = cl_context_get_static_kernel_from_bin(queue->ctx,
> CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN4,
> +                cl_internal_copy_image_2d_to_buffer_align4_str,
> +                (size_t)cl_internal_copy_image_2d_to_buffer_align4_str_size,
> NULL);
> +    }
>      else{
>        extern char cl_internal_copy_image_2d_to_buffer_str[];
>        extern size_t cl_internal_copy_image_2d_to_buffer_str_size;
> @@ -2262,7 +2288,6 @@
> cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event
> event, cl_mem buffe
>    cl_image_format fmt;
>    size_t origin0, region0;
>    size_t kn_src_offset;
> -  int align16 = 0;
>    size_t align_size = 1;
>    size_t w_saved = 0;
> 
> @@ -2280,18 +2305,7 @@
> cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event
> event, cl_mem buffe
>    w_saved = image->w;
>    region0 = region[0] * bpp;
>    kn_src_offset = src_offset;
> -  if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w *
> image->bpp) % 16 == 0) &&
> -      ((dst_origin[0] * bpp) % 16 == 0) && (region0 % 16 == 0) && (src_offset %
> 16 == 0)){
> -    fmt.image_channel_order = CL_RGBA;
> -    fmt.image_channel_data_type = CL_UNSIGNED_INT32;
> -    align16 = 1;
> -    align_size = 16;
> -  }
> -  else{
> -    fmt.image_channel_order = CL_R;
> -    fmt.image_channel_data_type = CL_UNSIGNED_INT8;
> -    align_size = 1;
> -  }
> +  align_size = get_align_size_for_copy_kernel(image, dst_origin[0],
> + region0, src_offset, &fmt);
>    image->intel_fmt = cl_image_get_intel_format(&fmt);
>    image->w = (image->w * image->bpp) / align_size;
>    image->bpp = align_size;
> @@ -2302,7 +2316,7 @@
> cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event
> event, cl_mem buffe
> 
>    /* setup the kernel and run. */
>    if(image->image_type == CL_MEM_OBJECT_IMAGE2D) {
> -    if(align16){
> +    if(align_size == ALIGN16){
>        extern char cl_internal_copy_buffer_to_image_2d_align16_str[];
>        extern size_t cl_internal_copy_buffer_to_image_2d_align16_str_size;
> 
> @@ -2310,6 +2324,14 @@
> cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event
> event, cl_mem buffe
>                  cl_internal_copy_buffer_to_image_2d_align16_str,
>                  (size_t)cl_internal_copy_buffer_to_image_2d_align16_str_size,
> NULL);
>      }
> +    else if(align_size == ALIGN4){
> +      extern char cl_internal_copy_buffer_to_image_2d_align4_str[];
> +      extern size_t
> + cl_internal_copy_buffer_to_image_2d_align4_str_size;
> +
> +      ker = cl_context_get_static_kernel_from_bin(queue->ctx,
> CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN4,
> +                cl_internal_copy_buffer_to_image_2d_align4_str,
> +                (size_t)cl_internal_copy_buffer_to_image_2d_align4_str_size,
> NULL);
> +    }
>      else{
>        extern char cl_internal_copy_buffer_to_image_2d_str[];
>        extern size_t cl_internal_copy_buffer_to_image_2d_str_size;
> diff --git a/src/kernels/cl_internal_copy_buffer_to_image_2d_align4.cl
> b/src/kernels/cl_internal_copy_buffer_to_image_2d_align4.cl
> new file mode 100644
> index 0000000..79a3d8c
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_buffer_to_image_2d_align4.cl
> @@ -0,0 +1,18 @@
> +kernel void __cl_copy_buffer_to_image_2d_align4(__write_only
> image2d_t image, global uint* buffer,
> +                                        unsigned int region0, unsigned int region1, unsigned int
> region2,
> +                                        unsigned int dst_origin0, unsigned int dst_origin1,
> unsigned int dst_origin2,
> +                                        unsigned int src_offset) {
> +  int i = get_global_id(0);
> +  int j = get_global_id(1);
> +  uint4 color = (uint4)(0);
> +  int2 dst_coord;
> +  if((i >= region0) || (j>= region1))
> +    return;
> +  dst_coord.x = dst_origin0 + i;
> +  dst_coord.y = dst_origin1 + j;
> +  src_offset += j * region0 + i;
> +  color.x = buffer[src_offset];
> +  write_imageui(image, dst_coord, color.x); }
> +
> diff --git a/src/kernels/cl_internal_copy_image_2d_to_buffer_align4.cl
> b/src/kernels/cl_internal_copy_image_2d_to_buffer_align4.cl
> new file mode 100644
> index 0000000..dc76e02
> --- /dev/null
> +++ b/src/kernels/cl_internal_copy_image_2d_to_buffer_align4.cl
> @@ -0,0 +1,18 @@
> +kernel void __cl_copy_image_2d_to_buffer_align4( __read_only
> image2d_t image, global uint* buffer,
> +                                        unsigned int region0, unsigned int region1, unsigned int
> region2,
> +                                        unsigned int src_origin0, unsigned int src_origin1,
> unsigned int src_origin2,
> +                                        unsigned int dst_offset) {
> +
> +  int i = get_global_id(0);
> +  int j = get_global_id(1);
> +  if((i >= region0) || (j>= region1))
> +    return;
> +  uint4 color;
> +  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
> +CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
> +  int2 src_coord;
> +  src_coord.x = src_origin0 + i;
> +  src_coord.y = src_origin1 + j;
> +  color = read_imageui(image, sampler, src_coord);
> +  *(buffer + dst_offset + region0*j + i) = color.x; }
> --
> 2.7.4
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list