[Beignet] [PATCH v2 2/2] Fix bug of clEnqueueCopyBufferToImage and clEnqueueCopyImageToBuffer.

yan.wang at linux.intel.com yan.wang at linux.intel.com
Thu May 25 07:10:19 UTC 2017


From: Yan Wang <yan.wang at linux.intel.com>

"imagedim_non_pow_2" cases of  basic modudle of confrmance shows
regression after use TILE_Y mode for large image by previous patch.
This bug comes from the non-align16 kernel of clEnqueueCopyBufferToImage
and clEnqueueCopyImageToBuffer.
It will force CL_RGBA/CL_UNORM_INT8/8191x8192 image of conformance test
to CL_R/CL_UNSIGNED_INT8/32764x8192 image for copying.
So it makes width as 8191 x 4 = 32764 and its width will exceed the maximum
width (16 x 1024 = 16384) of GEN surface state structure which only has 14 bits.
So use align4 copy kernel to avoid this bug.

Signed-off-by: Yan Wang <yan.wang at linux.intel.com>
---
 src/CMakeLists.txt                                 |  1 +
 src/cl_context.h                                   |  2 +
 src/cl_mem.c                                       | 78 ++++++++++++++--------
 .../cl_internal_copy_buffer_to_image_2d_align4.cl  | 18 +++++
 .../cl_internal_copy_image_2d_to_buffer_align4.cl  | 18 +++++
 5 files changed, 89 insertions(+), 28 deletions(-)
 create mode 100644 src/kernels/cl_internal_copy_buffer_to_image_2d_align4.cl
 create mode 100644 src/kernels/cl_internal_copy_image_2d_to_buffer_align4.cl

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 77a1c87..6433566 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -53,6 +53,7 @@ cl_internal_copy_image_2d_array_to_2d_array cl_internal_copy_image_2d_array_to_2
 cl_internal_copy_image_2d_array_to_3d cl_internal_copy_image_3d_to_2d_array
 cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_2d_to_buffer_align16 cl_internal_copy_image_3d_to_buffer
 cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_2d_align16 cl_internal_copy_buffer_to_image_3d
+cl_internal_copy_buffer_to_image_2d_align4 cl_internal_copy_image_2d_to_buffer_align4
 cl_internal_fill_buf_align8 cl_internal_fill_buf_align4
 cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign
 cl_internal_fill_buf_align128 cl_internal_fill_image_1d
diff --git a/src/cl_context.h b/src/cl_context.h
index 8ba499f..75bf895 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -62,9 +62,11 @@ enum _cl_internal_ker_type {
   CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY,       //copy image 3d to image 2d array
   CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER,   //copy image 2d to buffer
   CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN16,
+  CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN4,
   CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,   //copy image 3d tobuffer
   CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D,   //copy buffer to image 2d
   CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN16,
+  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN4,
   CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D,   //copy buffer to image 3d
   CL_ENQUEUE_FILL_BUFFER_UNALIGN,      //fill buffer with 1 aligne pattern, pattern size=1
   CL_ENQUEUE_FILL_BUFFER_ALIGN2,       //fill buffer with 2 aligne pattern, pattern size=2
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 0c49c3d..a8543c9 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -2146,6 +2146,36 @@ fail:
   return ret;
 }
 
+#define ALIGN16 16
+#define ALIGN4 4
+#define ALIGN1 1
+
+static size_t
+get_align_size_for_copy_kernel(struct _cl_mem_image* image, const size_t origin0, const size_t region0,
+                            const size_t offset, cl_image_format *fmt) {
+  size_t align_size = 0;
+
+  if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * image->bpp) % ALIGN16 == 0) &&
+      ((origin0 * image->bpp) % ALIGN16 == 0) && (region0 % ALIGN16 == 0) && (offset % ALIGN16 == 0)){
+    fmt->image_channel_order = CL_RGBA;
+    fmt->image_channel_data_type = CL_UNSIGNED_INT32;
+    align_size = ALIGN16;
+  }
+  else if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * image->bpp) % ALIGN4 == 0) &&
+      ((origin0 * image->bpp) % ALIGN4 == 0) && (region0 % ALIGN4 == 0) && (offset % ALIGN4 == 0)){
+    fmt->image_channel_order = CL_R;
+    fmt->image_channel_data_type = CL_UNSIGNED_INT32;
+    align_size = ALIGN4;
+  }
+  else{
+    fmt->image_channel_order = CL_R;
+    fmt->image_channel_data_type = CL_UNSIGNED_INT8;
+    align_size = ALIGN1;
+  }
+
+  return align_size;
+}
+
 LOCAL cl_int
 cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_mem_image* image, cl_mem buffer,
                          const size_t *src_origin, const size_t dst_offset, const size_t *region) {
@@ -2158,7 +2188,6 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_m
   cl_image_format fmt;
   size_t origin0, region0;
   size_t kn_dst_offset;
-  int align16 = 0;
   size_t align_size = 1;
   size_t w_saved;
 
@@ -2176,18 +2205,7 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_m
   w_saved = image->w;
   region0 = region[0] * bpp;
   kn_dst_offset = dst_offset;
-  if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * image->bpp) % 16 == 0) &&
-      ((src_origin[0] * bpp) % 16 == 0) && (region0 % 16 == 0) && (dst_offset % 16 == 0)){
-    fmt.image_channel_order = CL_RGBA;
-    fmt.image_channel_data_type = CL_UNSIGNED_INT32;
-    align16 = 1;
-    align_size = 16;
-  }
-  else{
-    fmt.image_channel_order = CL_R;
-    fmt.image_channel_data_type = CL_UNSIGNED_INT8;
-    align_size = 1;
-  }
+  align_size = get_align_size_for_copy_kernel(image, src_origin[0], region0, dst_offset, &fmt);
   image->intel_fmt = cl_image_get_intel_format(&fmt);
   image->w = (image->w * image->bpp) / align_size;
   image->bpp = align_size;
@@ -2198,7 +2216,7 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_m
 
   /* setup the kernel and run. */
   if(image->image_type == CL_MEM_OBJECT_IMAGE2D) {
-    if(align16){
+    if(align_size == ALIGN16){
       extern char cl_internal_copy_image_2d_to_buffer_align16_str[];
       extern size_t cl_internal_copy_image_2d_to_buffer_align16_str_size;
 
@@ -2206,6 +2224,14 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_m
                 cl_internal_copy_image_2d_to_buffer_align16_str,
                 (size_t)cl_internal_copy_image_2d_to_buffer_align16_str_size, NULL);
     }
+    else if(align_size == ALIGN4){
+      extern char cl_internal_copy_image_2d_to_buffer_align4_str[];
+      extern size_t cl_internal_copy_image_2d_to_buffer_align4_str_size;
+
+      ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN4,
+                cl_internal_copy_image_2d_to_buffer_align4_str,
+                (size_t)cl_internal_copy_image_2d_to_buffer_align4_str_size, NULL);
+    }
     else{
       extern char cl_internal_copy_image_2d_to_buffer_str[];
       extern size_t cl_internal_copy_image_2d_to_buffer_str_size;
@@ -2262,7 +2288,6 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event event, cl_mem buffe
   cl_image_format fmt;
   size_t origin0, region0;
   size_t kn_src_offset;
-  int align16 = 0;
   size_t align_size = 1;
   size_t w_saved = 0;
 
@@ -2280,18 +2305,7 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event event, cl_mem buffe
   w_saved = image->w;
   region0 = region[0] * bpp;
   kn_src_offset = src_offset;
-  if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * image->bpp) % 16 == 0) &&
-      ((dst_origin[0] * bpp) % 16 == 0) && (region0 % 16 == 0) && (src_offset % 16 == 0)){
-    fmt.image_channel_order = CL_RGBA;
-    fmt.image_channel_data_type = CL_UNSIGNED_INT32;
-    align16 = 1;
-    align_size = 16;
-  }
-  else{
-    fmt.image_channel_order = CL_R;
-    fmt.image_channel_data_type = CL_UNSIGNED_INT8;
-    align_size = 1;
-  }
+  align_size = get_align_size_for_copy_kernel(image, dst_origin[0], region0, src_offset, &fmt);
   image->intel_fmt = cl_image_get_intel_format(&fmt);
   image->w = (image->w * image->bpp) / align_size;
   image->bpp = align_size;
@@ -2302,7 +2316,7 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event event, cl_mem buffe
 
   /* setup the kernel and run. */
   if(image->image_type == CL_MEM_OBJECT_IMAGE2D) {
-    if(align16){
+    if(align_size == ALIGN16){
       extern char cl_internal_copy_buffer_to_image_2d_align16_str[];
       extern size_t cl_internal_copy_buffer_to_image_2d_align16_str_size;
 
@@ -2310,6 +2324,14 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event event, cl_mem buffe
                 cl_internal_copy_buffer_to_image_2d_align16_str,
                 (size_t)cl_internal_copy_buffer_to_image_2d_align16_str_size, NULL);
     }
+    else if(align_size == ALIGN4){
+      extern char cl_internal_copy_buffer_to_image_2d_align4_str[];
+      extern size_t cl_internal_copy_buffer_to_image_2d_align4_str_size;
+
+      ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN4,
+                cl_internal_copy_buffer_to_image_2d_align4_str,
+                (size_t)cl_internal_copy_buffer_to_image_2d_align4_str_size, NULL);
+    }
     else{
       extern char cl_internal_copy_buffer_to_image_2d_str[];
       extern size_t cl_internal_copy_buffer_to_image_2d_str_size;
diff --git a/src/kernels/cl_internal_copy_buffer_to_image_2d_align4.cl b/src/kernels/cl_internal_copy_buffer_to_image_2d_align4.cl
new file mode 100644
index 0000000..79a3d8c
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buffer_to_image_2d_align4.cl
@@ -0,0 +1,18 @@
+kernel void __cl_copy_buffer_to_image_2d_align4(__write_only image2d_t image, global uint* buffer,
+                                        unsigned int region0, unsigned int region1, unsigned int region2,
+                                        unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2,
+                                        unsigned int src_offset)
+{
+  int i = get_global_id(0);
+  int j = get_global_id(1);
+  uint4 color = (uint4)(0);
+  int2 dst_coord;
+  if((i >= region0) || (j>= region1))
+    return;
+  dst_coord.x = dst_origin0 + i;
+  dst_coord.y = dst_origin1 + j;
+  src_offset += j * region0 + i;
+  color.x = buffer[src_offset];
+  write_imageui(image, dst_coord, color.x);
+}
+
diff --git a/src/kernels/cl_internal_copy_image_2d_to_buffer_align4.cl b/src/kernels/cl_internal_copy_image_2d_to_buffer_align4.cl
new file mode 100644
index 0000000..dc76e02
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_2d_to_buffer_align4.cl
@@ -0,0 +1,18 @@
+kernel void __cl_copy_image_2d_to_buffer_align4( __read_only image2d_t image, global uint* buffer,
+                                        unsigned int region0, unsigned int region1, unsigned int region2,
+                                        unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+                                        unsigned int dst_offset)
+{
+
+  int i = get_global_id(0);
+  int j = get_global_id(1);
+  if((i >= region0) || (j>= region1))
+    return;
+  uint4 color;
+  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+  int2 src_coord;
+  src_coord.x = src_origin0 + i;
+  src_coord.y = src_origin1 + j;
+  color = read_imageui(image, sampler, src_coord);
+  *(buffer + dst_offset + region0*j + i) = color.x;
+}
-- 
2.7.4



More information about the Beignet mailing list