[Beignet] [PATCH] [PATCH_V2] improve the clEnqueueCopyBufferRect performance in some cases

Lv Meng meng.lv at intel.com
Tue May 27 23:52:08 PDT 2014


Signed-off-by: Lv Meng <meng.lv at intel.com>
---
 src/CMakeLists.txt |  3 ++-
 src/cl_context.h   |  1 +
 src/cl_mem.c       | 27 ++++++++++++++++++++++++++-
 3 files changed, 29 insertions(+), 2 deletions(-)
 mode change 100644 => 100755 src/CMakeLists.txt
 mode change 100644 => 100755 src/cl_context.h
 mode change 100644 => 100755 src/cl_mem.c

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
old mode 100644
new mode 100755
index 20e1a4c..f085ee1
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -39,7 +39,8 @@ set (KERNEL_STR_FILES)
 set (KERNEL_NAMES cl_internal_copy_buf_align4
 cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset
 cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset
-cl_internal_copy_buf_rect cl_internal_copy_image_2d_to_2d cl_internal_copy_image_3d_to_2d
+cl_internal_copy_buf_rect cl_internal_copy_buf_rect_align4 
+cl_internal_copy_image_2d_to_2d cl_internal_copy_image_3d_to_2d
 cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d
 cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer
 cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d)
diff --git a/src/cl_context.h b/src/cl_context.h
old mode 100644
new mode 100755
index 82d3217..5ba6c58
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -46,6 +46,7 @@ enum _cl_internal_ker_type {
   CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET,
   CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET,
   CL_ENQUEUE_COPY_BUFFER_RECT,
+  CL_ENQUEUE_COPY_BUFFER_RECT_ALIGN4,
   CL_ENQUEUE_COPY_IMAGE_2D_TO_2D,             //copy image 2d to image 2d
   CL_ENQUEUE_COPY_IMAGE_3D_TO_2D,             //copy image 3d to image 2d
   CL_ENQUEUE_COPY_IMAGE_2D_TO_3D,             //copy image 2d to image 3d
diff --git a/src/cl_mem.c b/src/cl_mem.c
old mode 100644
new mode 100755
index 87ea317..f3ead16
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -944,9 +944,21 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
                        size_t dst_row_pitch, size_t dst_slice_pitch) {
   cl_int ret;
   cl_kernel ker;
+  cl_int index;
   size_t global_off[] = {0,0,0};
   size_t global_sz[] = {1,1,1};
   size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_1};
+
+  // When the src and dst rects are contiguous in memory, the copy degrades to a plain buffer copy.
+  if((region[0] == dst_row_pitch) && (region[0] == src_row_pitch) &&
+  (region[1] * src_row_pitch == src_slice_pitch) && (region[1] * dst_row_pitch == dst_slice_pitch)){
+    cl_int src_offset = src_origin[2]*src_slice_pitch + src_origin[1]*src_row_pitch + src_origin[0];
+    cl_int dst_offset = dst_origin[2]*dst_slice_pitch + dst_origin[1]*dst_row_pitch + dst_origin[0];
+    cl_int size = region[0]*region[1]*region[2];
+    ret = cl_mem_copy(queue, src_buf, dst_buf,src_offset, dst_offset, size);
+    return ret;
+  }
+
   if(region[1] == 1) local_sz[1] = 1;
   if(region[2] == 1) local_sz[2] = 1;
   global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0];
@@ -962,7 +974,20 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
   extern char cl_internal_copy_buf_rect_str[];
   extern int cl_internal_copy_buf_rect_str_size;
 
-  ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_RECT,
+  index = CL_ENQUEUE_COPY_BUFFER_RECT;
+  if( (src_offset % 4== 0) && (dst_offset % 4== 0) && (src_row_pitch % 4== 0) && (dst_row_pitch % 4== 0)
+  && (src_slice_pitch % 4== 0) && (dst_slice_pitch % 4== 0) && (global_sz[0] % 4 == 0) ){  
+    global_sz[0] /= 4;
+    src_offset /= 4;
+    dst_offset /= 4;
+    src_row_pitch /= 4;
+    dst_row_pitch /= 4;
+    src_slice_pitch /= 4;
+    dst_slice_pitch /= 4;
+    index = CL_ENQUEUE_COPY_BUFFER_RECT_ALIGN4;
+  }
+
+  ker = cl_context_get_static_kernel_form_bin(queue->ctx, index,
       cl_internal_copy_buf_rect_str, (size_t)cl_internal_copy_buf_rect_str_size, NULL);
 
   if (!ker)
-- 
1.8.3.2



More information about the Beignet mailing list