[Beignet] [PATCH] Implement 1D/2D image array related cl_mem_kernel_copy_image in cl way instead of cpu way.
Chuanbo Weng
chuanbo.weng at intel.com
Thu Feb 5 19:52:30 PST 2015
Before this patch, cl_mem_kernel_copy_image do cpu memory copy in order
to copy image array objects. This is very slow for large image size.
This patch implement image array copy in cl way, which dramatically
accelerate image array related clEnqueueCopyImage.
clCopyImage case in OpenCL conformance test will not be blocked anymore.
Signed-off-by: Chuanbo Weng <chuanbo.weng at intel.com>
---
src/CMakeLists.txt | 3 ++
src/cl_context.h | 6 +++
src/cl_mem.c | 43 ++++++++++++++++------
.../cl_internal_copy_image_1d_array_to_1d_array.cl | 21 +++++++++++
.../cl_internal_copy_image_2d_array_to_2d.cl | 21 +++++++++++
.../cl_internal_copy_image_2d_array_to_2d_array.cl | 23 ++++++++++++
.../cl_internal_copy_image_2d_array_to_3d.cl | 23 ++++++++++++
.../cl_internal_copy_image_2d_to_2d_array.cl | 21 +++++++++++
.../cl_internal_copy_image_3d_to_2d_array.cl | 23 ++++++++++++
9 files changed, 172 insertions(+), 12 deletions(-)
create mode 100644 src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl
create mode 100644 src/kernels/cl_internal_copy_image_2d_array_to_2d.cl
create mode 100644 src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl
create mode 100644 src/kernels/cl_internal_copy_image_2d_array_to_3d.cl
create mode 100644 src/kernels/cl_internal_copy_image_2d_to_2d_array.cl
create mode 100644 src/kernels/cl_internal_copy_image_3d_to_2d_array.cl
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a55f84d..939f58d 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -46,6 +46,9 @@ cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset
cl_internal_copy_buf_rect cl_internal_copy_buf_rect_align4
cl_internal_copy_image_1d_to_1d cl_internal_copy_image_2d_to_2d
cl_internal_copy_image_3d_to_2d cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d
+cl_internal_copy_image_2d_to_2d_array cl_internal_copy_image_1d_array_to_1d_array
+cl_internal_copy_image_2d_array_to_2d_array cl_internal_copy_image_2d_array_to_2d
+cl_internal_copy_image_2d_array_to_3d cl_internal_copy_image_3d_to_2d_array
cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer
cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d
cl_internal_fill_buf_align8 cl_internal_fill_buf_align4
diff --git a/src/cl_context.h b/src/cl_context.h
index 38ad2fd..2ea0a73 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -53,6 +53,12 @@ enum _cl_internal_ker_type {
CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d
CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to image 3d
CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, //copy image 3d to image 3d
+ CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY, //copy image 2d to image 2d array
+ CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY, //copy image 1d array to image 1d array
+ CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D_ARRAY, //copy image 2d array to image 2d array
+ CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D, //copy image 2d array to image 2d
+ CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_3D, //copy image 2d array to image 3d
+ CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY, //copy image 3d to image 2d array
CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, //copy image 2d to buffer
CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, //copy image 3d tobuffer
CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, //copy buffer to image 2d
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 2ec89a4..2920bfe 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -1610,27 +1610,43 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image
ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_3D,
cl_internal_copy_image_2d_to_3d_str, (size_t)cl_internal_copy_image_2d_to_3d_str_size, NULL);
} else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
+ extern char cl_internal_copy_image_2d_to_2d_array_str[];
+ extern size_t cl_internal_copy_image_2d_to_2d_array_str_size;
- cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
- return CL_SUCCESS;
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY,
+ cl_internal_copy_image_2d_to_2d_array_str, (size_t)cl_internal_copy_image_2d_to_2d_array_str_size, NULL);
}
} else if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
if(dst_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+ extern char cl_internal_copy_image_1d_array_to_1d_array_str[];
+ extern size_t cl_internal_copy_image_1d_array_to_1d_array_str_size;
- cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
- return CL_SUCCESS;
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY,
+ cl_internal_copy_image_1d_array_to_1d_array_str,
+ (size_t)cl_internal_copy_image_1d_array_to_1d_array_str_size, NULL);
}
} else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
+ extern char cl_internal_copy_image_2d_array_to_2d_array_str[];
+ extern size_t cl_internal_copy_image_2d_array_to_2d_array_str_size;
- cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
- return CL_SUCCESS;
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D_ARRAY,
+ cl_internal_copy_image_2d_array_to_2d_array_str,
+ (size_t)cl_internal_copy_image_2d_array_to_2d_array_str_size, NULL);
} else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
- cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
- return CL_SUCCESS;
+ extern char cl_internal_copy_image_2d_array_to_2d_str[];
+ extern size_t cl_internal_copy_image_2d_array_to_2d_str_size;
+
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D,
+ cl_internal_copy_image_2d_array_to_2d_str,
+ (size_t)cl_internal_copy_image_2d_array_to_2d_str_size, NULL);
} else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
- cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
- return CL_SUCCESS;
+ extern char cl_internal_copy_image_2d_array_to_3d_str[];
+ extern size_t cl_internal_copy_image_2d_array_to_3d_str_size;
+
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_3D,
+ cl_internal_copy_image_2d_array_to_3d_str,
+ (size_t)cl_internal_copy_image_2d_array_to_3d_str_size, NULL);
}
} else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
@@ -1646,8 +1662,11 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image
ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_3D,
cl_internal_copy_image_3d_to_3d_str, (size_t)cl_internal_copy_image_3d_to_3d_str_size, NULL);
} else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
- cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
- return CL_SUCCESS;
+ extern char cl_internal_copy_image_3d_to_2d_array_str[];
+ extern size_t cl_internal_copy_image_3d_to_2d_array_str_size;
+
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY,
+ cl_internal_copy_image_3d_to_2d_array_str, (size_t)cl_internal_copy_image_3d_to_2d_array_str_size, NULL);
}
}
diff --git a/src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl b/src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl
new file mode 100644
index 0000000..0c7c6e2
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl
@@ -0,0 +1,21 @@
+kernel void __cl_copy_image_1d_array_to_1d_array(__read_only image1d_array_t src_image, __write_only image1d_array_t dst_image,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
+{
+ int i = get_global_id(0);
+ int k = get_global_id(2);
+ int4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int2 src_coord;
+ int2 dst_coord;
+ if((i >= region0) || (k>=region2))
+ return;
+
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin2 + k;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin2 + k;
+ color = read_imagei(src_image, sampler, src_coord);
+ write_imagei(dst_image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_image_2d_array_to_2d.cl b/src/kernels/cl_internal_copy_image_2d_array_to_2d.cl
new file mode 100644
index 0000000..89e36c0
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_2d_array_to_2d.cl
@@ -0,0 +1,21 @@
+kernel void __cl_copy_image_2d_array_to_2d(__read_only image2d_array_t src_image, __write_only image2d_t dst_image,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int4 src_coord;
+ int2 dst_coord;
+ if((i >= region0) || (j>= region1))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ src_coord.z = src_origin2;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ color = read_imagei(src_image, sampler, src_coord);
+ write_imagei(dst_image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl b/src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl
new file mode 100644
index 0000000..3653660
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl
@@ -0,0 +1,23 @@
+kernel void __cl_copy_image_2d_array_to_2d_array(__read_only image2d_array_t src_image, __write_only image2d_array_t dst_image,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ int4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int4 src_coord;
+ int4 dst_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ src_coord.z = src_origin2 + k;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ dst_coord.z = dst_origin2 + k;
+ color = read_imagei(src_image, sampler, src_coord);
+ write_imagei(dst_image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_image_2d_array_to_3d.cl b/src/kernels/cl_internal_copy_image_2d_array_to_3d.cl
new file mode 100644
index 0000000..424f6b5
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_2d_array_to_3d.cl
@@ -0,0 +1,23 @@
+kernel void __cl_copy_image_2d_array_to_3d(__read_only image2d_array_t src_image, __write_only image3d_t dst_image,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ int4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int4 src_coord;
+ int4 dst_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ src_coord.z = src_origin2 + k;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ dst_coord.z = dst_origin2 + k;
+ color = read_imagei(src_image, sampler, src_coord);
+ write_imagei(dst_image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_image_2d_to_2d_array.cl b/src/kernels/cl_internal_copy_image_2d_to_2d_array.cl
new file mode 100644
index 0000000..4384f01
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_2d_to_2d_array.cl
@@ -0,0 +1,21 @@
+kernel void __cl_copy_image_2d_to_2d_array(__read_only image2d_t src_image, __write_only image2d_array_t dst_image,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int2 src_coord;
+ int4 dst_coord;
+ if((i >= region0) || (j>= region1))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ dst_coord.z = dst_origin2;
+ color = read_imagei(src_image, sampler, src_coord);
+ write_imagei(dst_image, dst_coord, color);
+}
diff --git a/src/kernels/cl_internal_copy_image_3d_to_2d_array.cl b/src/kernels/cl_internal_copy_image_3d_to_2d_array.cl
new file mode 100644
index 0000000..8041a32
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_3d_to_2d_array.cl
@@ -0,0 +1,23 @@
+kernel void __cl_copy_image_3d_to_2d_array(__read_only image3d_t src_image, __write_only image2d_array_t dst_image,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ int4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int4 src_coord;
+ int4 dst_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ src_coord.x = src_origin0 + i;
+ src_coord.y = src_origin1 + j;
+ src_coord.z = src_origin2 + k;
+ dst_coord.x = dst_origin0 + i;
+ dst_coord.y = dst_origin1 + j;
+ dst_coord.z = dst_origin2 + k;
+ color = read_imagei(src_image, sampler, src_coord);
+ write_imagei(dst_image, dst_coord, color);
+}
--
1.9.1
More information about the Beignet
mailing list