[Beignet] [PATCH] [opencl-1.2] implement API clEnqueueFillImage.
Zhigang Gong
zhigang.gong at linux.intel.com
Tue Jun 24 07:43:12 PDT 2014
There are two issues hided in this patch,
1. We need to enqueue a 3D task rather than a 1D task for the image filling.
2. At the fill image 1d array kernel, we should use the third dimension as the array index
rather than using the second dimension.
And another potential issue in clEnqueueMapImage for 1D array image is
the image_row_pitch's return value. By default, we just return the row
pitch. And we return correct slice pitch value at *image_slice_pitch.
According to spec, the caller need to use image_slice_pitch to access different
row. But we found some application use the image_row_pitch even the image
is a 1D array image. To workaround this type of issue, we just set
the image_slice_pitch to image_row_pitch for the clEnqueueMapImage
call on image 1D array image.
I just fixed the above issues and made a new patch and pushed to the
master branch. Thanks for the patch.
On Mon, Jun 23, 2014 at 06:03:30AM +0800, xionghu.luo at intel.com wrote:
> From: Luo <xionghu.luo at intel.com>
>
> enqueues a command to fill an image object with a specified color.
>
> fix typo cl_context_get_static_kernel_from_bin.
>
> Signed-off-by: Luo <xionghu.luo at intel.com>
> ---
> src/CMakeLists.txt | 4 +-
> src/cl_api.c | 73 +++++++++++++++++
> src/cl_context.c | 2 +-
> src/cl_context.h | 7 +-
> src/cl_enqueue.c | 1 +
> src/cl_enqueue.h | 1 +
> src/cl_gt_device.h | 7 +-
> src/cl_khr_icd.c | 2 +-
> src/cl_mem.c | 106 ++++++++++++++++++++-----
> src/cl_mem.h | 3 +
> src/kernels/cl_internal_fill_image_1d.cl | 14 ++++
> src/kernels/cl_internal_fill_image_1d_array.cl | 15 ++++
> src/kernels/cl_internal_fill_image_2d.cl | 15 ++++
> src/kernels/cl_internal_fill_image_2d_array.cl | 16 ++++
> src/kernels/cl_internal_fill_image_3d.cl | 16 ++++
> 15 files changed, 257 insertions(+), 25 deletions(-)
> create mode 100644 src/kernels/cl_internal_fill_image_1d.cl
> create mode 100644 src/kernels/cl_internal_fill_image_1d_array.cl
> create mode 100644 src/kernels/cl_internal_fill_image_2d.cl
> create mode 100644 src/kernels/cl_internal_fill_image_2d_array.cl
> create mode 100644 src/kernels/cl_internal_fill_image_3d.cl
>
> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
> index 8651af6..3d5ce4d 100644
> --- a/src/CMakeLists.txt
> +++ b/src/CMakeLists.txt
> @@ -47,7 +47,9 @@ cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer
> cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d
> cl_internal_fill_buf_align8 cl_internal_fill_buf_align4
> cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign
> -cl_internal_fill_buf_align128)
> +cl_internal_fill_buf_align128 cl_internal_fill_image_1d
> +cl_internal_fill_image_1d_array cl_internal_fill_image_2d
> +cl_internal_fill_image_2d_array cl_internal_fill_image_3d)
> set (BUILT_IN_NAME cl_internal_built_in_kernel)
> MakeBuiltInKernelStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}")
> MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}")
> diff --git a/src/cl_api.c b/src/cl_api.c
> index 32f91d7..c93957f 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -1812,6 +1812,79 @@ error:
> }
>
> cl_int
> +clEnqueueFillImage(cl_command_queue command_queue,
> + cl_mem image,
> + const void * fill_color,
> + const size_t * porigin,
> + const size_t * pregion,
> + cl_uint num_events_in_wait_list,
> + const cl_event * event_wait_list,
> + cl_event * event)
> +{
> + cl_int err = CL_SUCCESS;
> + enqueue_data *data, no_wait_data = { 0 };
> +
> + CHECK_QUEUE(command_queue);
> + CHECK_IMAGE(image, src_image);
> + FIXUP_IMAGE_REGION(src_image, pregion, region);
> + FIXUP_IMAGE_ORIGIN(src_image, porigin, origin);
> +
> + if (command_queue->ctx != image->ctx) {
> + err = CL_INVALID_CONTEXT;
> + goto error;
> + }
> +
> + if (fill_color == NULL) {
> + err = CL_INVALID_VALUE;
> + goto error;
> + }
> +
> + if (!origin || !region || origin[0] + region[0] > src_image->w || origin[1] + region[1] > src_image->h || origin[2] + region[2] > src_image->depth) {
> + err = CL_INVALID_VALUE;
> + goto error;
> + }
> +
> + if (src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (origin[2] != 0 || region[2] != 1)){
> + err = CL_INVALID_VALUE;
> + goto error;
> + }
> +
> + if (src_image->image_type == CL_MEM_OBJECT_IMAGE1D && (origin[2] != 0 ||origin[1] != 0 || region[2] != 1 || region[1] != 1)){
> + err = CL_INVALID_VALUE;
> + goto error;
> + }
> +
> + err = cl_image_fill(command_queue, fill_color, src_image, origin, region);
> + if (err) {
> + goto error;
> + }
> +
> + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, image->ctx);
> +
> + data = &no_wait_data;
> + data->type = EnqueueFillImage;
> + data->queue = command_queue;
> +
> + if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
> + event, data, CL_COMMAND_FILL_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
> + if (event && (*event)->type != CL_COMMAND_USER
> + && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
> + cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
> + }
> +
> + err = cl_command_queue_flush(command_queue);
> + }
> +
> + if(b_output_kernel_perf)
> + time_end(command_queue->ctx, "beignet internal kernel : cl_fill_image", "", command_queue);
> +
> + return 0;
> +
> + error:
> + return err;
> +}
> +
> +cl_int
> clEnqueueFillBuffer(cl_command_queue command_queue,
> cl_mem buffer,
> const void * pattern,
> diff --git a/src/cl_context.c b/src/cl_context.c
> index 8f42a58..152faf3 100644
> --- a/src/cl_context.c
> +++ b/src/cl_context.c
> @@ -319,7 +319,7 @@ cl_context_get_static_kernel(cl_context ctx, cl_int index, const char * str_kern
> }
>
> cl_kernel
> -cl_context_get_static_kernel_form_bin(cl_context ctx, cl_int index,
> +cl_context_get_static_kernel_from_bin(cl_context ctx, cl_int index,
> const char * str_kernel, size_t size, const char * str_option)
> {
> cl_int ret;
> diff --git a/src/cl_context.h b/src/cl_context.h
> index cba0a0a..0e4db73 100644
> --- a/src/cl_context.h
> +++ b/src/cl_context.h
> @@ -63,6 +63,11 @@ enum _cl_internal_ker_type {
> CL_ENQUEUE_FILL_BUFFER_ALIGN8_32, //fill buffer with 16 aligne pattern, pattern size=32
> CL_ENQUEUE_FILL_BUFFER_ALIGN8_64, //fill buffer with 16 aligne pattern, pattern size=64
> CL_ENQUEUE_FILL_BUFFER_ALIGN128, //fill buffer with 128 aligne pattern, pattern size=128
> + CL_ENQUEUE_FILL_IMAGE_1D, //fill image 1d
> + CL_ENQUEUE_FILL_IMAGE_1D_ARRAY, //fill image 1d array
> + CL_ENQUEUE_FILL_IMAGE_2D, //fill image 2d
> + CL_ENQUEUE_FILL_IMAGE_2D_ARRAY, //fill image 2d array
> + CL_ENQUEUE_FILL_IMAGE_3D, //fill image 3d
> CL_INTERNAL_KERNEL_MAX
> };
>
> @@ -153,7 +158,7 @@ extern cl_buffer_mgr cl_context_get_bufmgr(cl_context ctx);
> extern cl_kernel cl_context_get_static_kernel(cl_context ctx, cl_int index, const char *str_kernel, const char * str_option);
>
> /* Get the internal used kernel from binary*/
> -extern cl_kernel cl_context_get_static_kernel_form_bin(cl_context ctx, cl_int index,
> +extern cl_kernel cl_context_get_static_kernel_from_bin(cl_context ctx, cl_int index,
> const char * str_kernel, size_t size, const char * str_option);
>
> #endif /* __CL_CONTEXT_H__ */
> diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
> index bc0ca2c..52c824d 100644
> --- a/src/cl_enqueue.c
> +++ b/src/cl_enqueue.c
> @@ -432,6 +432,7 @@ cl_int cl_enqueue_handle(cl_event event, enqueue_data* data)
> case EnqueueCopyImageToBuffer:
> case EnqueueNDRangeKernel:
> case EnqueueFillBuffer:
> + case EnqueueFillImage:
> cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr);
> return CL_SUCCESS;
> case EnqueueNativeKernel:
> diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h
> index 6527602..a9b3601 100644
> --- a/src/cl_enqueue.h
> +++ b/src/cl_enqueue.h
> @@ -43,6 +43,7 @@ typedef enum {
> EnqueueMarker,
> EnqueueBarrier,
> EnqueueFillBuffer,
> + EnqueueFillImage,
> EnqueueMigrateMemObj,
> EnqueueInvalid
> } enqueue_type;
> diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
> index f385815..42ab7a1 100644
> --- a/src/cl_gt_device.h
> +++ b/src/cl_gt_device.h
> @@ -99,7 +99,12 @@ DECL_INFO_STRING(built_in_kernels, "__cl_copy_region_align4;"
> "__cl_fill_region_align8_4;"
> "__cl_fill_region_align8_8;"
> "__cl_fill_region_align8_16;"
> - "__cl_fill_region_align128;")
> + "__cl_fill_region_align128;"
> + "__cl_fill_image_1d;"
> + "__cl_fill_image_1d_array;"
> + "__cl_fill_image_2d;"
> + "__cl_fill_image_2d_array;"
> + "__cl_fill_image_3d;")
>
> DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING)
> #undef DECL_INFO_STRING
> diff --git a/src/cl_khr_icd.c b/src/cl_khr_icd.c
> index b23c29d..6d49db0 100644
> --- a/src/cl_khr_icd.c
> +++ b/src/cl_khr_icd.c
> @@ -150,7 +150,7 @@ struct _cl_icd_dispatch const cl_khr_icd_dispatch = {
> clUnloadPlatformCompiler,
> clGetKernelArgInfo,
> clEnqueueFillBuffer,
> - CL_1_2_NOTYET(clEnqueueFillImage),
> + clEnqueueFillImage,
> clEnqueueMigrateMemObjects,
> clEnqueueMarkerWithWaitList,
> clEnqueueBarrierWithWaitList,
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index e0c4ec9..cd77ef8 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -1047,7 +1047,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
> extern char cl_internal_copy_buf_align16_str[];
> extern int cl_internal_copy_buf_align16_str_size;
>
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN16,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN16,
> cl_internal_copy_buf_align16_str, (size_t)cl_internal_copy_buf_align16_str_size, NULL);
> cb = cb/16;
> aligned = 1;
> @@ -1055,7 +1055,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
> extern char cl_internal_copy_buf_align4_str[];
> extern int cl_internal_copy_buf_align4_str_size;
>
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN4,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN4,
> cl_internal_copy_buf_align4_str, (size_t)cl_internal_copy_buf_align4_str_size, NULL);
> cb = cb/4;
> aligned = 1;
> @@ -1102,7 +1102,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
> extern char cl_internal_copy_buf_unalign_same_offset_str[];
> extern int cl_internal_copy_buf_unalign_same_offset_str_size;
>
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SAME_OFFSET,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SAME_OFFSET,
> cl_internal_copy_buf_unalign_same_offset_str,
> (size_t)cl_internal_copy_buf_unalign_same_offset_str_size, NULL);
>
> @@ -1129,7 +1129,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
> unsigned int dw_mask = masks[align_diff];
> int shift = align_diff * 8;
>
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET,
> cl_internal_copy_buf_unalign_dst_offset_str,
> (size_t)cl_internal_copy_buf_unalign_dst_offset_str_size, NULL);
>
> @@ -1159,7 +1159,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
> int shift = align_diff * 8;
> int src_less = !(src_offset % 4) && !((src_offset + cb) % 4);
>
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET,
> cl_internal_copy_buf_unalign_src_offset_str,
> (size_t)cl_internal_copy_buf_unalign_src_offset_str_size, NULL);
>
> @@ -1184,6 +1184,72 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
> }
>
> LOCAL cl_int
> +cl_image_fill(cl_command_queue queue, const void * pattern, struct _cl_mem_image* src_image,
> + const size_t * origin, const size_t * region)
> +{
> + cl_int ret = CL_SUCCESS;
> + cl_kernel ker = NULL;
> + size_t global_off[] = {0,0,0};
> + size_t global_sz[] = {1,1,1};
> + size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2};
> +
> + if(region[1] == 1) local_sz[1] = 1;
> + if(region[2] == 1) local_sz[2] = 1;
> + global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0];
> + global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1];
> + global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2];
> +
> + if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D) {
> + extern char cl_internal_fill_image_1d_str[];
> + extern int cl_internal_fill_image_1d_str_size;
> +
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_1D,
> + cl_internal_fill_image_1d_str, (size_t)cl_internal_fill_image_1d_str_size, NULL);
> + }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
> + extern char cl_internal_fill_image_1d_array_str[];
> + extern int cl_internal_fill_image_1d_array_str_size;
> +
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_1D_ARRAY,
> + cl_internal_fill_image_1d_array_str, (size_t)cl_internal_fill_image_1d_array_str_size, NULL);
> + }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
> + extern char cl_internal_fill_image_2d_str[];
> + extern int cl_internal_fill_image_2d_str_size;
> +
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_2D,
> + cl_internal_fill_image_2d_str, (size_t)cl_internal_fill_image_2d_str_size, NULL);
> + }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
> + extern char cl_internal_fill_image_2d_array_str[];
> + extern int cl_internal_fill_image_2d_array_str_size;
> +
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_2D_ARRAY,
> + cl_internal_fill_image_2d_array_str, (size_t)cl_internal_fill_image_2d_array_str_size, NULL);
> + }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
> + extern char cl_internal_fill_image_3d_str[];
> + extern int cl_internal_fill_image_3d_str_size;
> +
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_3D,
> + cl_internal_fill_image_3d_str, (size_t)cl_internal_fill_image_3d_str_size, NULL);
> + }else{
> + return CL_IMAGE_FORMAT_NOT_SUPPORTED;
> + }
> +
> + if (!ker)
> + return CL_OUT_OF_RESOURCES;
> +
> + cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_image);
> + cl_kernel_set_arg(ker, 1, sizeof(float)*4, pattern);
> + cl_kernel_set_arg(ker, 2, sizeof(cl_int), ®ion[0]);
> + cl_kernel_set_arg(ker, 3, sizeof(cl_int), ®ion[1]);
> + cl_kernel_set_arg(ker, 4, sizeof(cl_int), ®ion[2]);
> + cl_kernel_set_arg(ker, 5, sizeof(cl_int), &origin[0]);
> + cl_kernel_set_arg(ker, 6, sizeof(cl_int), &origin[1]);
> + cl_kernel_set_arg(ker, 7, sizeof(cl_int), &origin[2]);
> +
> + ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
> + return ret;
> +}
> +
> +LOCAL cl_int
> cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size,
> cl_mem buffer, size_t offset, size_t size)
> {
> @@ -1208,7 +1274,7 @@ cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size,
> extern char cl_internal_fill_buf_align128_str[];
> extern int cl_internal_fill_buf_align128_str_size;
>
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN128,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN128,
> cl_internal_fill_buf_align128_str, (size_t)cl_internal_fill_buf_align128_str_size, NULL);
> is_128 = 1;
> pattern_size = pattern_size / 2;
> @@ -1219,13 +1285,13 @@ cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size,
> extern int cl_internal_fill_buf_align8_str_size;
> int order = ffs(pattern_size / 8) - 1;
>
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN8_8 + order,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN8_8 + order,
> cl_internal_fill_buf_align8_str, (size_t)cl_internal_fill_buf_align8_str_size, NULL);
> } else if (pattern_size == 4) {
> extern char cl_internal_fill_buf_align4_str[];
> extern int cl_internal_fill_buf_align4_str_size;
>
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN4,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN4,
> cl_internal_fill_buf_align4_str, (size_t)cl_internal_fill_buf_align4_str_size, NULL);
> } else if (size >= 4 && size % 4 == 0 && offset % 4 == 0) {
> /* The unaligned case. But if copy size and offset are aligned to 4, we can fake
> @@ -1242,7 +1308,7 @@ cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size,
> = pattern_comb[3] = *(char *)pattern;
> }
>
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN4,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN4,
> cl_internal_fill_buf_align4_str, (size_t)cl_internal_fill_buf_align4_str_size, NULL);
> pattern_size = 4;
> pattern = pattern_comb;
> @@ -1252,12 +1318,12 @@ cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size,
> else if (pattern_size == 2) {
> extern char cl_internal_fill_buf_align2_str[];
> extern int cl_internal_fill_buf_align2_str_size;
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN2,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN2,
> cl_internal_fill_buf_align2_str, (size_t)cl_internal_fill_buf_align2_str_size, NULL);
> } else if (pattern_size == 1) {
> extern char cl_internal_fill_buf_unalign_str[];
> extern int cl_internal_fill_buf_unalign_str_size;
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_UNALIGN,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_UNALIGN,
> cl_internal_fill_buf_unalign_str, (size_t)cl_internal_fill_buf_unalign_str_size, NULL);
> } else
> assert(0);
> @@ -1310,7 +1376,7 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
> extern char cl_internal_copy_buf_rect_str[];
> extern int cl_internal_copy_buf_rect_str_size;
>
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_RECT,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_RECT,
> cl_internal_copy_buf_rect_str, (size_t)cl_internal_copy_buf_rect_str_size, NULL);
>
> if (!ker)
> @@ -1382,13 +1448,13 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image
> extern char cl_internal_copy_image_2d_to_2d_str[];
> extern int cl_internal_copy_image_2d_to_2d_str_size;
>
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_2D,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_2D,
> cl_internal_copy_image_2d_to_2d_str, (size_t)cl_internal_copy_image_2d_to_2d_str_size, NULL);
> }else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
> extern char cl_internal_copy_image_2d_to_3d_str[];
> extern int cl_internal_copy_image_2d_to_3d_str_size;
>
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_3D,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_3D,
> cl_internal_copy_image_2d_to_3d_str, (size_t)cl_internal_copy_image_2d_to_3d_str_size, NULL);
> }
> }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
> @@ -1396,13 +1462,13 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image
> extern char cl_internal_copy_image_3d_to_2d_str[];
> extern int cl_internal_copy_image_3d_to_2d_str_size;
>
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_2D,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_2D,
> cl_internal_copy_image_3d_to_2d_str, (size_t)cl_internal_copy_image_3d_to_2d_str_size, NULL);
> }else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
> extern char cl_internal_copy_image_3d_to_3d_str[];
> extern int cl_internal_copy_image_3d_to_3d_str_size;
>
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_3D,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_3D,
> cl_internal_copy_image_3d_to_3d_str, (size_t)cl_internal_copy_image_3d_to_3d_str_size, NULL);
> }
> }
> @@ -1471,13 +1537,13 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, struct _cl_mem_image* image,
> extern char cl_internal_copy_image_2d_to_buffer_str[];
> extern int cl_internal_copy_image_2d_to_buffer_str_size;
>
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER,
> cl_internal_copy_image_2d_to_buffer_str, (size_t)cl_internal_copy_image_2d_to_buffer_str_size, NULL);
> }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {
> extern char cl_internal_copy_image_3d_to_buffer_str[];
> extern int cl_internal_copy_image_3d_to_buffer_str_size;
>
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,
> cl_internal_copy_image_3d_to_buffer_str, (size_t)cl_internal_copy_image_3d_to_buffer_str_size, NULL);
> }
>
> @@ -1545,13 +1611,13 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_mem buffer, struct _cl_me
> extern char cl_internal_copy_buffer_to_image_2d_str[];
> extern int cl_internal_copy_buffer_to_image_2d_str_size;
>
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D,
> cl_internal_copy_buffer_to_image_2d_str, (size_t)cl_internal_copy_buffer_to_image_2d_str_size, NULL);
> }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {
> extern char cl_internal_copy_buffer_to_image_3d_str[];
> extern int cl_internal_copy_buffer_to_image_3d_str_size;
>
> - ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D,
> + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D,
> cl_internal_copy_buffer_to_image_3d_str, (size_t)cl_internal_copy_buffer_to_image_3d_str_size, NULL);
> }
> if (!ker)
> diff --git a/src/cl_mem.h b/src/cl_mem.h
> index d589093..8ed8e2d 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -205,6 +205,9 @@ extern cl_int cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf
> extern cl_int cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size,
> cl_mem buffer, size_t offset, size_t size);
>
> +extern cl_int cl_image_fill(cl_command_queue queue, const void * pattern, struct _cl_mem_image*,
> + const size_t *, const size_t *);
> +
> /* api clEnqueueCopyBufferRect help function */
> extern cl_int cl_mem_copy_buffer_rect(cl_command_queue, cl_mem, cl_mem,
> const size_t *, const size_t *, const size_t *,
> diff --git a/src/kernels/cl_internal_fill_image_1d.cl b/src/kernels/cl_internal_fill_image_1d.cl
> new file mode 100644
> index 0000000..b3b0cbf
> --- /dev/null
> +++ b/src/kernels/cl_internal_fill_image_1d.cl
> @@ -0,0 +1,14 @@
> +kernel void __cl_fill_image_1d( __write_only image1d_t image, float4 pattern,
> + unsigned int region0, unsigned int region1, unsigned int region2,
> + unsigned int origin0, unsigned int origin1, unsigned int origin2)
> +{
> + int i = get_global_id(0);
> + int j = get_global_id(1);
> + int k = get_global_id(2);
> + int coord;
> + if((i >= region0) || (j>= region1) || (k>=region2))
> + return;
> + coord = origin0 + i;
> + write_imagef(image, coord, pattern);
> +
> +}
> diff --git a/src/kernels/cl_internal_fill_image_1d_array.cl b/src/kernels/cl_internal_fill_image_1d_array.cl
> new file mode 100644
> index 0000000..2513e37
> --- /dev/null
> +++ b/src/kernels/cl_internal_fill_image_1d_array.cl
> @@ -0,0 +1,15 @@
> +kernel void __cl_fill_image_1d_array( __write_only image1d_array_t image, float4 pattern,
> + unsigned int region0, unsigned int region1, unsigned int region2,
> + unsigned int origin0, unsigned int origin1, unsigned int origin2)
> +{
> + int i = get_global_id(0);
> + int j = get_global_id(1);
> + int k = get_global_id(2);
> + int2 coord;
> + if((i >= region0) || (j>= region1) || (k>=region2))
> + return;
> + coord.x = origin0 + i;
> + coord.y = origin1 + j;
> + write_imagef(image, coord, pattern);
> +
> +}
> diff --git a/src/kernels/cl_internal_fill_image_2d.cl b/src/kernels/cl_internal_fill_image_2d.cl
> new file mode 100644
> index 0000000..0e29f3e
> --- /dev/null
> +++ b/src/kernels/cl_internal_fill_image_2d.cl
> @@ -0,0 +1,15 @@
> +kernel void __cl_fill_image_2d( __write_only image2d_t image, float4 pattern,
> + unsigned int region0, unsigned int region1, unsigned int region2,
> + unsigned int origin0, unsigned int origin1, unsigned int origin2)
> +{
> + int i = get_global_id(0);
> + int j = get_global_id(1);
> + int k = get_global_id(2);
> + int2 coord;
> + if((i >= region0) || (j>= region1) || (k>=region2))
> + return;
> + coord.x = origin0 + i;
> + coord.y = origin1 + j;
> + write_imagef(image, coord, pattern);
> +
> +}
> diff --git a/src/kernels/cl_internal_fill_image_2d_array.cl b/src/kernels/cl_internal_fill_image_2d_array.cl
> new file mode 100644
> index 0000000..f29c9e7
> --- /dev/null
> +++ b/src/kernels/cl_internal_fill_image_2d_array.cl
> @@ -0,0 +1,16 @@
> +kernel void __cl_fill_image_2d_array( __write_only image2d_array_t image, float4 pattern,
> + unsigned int region0, unsigned int region1, unsigned int region2,
> + unsigned int origin0, unsigned int origin1, unsigned int origin2)
> +{
> + int i = get_global_id(0);
> + int j = get_global_id(1);
> + int k = get_global_id(2);
> + int4 coord;
> + if((i >= region0) || (j>= region1) || (k>=region2))
> + return;
> + coord.x = origin0 + i;
> + coord.y = origin1 + j;
> + coord.z = origin2 + k;
> + write_imagef(image, coord, pattern);
> +
> +}
> diff --git a/src/kernels/cl_internal_fill_image_3d.cl b/src/kernels/cl_internal_fill_image_3d.cl
> new file mode 100644
> index 0000000..042b8ab
> --- /dev/null
> +++ b/src/kernels/cl_internal_fill_image_3d.cl
> @@ -0,0 +1,16 @@
> +kernel void __cl_fill_image_3d( __write_only image3d_t image, float4 pattern,
> + unsigned int region0, unsigned int region1, unsigned int region2,
> + unsigned int origin0, unsigned int origin1, unsigned int origin2)
> +{
> + int i = get_global_id(0);
> + int j = get_global_id(1);
> + int k = get_global_id(2);
> + int4 coord;
> + if((i >= region0) || (j>= region1) || (k>=region2))
> + return;
> + coord.x = origin0 + i;
> + coord.y = origin1 + j;
> + coord.z = origin2 + k;
> + write_imagef(image, coord, pattern);
> +
> +}
> --
> 1.8.1.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list