[Beignet] [PATCH 6/9 V3] Modify all event related functions using new event handle.
Yang, Rong R
rong.r.yang at intel.com
Wed Sep 28 08:22:53 UTC 2016
The patchset has been pushed, thanks; it makes the event handling much clearer.
The utest profiling_exec fails because cl_event_get_timestamp hasn't been implemented yet; Junyan will implement it later.
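For context, profiling_exec exercises the standard event-profiling path, roughly as sketched below; the queries cannot return valid values until cl_event_get_timestamp records the CL_PROFILING_* timestamps on the event. This is a generic illustration of that path, not the actual utest source:

  /* Generic sketch of the profiling path the utest exercises (not the
   * actual utest code). Assumes a queue created with
   * CL_QUEUE_PROFILING_ENABLE and an event returned by a clEnqueue* call. */
  #include <CL/cl.h>
  #include <stdio.h>

  static void print_exec_time(cl_event event)
  {
    cl_ulong start = 0, end = 0;

    clWaitForEvents(1, &event); /* the event must reach CL_COMPLETE */
    /* Both queries depend on cl_event_get_timestamp having recorded
     * CL_PROFILING_COMMAND_START/END when the command executed. */
    clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
                            sizeof(start), &start, NULL);
    clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
                            sizeof(end), &end, NULL);
    printf("kernel execution time: %llu ns\n",
           (unsigned long long)(end - start));
  }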
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> junyan.he at inbox.com
> Sent: Monday, September 26, 2016 16:00
> To: beignet at lists.freedesktop.org
> Subject: [Beignet] [PATCH 6/9 V3] Modify all event related functions using
> new event handle.
>
> From: Junyan He <junyan.he at intel.com>
>
> Rewrite cl_event, and modify all the event functions to use
> the new event mechanism. Events now cooperate with the
> command queue's thread.
>
> v2:
> Fix a logic problem when event creation fails.
>
> V3:
> Make the default enqueue handler a no-op, to cover enqueue
> commands that have nothing to do.
>
> Signed-off-by: Junyan He <junyan.he at intel.com>
> ---
> src/CMakeLists.txt | 5 +
> src/cl_api.c | 1888 +---------------------------------------
> src/cl_api_kernel.c | 27 +-
> src/cl_command_queue.c | 98 ++-
> src/cl_command_queue.h | 7 +-
> src/cl_command_queue_enqueue.c | 8 +-
> src/cl_command_queue_gen7.c | 21 +-
> src/cl_enqueue.c | 502 ++++++-----
> src/cl_enqueue.h | 44 +-
> src/cl_event.c | 1067 ++++++++++-------------
> src/cl_event.h | 146 ++--
> src/cl_mem.c | 118 ++-
> src/cl_mem.h | 29 +-
> 13 files changed, 1074 insertions(+), 2886 deletions(-)
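A note for reviewers following along: "events cooperate with the command queue's thread" means a deferred command now runs on the queue's worker thread once its wait-list events complete, instead of being finished inline by the API call, and the V3 change makes the default enqueue handler a no-op for commands that carry no real work. A rough sketch of that pattern, in which every identifier is an illustrative assumption rather than the actual Beignet code:

  /* Sketch only -- all names below are illustrative assumptions. */
  typedef struct enqueue_data enqueue_data;
  struct enqueue_data {
    int (*exec)(enqueue_data *self); /* per-command work callback */
    void *event;                     /* event to complete afterwards */
  };

  /* Assumed helpers standing in for the real queue/event machinery. */
  extern enqueue_data *pop_ready_command(void *queue); /* blocks until a
      command whose wait-list events are all CL_COMPLETE is available */
  extern void complete_event(void *event, int status);

  /* V3 change: the default handler does nothing, covering enqueue
   * commands that have no real work to do. */
  static int enqueue_default(enqueue_data *self)
  {
    return 0; /* CL_SUCCESS */
  }

  /* One worker per command queue: run ready commands, then complete
   * their events so dependent commands (possibly on other queues, or
   * host threads blocked in clWaitForEvents) can proceed. */
  static void *queue_worker_thread(void *queue)
  {
    enqueue_data *cmd;
    while ((cmd = pop_ready_command(queue)) != NULL) {
      int status = cmd->exec ? cmd->exec(cmd) : enqueue_default(cmd);
      complete_event(cmd->event, status);
    }
    return NULL; /* queue shutting down */
  }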
>
> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
> index 009d057..626b43f 100644
> --- a/src/CMakeLists.txt
> +++ b/src/CMakeLists.txt
> @@ -68,6 +68,10 @@ set(OPENCL_SRC
> ${KERNEL_STR_FILES}
> cl_base_object.c
> cl_api.c
> + cl_api_mem.c
> + cl_api_kernel.c
> + cl_api_command_queue.c
> + cl_api_event.c
> cl_alloc.c
> cl_kernel.c
> cl_program.c
> @@ -85,6 +89,7 @@ set(OPENCL_SRC
> cl_command_queue.c
> cl_command_queue.h
> cl_command_queue_gen7.c
> + cl_command_queue_enqueue.c
> cl_thread.c
> cl_driver.h
> cl_driver.cpp
> diff --git a/src/cl_api.c b/src/cl_api.c
> index a2fee15..f8c48de 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -67,92 +67,6 @@ typedef intptr_t cl_device_partition_property;
> return RET; \
> } while(0)
>
> -static inline cl_int
> -handle_events(cl_command_queue queue, cl_int num, const cl_event
> *wait_list,
> - cl_event* event, enqueue_data* data, cl_command_type type)
> -{
> - cl_int status = cl_event_wait_events(num, wait_list, queue);
> - cl_event e = NULL;
> - if(event != NULL || status == CL_ENQUEUE_EXECUTE_DEFER) {
> - e = cl_event_new(queue->ctx, queue, type, event!=NULL);
> -
> - /* if need profiling, add the submit timestamp here. */
> - if (e->type != CL_COMMAND_USER &&
> - e->queue->props & CL_QUEUE_PROFILING_ENABLE) {
> - cl_event_get_timestamp(e, CL_PROFILING_COMMAND_QUEUED);
> - cl_event_get_queued_cpu_timestamp(e);
> - }
> -
> - if(event != NULL)
> - *event = e;
> - if(status == CL_ENQUEUE_EXECUTE_DEFER) {
> - cl_event_new_enqueue_callback(e, data, num, wait_list);
> - }
> - }
> - set_current_event(queue, e);
> - return status;
> -}
> -
> -/* The following code checking overlap is from Appendix of openCL spec 1.1
> */
> -cl_bool check_copy_overlap(const size_t src_offset[3],
> - const size_t dst_offset[3],
> - const size_t region[3],
> - size_t row_pitch, size_t slice_pitch)
> -{
> - const size_t src_min[] = {src_offset[0], src_offset[1], src_offset[2]};
> - const size_t src_max[] = {src_offset[0] + region[0],
> - src_offset[1] + region[1],
> - src_offset[2] + region[2]};
> - const size_t dst_min[] = {dst_offset[0], dst_offset[1], dst_offset[2]};
> - const size_t dst_max[] = {dst_offset[0] + region[0],
> - dst_offset[1] + region[1],
> - dst_offset[2] + region[2]};
> - // Check for overlap
> - cl_bool overlap = CL_TRUE;
> - unsigned i;
> - size_t dst_start = dst_offset[2] * slice_pitch +
> - dst_offset[1] * row_pitch + dst_offset[0];
> - size_t dst_end = dst_start + (region[2] * slice_pitch +
> - region[1] * row_pitch + region[0]);
> - size_t src_start = src_offset[2] * slice_pitch +
> - src_offset[1] * row_pitch + src_offset[0];
> - size_t src_end = src_start + (region[2] * slice_pitch +
> - region[1] * row_pitch + region[0]);
> -
> - for (i=0; i != 3; ++i) {
> - overlap = overlap && (src_min[i] < dst_max[i])
> - && (src_max[i] > dst_min[i]);
> - }
> -
> - if (!overlap) {
> - size_t delta_src_x = (src_offset[0] + region[0] > row_pitch) ?
> - src_offset[0] + region[0] - row_pitch : 0;
> - size_t delta_dst_x = (dst_offset[0] + region[0] > row_pitch) ?
> - dst_offset[0] + region[0] - row_pitch : 0;
> - if ( (delta_src_x > 0 && delta_src_x > dst_offset[0]) ||
> - (delta_dst_x > 0 && delta_dst_x > src_offset[0]) ) {
> - if ( (src_start <= dst_start && dst_start < src_end) ||
> - (dst_start <= src_start && src_start < dst_end) )
> - overlap = CL_TRUE;
> - }
> - if (region[2] > 1) {
> - size_t src_height = slice_pitch / row_pitch;
> - size_t dst_height = slice_pitch / row_pitch;
> - size_t delta_src_y = (src_offset[1] + region[1] > src_height) ?
> - src_offset[1] + region[1] - src_height : 0;
> - size_t delta_dst_y = (dst_offset[1] + region[1] > dst_height) ?
> - dst_offset[1] + region[1] - dst_height : 0;
> - if ( (delta_src_y > 0 && delta_src_y > dst_offset[1]) ||
> - (delta_dst_y > 0 && delta_dst_y > src_offset[1]) ) {
> - if ( (src_start <= dst_start && dst_start < src_end) ||
> - (dst_start <= src_start && src_start < dst_end) )
> - overlap = CL_TRUE;
> - }
> - }
> - }
> - return overlap;
> -}
> -
> static cl_int
> cl_check_device_type(cl_device_type device_type)
> {
> @@ -448,16 +362,6 @@ error:
> }
>
> cl_int
> -clReleaseCommandQueue(cl_command_queue command_queue)
> -{
> - cl_int err = CL_SUCCESS;
> - CHECK_QUEUE (command_queue);
> - cl_command_queue_delete(command_queue);
> -error:
> - return err;
> -}
> -
> -cl_int
> clGetCommandQueueInfo(cl_command_queue command_queue,
> cl_command_queue_info param_name,
> size_t param_value_size,
> @@ -1369,26 +1273,6 @@ clGetKernelSubGroupInfoKHR(cl_kernel kernel,
> }
>
> cl_int
> -clWaitForEvents(cl_uint num_events,
> - const cl_event * event_list)
> -{
> - cl_int err = CL_SUCCESS;
> - cl_context ctx = NULL;
> -
> - if(num_events > 0 && event_list)
> - ctx = event_list[0]->ctx;
> -
> - TRY(cl_event_check_waitlist, num_events, event_list, NULL, ctx);
> -
> - while(cl_event_wait_events(num_events, event_list, NULL) ==
> CL_ENQUEUE_EXECUTE_DEFER) {
> - usleep(8000); //sleep 8ms to wait other thread
> - }
> -
> -error:
> - return err;
> -}
> -
> -cl_int
> clGetEventInfo(cl_event event,
> cl_event_info param_name,
> size_t param_value_size,
> @@ -1403,9 +1287,9 @@ clGetEventInfo(cl_event event,
> } else if (param_name == CL_EVENT_CONTEXT) {
> FILL_GETINFO_RET (cl_context, 1, &event->ctx, CL_SUCCESS);
> } else if (param_name == CL_EVENT_COMMAND_TYPE) {
> - FILL_GETINFO_RET (cl_command_type, 1, &event->type, CL_SUCCESS);
> + FILL_GETINFO_RET (cl_command_type, 1, &event->event_type,
> CL_SUCCESS);
> } else if (param_name == CL_EVENT_COMMAND_EXECUTION_STATUS) {
> - cl_event_update_status(event, 0);
> + cl_event_get_status(event);
> FILL_GETINFO_RET (cl_int, 1, &event->status, CL_SUCCESS);
> } else if (param_name == CL_EVENT_REFERENCE_COUNT) {
> cl_uint ref = CL_OBJECT_GET_REF(event);
> @@ -1419,22 +1303,6 @@ error:
>
> }
>
> -cl_event
> -clCreateUserEvent(cl_context context,
> - cl_int * errcode_ret)
> -{
> - cl_int err = CL_SUCCESS;
> - cl_event event = NULL;
> - CHECK_CONTEXT(context);
> -
> - TRY_ALLOC(event, cl_event_new(context, NULL, CL_COMMAND_USER,
> CL_TRUE));
> -
> -error:
> - if(errcode_ret)
> - *errcode_ret = err;
> - return event;
> -}
> -
> cl_int
> clRetainEvent(cl_event event)
> {
> @@ -1459,48 +1327,6 @@ error:
> return err;
> }
>
> -cl_int
> -clSetUserEventStatus(cl_event event,
> - cl_int execution_status)
> -{
> - cl_int err = CL_SUCCESS;
> -
> - CHECK_EVENT(event);
> - if(execution_status > CL_COMPLETE) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> - if(event->status != CL_SUBMITTED) {
> - err = CL_INVALID_OPERATION;
> - goto error;
> - }
> -
> - cl_event_set_status(event, execution_status);
> -error:
> - return err;
> -}
> -
> -cl_int
> -clSetEventCallback(cl_event event,
> - cl_int command_exec_callback_type,
> - void (CL_CALLBACK * pfn_notify) (cl_event, cl_int, void *),
> - void * user_data)
> -{
> - cl_int err = CL_SUCCESS;
> -
> - CHECK_EVENT(event);
> - if((pfn_notify == NULL) ||
> - (command_exec_callback_type > CL_SUBMITTED) ||
> - (command_exec_callback_type < CL_COMPLETE)) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> - err = cl_event_set_callback(event, command_exec_callback_type,
> pfn_notify, user_data);
> -
> -error:
> - return err;
> -
> -}
>
> cl_int
> clGetEventProfilingInfo(cl_event event,
> @@ -1513,9 +1339,9 @@ clGetEventProfilingInfo(cl_event event,
> cl_ulong ret_val;
>
> CHECK_EVENT(event);
> - cl_event_update_status(event, 0);
> + //cl_event_update_status(event, 0);
>
> - if (event->type == CL_COMMAND_USER ||
> + if (event->event_type == CL_COMMAND_USER ||
> !(event->queue->props & CL_QUEUE_PROFILING_ENABLE) ||
> event->status != CL_COMPLETE) {
> err = CL_PROFILING_INFO_NOT_AVAILABLE;
> @@ -1552,1712 +1378,6 @@ error:
> return err;
> }
>
> -cl_int
> -clFlush(cl_command_queue command_queue)
> -{
> - /* have nothing to do now, as currently
> - * clEnqueueNDRangeKernel will flush at
> - * the end of each calling. we may need
> - * to optimize it latter.*/
> - return 0;
> -}
> -
> -cl_int
> -clFinish(cl_command_queue command_queue)
> -{
> - cl_int err = CL_SUCCESS;
> -
> - CHECK_QUEUE (command_queue);
> -
> -#ifdef HAS_CMRT
> - if (command_queue->cmrt_event != NULL)
> - return cmrt_wait_for_task_finished(command_queue);
> -#endif
> -
> - err = cl_command_queue_finish(command_queue);
> -
> -error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueReadBuffer(cl_command_queue command_queue,
> - cl_mem buffer,
> - cl_bool blocking_read,
> - size_t offset,
> - size_t size,
> - void * ptr,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event)
> -{
> - cl_int err = CL_SUCCESS;
> - enqueue_data *data, defer_enqueue_data = { 0 };
> - CHECK_QUEUE(command_queue);
> - CHECK_MEM(buffer);
> - if (command_queue->ctx != buffer->ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> -
> - if (!ptr || !size || offset + size > buffer->size) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (buffer->flags & (CL_MEM_HOST_WRITE_ONLY |
> CL_MEM_HOST_NO_ACCESS)) {
> - err = CL_INVALID_OPERATION;
> - goto error;
> - }
> -
> - TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, buffer->ctx);
> -
> - data = &defer_enqueue_data;
> - data->type = EnqueueReadBuffer;
> - data->mem_obj = buffer;
> - data->ptr = ptr;
> - data->offset = offset;
> - data->size = size;
> -
> - if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
> - event, data, CL_COMMAND_READ_BUFFER) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> - err = cl_enqueue_handle(event ? *event : NULL, data);
> - if(event) cl_event_set_status(*event, CL_COMPLETE);
> - }
> -
> -error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueReadBufferRect(cl_command_queue command_queue,
> - cl_mem buffer,
> - cl_bool blocking_read,
> - const size_t * buffer_origin,
> - const size_t * host_origin,
> - const size_t * region,
> - size_t buffer_row_pitch,
> - size_t buffer_slice_pitch,
> - size_t host_row_pitch,
> - size_t host_slice_pitch,
> - void * ptr,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event)
> -{
> - cl_int err = CL_SUCCESS;
> - enqueue_data *data, no_wait_data = { 0 };
> -
> - CHECK_QUEUE(command_queue);
> - CHECK_MEM(buffer);
> -
> - if (command_queue->ctx != buffer->ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> -
> - if (buffer->flags & (CL_MEM_HOST_WRITE_ONLY |
> CL_MEM_HOST_NO_ACCESS)) {
> - err = CL_INVALID_OPERATION;
> - goto error;
> - }
> -
> - if (!ptr || !region || region[0] == 0 || region[1] == 0 || region[2] == 0) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if(buffer_row_pitch == 0)
> - buffer_row_pitch = region[0];
> - if(buffer_slice_pitch == 0)
> - buffer_slice_pitch = region[1] * buffer_row_pitch;
> -
> - if(host_row_pitch == 0)
> - host_row_pitch = region[0];
> - if(host_slice_pitch == 0)
> - host_slice_pitch = region[1] * host_row_pitch;
> -
> - if (buffer_row_pitch < region[0] ||
> - host_row_pitch < region[0]) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if ((buffer_slice_pitch < region[1] * buffer_row_pitch || buffer_slice_pitch %
> buffer_row_pitch != 0 ) ||
> - (host_slice_pitch < region[1] * host_row_pitch || host_slice_pitch %
> host_row_pitch != 0 )) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if ((buffer_origin[2] + region[2] - 1) * buffer_slice_pitch
> - + (buffer_origin[1] + region[1] - 1) * buffer_row_pitch
> - + buffer_origin[0] + region[0] > buffer->size) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, buffer->ctx);
> -
> - data = &no_wait_data;
> - data->type = EnqueueReadBufferRect;
> - data->mem_obj = buffer;
> - data->ptr = ptr;
> - data->origin[0] = buffer_origin[0]; data->origin[1] = buffer_origin[1]; data->origin[2] = buffer_origin[2];
> - data->host_origin[0] = host_origin[0]; data->host_origin[1] =
> host_origin[1]; data->host_origin[2] = host_origin[2];
> - data->region[0] = region[0]; data->region[1] = region[1]; data->region[2]
> = region[2];
> - data->row_pitch = buffer_row_pitch;
> - data->slice_pitch = buffer_slice_pitch;
> - data->host_row_pitch = host_row_pitch;
> - data->host_slice_pitch = host_slice_pitch;
> -
> - if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
> - event, data, CL_COMMAND_READ_BUFFER_RECT) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> - err = cl_enqueue_handle(event ? *event : NULL, data);
> - if(event) cl_event_set_status(*event, CL_COMPLETE);
> - }
> -
> - error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueWriteBuffer(cl_command_queue command_queue,
> - cl_mem buffer,
> - cl_bool blocking_write,
> - size_t offset,
> - size_t size,
> - const void * ptr,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event)
> -{
> - cl_int err = CL_SUCCESS;
> - enqueue_data *data, no_wait_data = { 0 };
> -
> - CHECK_QUEUE(command_queue);
> - CHECK_MEM(buffer);
> - if (command_queue->ctx != buffer->ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> -
> - if (!ptr || !size || offset + size > buffer->size) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (buffer->flags & (CL_MEM_HOST_READ_ONLY |
> CL_MEM_HOST_NO_ACCESS)) {
> - err = CL_INVALID_OPERATION;
> - goto error;
> - }
> -
> - TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, buffer->ctx);
> -
> - data = &no_wait_data;
> - data->type = EnqueueWriteBuffer;
> - data->mem_obj = buffer;
> - data->const_ptr = ptr;
> - data->offset = offset;
> - data->size = size;
> -
> - if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
> - event, data, CL_COMMAND_WRITE_BUFFER) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> - err = cl_enqueue_handle(event ? *event : NULL, data);
> - if(event) cl_event_set_status(*event, CL_COMPLETE);
> - }
> -
> - error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueWriteBufferRect(cl_command_queue command_queue,
> - cl_mem buffer,
> - cl_bool blocking_write,
> - const size_t * buffer_origin,
> - const size_t * host_origin,
> - const size_t * region,
> - size_t buffer_row_pitch,
> - size_t buffer_slice_pitch,
> - size_t host_row_pitch,
> - size_t host_slice_pitch,
> - const void * ptr,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event)
> -{
> - cl_int err = CL_SUCCESS;
> - enqueue_data *data, no_wait_data = { 0 };
> -
> - CHECK_QUEUE(command_queue);
> - CHECK_MEM(buffer);
> -
> - if (command_queue->ctx != buffer->ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> -
> - if (buffer->flags & (CL_MEM_HOST_READ_ONLY |
> CL_MEM_HOST_NO_ACCESS)) {
> - err = CL_INVALID_OPERATION;
> - goto error;
> - }
> -
> - if (!ptr || !region || region[0] == 0 || region[1] == 0 || region[2] == 0) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if(buffer_row_pitch == 0)
> - buffer_row_pitch = region[0];
> - if(buffer_slice_pitch == 0)
> - buffer_slice_pitch = region[1] * buffer_row_pitch;
> -
> - if(host_row_pitch == 0)
> - host_row_pitch = region[0];
> - if(host_slice_pitch == 0)
> - host_slice_pitch = region[1] * host_row_pitch;
> -
> - if (buffer_row_pitch < region[0] ||
> - host_row_pitch < region[0]) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if ((buffer_slice_pitch < region[1] * buffer_row_pitch || buffer_slice_pitch %
> buffer_row_pitch != 0 ) ||
> - (host_slice_pitch < region[1] * host_row_pitch || host_slice_pitch %
> host_row_pitch != 0 )) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if ((buffer_origin[2] + region[2] - 1) * buffer_slice_pitch
> - + (buffer_origin[1] + region[1] - 1) * buffer_row_pitch
> - + buffer_origin[0] + region[0] > buffer->size) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, buffer->ctx);
> -
> - data = &no_wait_data;
> - data->type = EnqueueWriteBufferRect;
> - data->mem_obj = buffer;
> - data->const_ptr = ptr;
> - data->origin[0] = buffer_origin[0]; data->origin[1] = buffer_origin[1]; data->origin[2] = buffer_origin[2];
> - data->host_origin[0] = host_origin[0]; data->host_origin[1] =
> host_origin[1]; data->host_origin[2] = host_origin[2];
> - data->region[0] = region[0]; data->region[1] = region[1]; data->region[2]
> = region[2];
> - data->row_pitch = buffer_row_pitch;
> - data->slice_pitch = buffer_slice_pitch;
> - data->host_row_pitch = host_row_pitch;
> - data->host_slice_pitch = host_slice_pitch;
> -
> - if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
> - event, data, CL_COMMAND_WRITE_BUFFER_RECT) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> - err = cl_enqueue_handle(event ? *event : NULL, data);
> - if(event) cl_event_set_status(*event, CL_COMPLETE);
> - }
> -
> -error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueFillImage(cl_command_queue command_queue,
> - cl_mem image,
> - const void * fill_color,
> - const size_t * porigin,
> - const size_t * pregion,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event)
> -{
> - cl_int err = CL_SUCCESS;
> - enqueue_data *data, no_wait_data = { 0 };
> -
> - CHECK_QUEUE(command_queue);
> - CHECK_IMAGE(image, src_image);
> - FIXUP_IMAGE_REGION(src_image, pregion, region);
> - FIXUP_IMAGE_ORIGIN(src_image, porigin, origin);
> -
> - if (command_queue->ctx != image->ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> -
> - if (fill_color == NULL) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (!origin || !region || origin[0] + region[0] > src_image->w || origin[1] +
> region[1] > src_image->h || origin[2] + region[2] > src_image->depth) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (origin[2] !=
> 0 || region[2] != 1)){
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (src_image->image_type == CL_MEM_OBJECT_IMAGE1D && (origin[2] !=
> 0 ||origin[1] != 0 || region[2] != 1 || region[1] != 1)){
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - err = cl_image_fill(command_queue, fill_color, src_image, origin, region);
> - if (err) {
> - goto error;
> - }
> -
> - TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, image->ctx);
> -
> - data = &no_wait_data;
> - data->type = EnqueueFillImage;
> - data->queue = command_queue;
> -
> - if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
> - event, data, CL_COMMAND_FILL_BUFFER) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> - if (event && (*event)->type != CL_COMMAND_USER
> - && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
> - cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
> - }
> -
> - err = cl_command_queue_flush(command_queue);
> - }
> -
> - if(b_output_kernel_perf)
> - time_end(command_queue->ctx, "beignet internal kernel : cl_fill_image",
> "", command_queue);
> -
> - return 0;
> -
> - error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueFillBuffer(cl_command_queue command_queue,
> - cl_mem buffer,
> - const void * pattern,
> - size_t pattern_size,
> - size_t offset,
> - size_t size,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event)
> -{
> - cl_int err = CL_SUCCESS;
> - enqueue_data *data, no_wait_data = { 0 };
> - static size_t valid_sz[] = {1, 2, 4, 8, 16, 32, 64, 128};
> - int i = 0;
> -
> - CHECK_QUEUE(command_queue);
> - CHECK_MEM(buffer);
> -
> - if (command_queue->ctx != buffer->ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> -
> - if (offset + size > buffer->size) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (pattern == NULL) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - for (i = 0; i < sizeof(valid_sz) / sizeof(size_t); i++) {
> - if (valid_sz[i] == pattern_size)
> - break;
> - }
> - if (i == sizeof(valid_sz) / sizeof(size_t)) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (offset % pattern_size || size % pattern_size) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - err = cl_mem_fill(command_queue, pattern, pattern_size, buffer, offset,
> size);
> - if (err) {
> - goto error;
> - }
> -
> - TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, buffer->ctx);
> -
> - data = &no_wait_data;
> - data->type = EnqueueFillBuffer;
> - data->queue = command_queue;
> -
> - if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
> - event, data, CL_COMMAND_FILL_BUFFER) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> - if (event && (*event)->type != CL_COMMAND_USER
> - && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
> - cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
> - }
> -
> - err = cl_command_queue_flush(command_queue);
> - }
> -
> - if(b_output_kernel_perf)
> - time_end(command_queue->ctx, "beignet internal kernel : cl_fill_buffer",
> "", command_queue);
> -
> - return 0;
> -
> - error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueCopyBuffer(cl_command_queue command_queue,
> - cl_mem src_buffer,
> - cl_mem dst_buffer,
> - size_t src_offset,
> - size_t dst_offset,
> - size_t cb,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event)
> -{
> - cl_int err = CL_SUCCESS;
> - enqueue_data *data, no_wait_data = { 0 };
> -
> - CHECK_QUEUE(command_queue);
> - CHECK_MEM(src_buffer);
> - CHECK_MEM(dst_buffer);
> -
> - if (command_queue->ctx != src_buffer->ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> -
> - if (command_queue->ctx != dst_buffer->ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> -
> - if (src_offset + cb > src_buffer->size) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> - if (dst_offset + cb > dst_buffer->size) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - /* Check overlap */
> - if (src_buffer == dst_buffer
> - && (src_offset <= dst_offset && dst_offset <= src_offset + cb - 1)
> - && (dst_offset <= src_offset && src_offset <= dst_offset + cb - 1)) {
> - err = CL_MEM_COPY_OVERLAP;
> - goto error;
> - }
> -
> - /* Check sub overlap */
> - if (src_buffer->type == CL_MEM_SUBBUFFER_TYPE && dst_buffer->type
> == CL_MEM_SUBBUFFER_TYPE ) {
> - struct _cl_mem_buffer* src_b = (struct _cl_mem_buffer*)src_buffer;
> - struct _cl_mem_buffer* dst_b = (struct _cl_mem_buffer*)dst_buffer;
> - size_t src_sub_offset = src_b->sub_offset;
> - size_t dst_sub_offset = dst_b->sub_offset;
> -
> - if ((src_offset + src_sub_offset <= dst_offset + dst_sub_offset
> - && dst_offset + dst_sub_offset <= src_offset + src_sub_offset + cb - 1)
> - && (dst_offset + dst_sub_offset <= src_offset + src_sub_offset
> - && src_offset + src_sub_offset <= dst_offset + dst_sub_offset + cb -
> 1)) {
> - err = CL_MEM_COPY_OVERLAP;
> - goto error;
> - }
> - }
> -
> - err = cl_mem_copy(command_queue, src_buffer, dst_buffer, src_offset,
> dst_offset, cb);
> -
> - TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, src_buffer->ctx);
> -
> - data = &no_wait_data;
> - data->type = EnqueueCopyBuffer;
> - data->queue = command_queue;
> -
> - if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
> - event, data, CL_COMMAND_COPY_BUFFER) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> - if (event && (*event)->type != CL_COMMAND_USER
> - && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
> - cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
> - }
> -
> - err = cl_command_queue_flush(command_queue);
> - }
> -
> - if(b_output_kernel_perf)
> - time_end(command_queue->ctx, "beignet internal kernel :
> cl_mem_copy", "", command_queue);
> -
> - return 0;
> -
> -error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueCopyBufferRect(cl_command_queue command_queue,
> - cl_mem src_buffer,
> - cl_mem dst_buffer,
> - const size_t * src_origin,
> - const size_t * dst_origin,
> - const size_t * region,
> - size_t src_row_pitch,
> - size_t src_slice_pitch,
> - size_t dst_row_pitch,
> - size_t dst_slice_pitch,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event)
> -{
> - cl_int err = CL_SUCCESS;
> - enqueue_data *data, no_wait_data = { 0 };
> -
> - CHECK_QUEUE(command_queue);
> - CHECK_MEM(src_buffer);
> - CHECK_MEM(dst_buffer);
> -
> - if ((command_queue->ctx != src_buffer->ctx) ||
> - (command_queue->ctx != dst_buffer->ctx)) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> -
> - if (!region || region[0] == 0 || region[1] == 0 || region[2] == 0) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if(src_row_pitch == 0)
> - src_row_pitch = region[0];
> - if(src_slice_pitch == 0)
> - src_slice_pitch = region[1] * src_row_pitch;
> -
> - if(dst_row_pitch == 0)
> - dst_row_pitch = region[0];
> - if(dst_slice_pitch == 0)
> - dst_slice_pitch = region[1] * dst_row_pitch;
> -
> - if (src_row_pitch < region[0] ||
> - dst_row_pitch < region[0]) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if ((src_slice_pitch < region[1] * src_row_pitch || src_slice_pitch %
> src_row_pitch != 0 ) ||
> - (dst_slice_pitch < region[1] * dst_row_pitch || dst_slice_pitch %
> dst_row_pitch != 0 )) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if ((src_origin[2] + region[2] - 1) * src_slice_pitch
> - + (src_origin[1] + region[1] - 1) * src_row_pitch
> - + src_origin[0] + region[0] > src_buffer->size
> - ||(dst_origin[2] + region[2] - 1) * dst_slice_pitch
> - + (dst_origin[1] + region[1] - 1) * dst_row_pitch
> - + dst_origin[0] + region[0] > dst_buffer->size) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (src_buffer == dst_buffer && (src_row_pitch != dst_row_pitch ||
> src_slice_pitch != dst_slice_pitch)) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (src_buffer == dst_buffer &&
> - check_copy_overlap(src_origin, dst_origin, region, src_row_pitch,
> src_slice_pitch)) {
> - err = CL_MEM_COPY_OVERLAP;
> - goto error;
> - }
> -
> - cl_mem_copy_buffer_rect(command_queue, src_buffer, dst_buffer,
> src_origin, dst_origin, region,
> - src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch);
> -
> - TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, src_buffer->ctx);
> -
> - data = &no_wait_data;
> - data->type = EnqueueCopyBufferRect;
> - data->queue = command_queue;
> -
> - if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
> - event, data, CL_COMMAND_COPY_BUFFER_RECT) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> - if (event && (*event)->type != CL_COMMAND_USER
> - && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
> - cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
> - }
> -
> - err = cl_command_queue_flush(command_queue);
> - }
> -
> - if(b_output_kernel_perf)
> - time_end(command_queue->ctx, "beignet internal kernel :
> cl_mem_copy_buffer_rect", "", command_queue);
> -
> -error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueReadImage(cl_command_queue command_queue,
> - cl_mem mem,
> - cl_bool blocking_read,
> - const size_t * porigin,
> - const size_t * pregion,
> - size_t row_pitch,
> - size_t slice_pitch,
> - void * ptr,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event)
> -{
> - cl_int err = CL_SUCCESS;
> - enqueue_data *data, no_wait_data = { 0 };
> -
> - CHECK_QUEUE(command_queue);
> - CHECK_IMAGE(mem, image);
> - FIXUP_IMAGE_REGION(image, pregion, region);
> - FIXUP_IMAGE_ORIGIN(image, porigin, origin);
> - if (command_queue->ctx != mem->ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> -
> - if (!origin || !region || origin[0] + region[0] > image->w || origin[1] +
> region[1] > image->h || origin[2] + region[2] > image->depth) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (!row_pitch)
> - row_pitch = image->bpp*region[0];
> - else if (row_pitch < image->bpp*region[0]) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (image->slice_pitch) {
> - if (!slice_pitch)
> - slice_pitch = row_pitch*region[1];
> - else if (slice_pitch < row_pitch*region[1]) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> - }
> - else if (slice_pitch) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (!ptr) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (mem->flags & (CL_MEM_HOST_WRITE_ONLY |
> CL_MEM_HOST_NO_ACCESS)) {
> - err = CL_INVALID_OPERATION;
> - goto error;
> - }
> -
> - TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, mem->ctx);
> -
> - data = &no_wait_data;
> - data->type = EnqueueReadImage;
> - data->mem_obj = mem;
> - data->ptr = ptr;
> - data->origin[0] = origin[0]; data->origin[1] = origin[1]; data->origin[2] =
> origin[2];
> - data->region[0] = region[0]; data->region[1] = region[1]; data->region[2]
> = region[2];
> - data->row_pitch = row_pitch;
> - data->slice_pitch = slice_pitch;
> -
> - if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
> - event, data, CL_COMMAND_READ_IMAGE) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> - err = cl_enqueue_handle(event ? *event : NULL, data);
> - if(event) cl_event_set_status(*event, CL_COMPLETE);
> - }
> -
> -error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueWriteImage(cl_command_queue command_queue,
> - cl_mem mem,
> - cl_bool blocking_write,
> - const size_t * porigin,
> - const size_t * pregion,
> - size_t row_pitch,
> - size_t slice_pitch,
> - const void * ptr,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event)
> -{
> - cl_int err = CL_SUCCESS;
> - enqueue_data *data, no_wait_data = { 0 };
> -
> - CHECK_QUEUE(command_queue);
> - CHECK_IMAGE(mem, image);
> - FIXUP_IMAGE_REGION(image, pregion, region);
> - FIXUP_IMAGE_ORIGIN(image, porigin, origin);
> - if (command_queue->ctx != mem->ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> -
> - if (!origin || !region || origin[0] + region[0] > image->w || origin[1] +
> region[1] > image->h || origin[2] + region[2] > image->depth) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (!row_pitch)
> - row_pitch = image->bpp*region[0];
> - else if (row_pitch < image->bpp*region[0]) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (image->slice_pitch) {
> - if (!slice_pitch)
> - slice_pitch = row_pitch*region[1];
> - else if (slice_pitch < row_pitch*region[1]) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> - }
> - else if (slice_pitch) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (!ptr) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (mem->flags & (CL_MEM_HOST_READ_ONLY |
> CL_MEM_HOST_NO_ACCESS)) {
> - err = CL_INVALID_OPERATION;
> - goto error;
> - }
> -
> - TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, mem->ctx);
> -
> - data = &no_wait_data;
> - data->type = EnqueueWriteImage;
> - data->mem_obj = mem;
> - data->const_ptr = ptr;
> - data->origin[0] = origin[0]; data->origin[1] = origin[1]; data->origin[2] =
> origin[2];
> - data->region[0] = region[0]; data->region[1] = region[1]; data->region[2]
> = region[2];
> - data->row_pitch = row_pitch;
> - data->slice_pitch = slice_pitch;
> -
> - if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
> - event, data, CL_COMMAND_WRITE_IMAGE) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> - err = cl_enqueue_handle(event ? *event : NULL, data);
> - if(event) cl_event_set_status(*event, CL_COMPLETE);
> - }
> -
> -error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueCopyImage(cl_command_queue command_queue,
> - cl_mem src_mem,
> - cl_mem dst_mem,
> - const size_t * psrc_origin,
> - const size_t * pdst_origin,
> - const size_t * pregion,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event)
> -{
> - cl_int err = CL_SUCCESS;
> - enqueue_data *data, no_wait_data = { 0 };
> - cl_bool overlap = CL_TRUE;
> - cl_int i = 0;
> -
> - CHECK_QUEUE(command_queue);
> - CHECK_IMAGE(src_mem, src_image);
> - CHECK_IMAGE(dst_mem, dst_image);
> - FIXUP_IMAGE_REGION(src_image, pregion, region);
> - FIXUP_IMAGE_ORIGIN(src_image, psrc_origin, src_origin);
> - FIXUP_IMAGE_ORIGIN(dst_image, pdst_origin, dst_origin);
> - if (command_queue->ctx != src_mem->ctx ||
> - command_queue->ctx != dst_mem->ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> -
> - if (src_image->fmt.image_channel_order != dst_image->fmt.image_channel_order ||
> - src_image->fmt.image_channel_data_type != dst_image->fmt.image_channel_data_type) {
> - err = CL_IMAGE_FORMAT_MISMATCH;
> - goto error;
> - }
> -
> - if (!src_origin || !region || src_origin[0] + region[0] > src_image->w ||
> - src_origin[1] + region[1] > src_image->h || src_origin[2] + region[2] >
> src_image->depth) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (!dst_origin || !region || dst_origin[0] + region[0] > dst_image->w ||
> - dst_origin[1] + region[1] > dst_image->h || dst_origin[2] + region[2] >
> dst_image->depth) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if ((src_image->image_type == CL_MEM_OBJECT_IMAGE2D &&
> (src_origin[2] != 0 || region[2] != 1)) ||
> - (dst_image->image_type == CL_MEM_OBJECT_IMAGE2D &&
> (dst_origin[2] != 0 || region[2] != 1))) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (src_image == dst_image) {
> - for(i = 0; i < 3; i++)
> - overlap = overlap && (src_origin[i] < dst_origin[i] + region[i])
> - && (dst_origin[i] < src_origin[i] + region[i]);
> - if(overlap == CL_TRUE) {
> - err = CL_MEM_COPY_OVERLAP;
> - goto error;
> - }
> - }
> -
> - cl_mem_kernel_copy_image(command_queue, src_image, dst_image,
> src_origin, dst_origin, region);
> -
> - TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, src_mem->ctx);
> -
> - data = &no_wait_data;
> - data->type = EnqueueCopyImage;
> - data->queue = command_queue;
> -
> - if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
> - event, data, CL_COMMAND_COPY_IMAGE) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> - if (event && (*event)->type != CL_COMMAND_USER
> - && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
> - cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
> - }
> -
> - err = cl_command_queue_flush(command_queue);
> - }
> -
> - if(b_output_kernel_perf)
> - time_end(command_queue->ctx, "beignet internal kernel :
> cl_mem_kernel_copy_image", "", command_queue);
> -
> -error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueCopyImageToBuffer(cl_command_queue command_queue,
> - cl_mem src_mem,
> - cl_mem dst_buffer,
> - const size_t * psrc_origin,
> - const size_t * pregion,
> - size_t dst_offset,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event)
> -{
> - cl_int err = CL_SUCCESS;
> - enqueue_data *data, no_wait_data = { 0 };
> -
> - CHECK_QUEUE(command_queue);
> - CHECK_IMAGE(src_mem, src_image);
> - CHECK_MEM(dst_buffer);
> - FIXUP_IMAGE_REGION(src_image, pregion, region);
> - FIXUP_IMAGE_ORIGIN(src_image, psrc_origin, src_origin);
> - if (command_queue->ctx != src_mem->ctx ||
> - command_queue->ctx != dst_buffer->ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> -
> - if (dst_offset + region[0]*region[1]*region[2]*src_image->bpp >
> dst_buffer->size) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (!src_origin || !region || src_origin[0] + region[0] > src_image->w ||
> - src_origin[1] + region[1] > src_image->h || src_origin[2] + region[2] >
> src_image->depth) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (src_image->image_type == CL_MEM_OBJECT_IMAGE2D &&
> (src_origin[2] != 0 || region[2] != 1)) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - cl_mem_copy_image_to_buffer(command_queue, src_image, dst_buffer,
> src_origin, dst_offset, region);
> -
> - TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, src_mem->ctx);
> -
> - data = &no_wait_data;
> - data->type = EnqueueCopyImageToBuffer;
> - data->queue = command_queue;
> -
> - if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
> - event, data, CL_COMMAND_COPY_IMAGE_TO_BUFFER) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> - if (event && (*event)->type != CL_COMMAND_USER
> - && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
> - cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
> - }
> -
> - err = cl_command_queue_flush(command_queue);
> - }
> -
> - if(b_output_kernel_perf)
> - time_end(command_queue->ctx, "beignet internal kernel :
> cl_mem_copy_image_to_buffer", "", command_queue);
> -
> -error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueCopyBufferToImage(cl_command_queue command_queue,
> - cl_mem src_buffer,
> - cl_mem dst_mem,
> - size_t src_offset,
> - const size_t * pdst_origin,
> - const size_t * pregion,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event)
> -{
> - cl_int err = CL_SUCCESS;
> - enqueue_data *data, no_wait_data = { 0 };
> -
> - CHECK_QUEUE(command_queue);
> - CHECK_MEM(src_buffer);
> - CHECK_IMAGE(dst_mem, dst_image);
> - FIXUP_IMAGE_REGION(dst_image, pregion, region);
> - FIXUP_IMAGE_ORIGIN(dst_image, pdst_origin, dst_origin);
> - if (command_queue->ctx != src_buffer->ctx ||
> - command_queue->ctx != dst_mem->ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> -
> - if (src_offset + region[0]*region[1]*region[2]*dst_image->bpp >
> src_buffer->size) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (!dst_origin || !region || dst_origin[0] + region[0] > dst_image->w ||
> - dst_origin[1] + region[1] > dst_image->h || dst_origin[2] + region[2] >
> dst_image->depth) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (dst_image->image_type == CL_MEM_OBJECT_IMAGE2D &&
> (dst_origin[2] != 0 || region[2] != 1)) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - cl_mem_copy_buffer_to_image(command_queue, src_buffer, dst_image,
> src_offset, dst_origin, region);
> -
> - TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, dst_mem->ctx);
> -
> - data = &no_wait_data;
> - data->type = EnqueueCopyBufferToImage;
> - data->queue = command_queue;
> -
> - if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
> - event, data, CL_COMMAND_COPY_BUFFER_TO_IMAGE) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> - if (event && (*event)->type != CL_COMMAND_USER
> - && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
> - cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
> - }
> -
> - err = cl_command_queue_flush(command_queue);
> - }
> -
> - if(b_output_kernel_perf)
> - time_end(command_queue->ctx, "beignet internal kernel :
> cl_mem_copy_buffer_to_image", "", command_queue);
> -
> -error:
> - return err;
> -}
> -
> -static cl_int _cl_map_mem(cl_mem mem, void *ptr, void **mem_ptr,
> - size_t offset, size_t size,
> - const size_t *origin, const size_t *region)
> -{
> - cl_int slot = -1;
> - int err = CL_SUCCESS;
> - size_t sub_offset = 0;
> -
> - if(mem->type == CL_MEM_SUBBUFFER_TYPE) {
> - struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
> - sub_offset = buffer->sub_offset;
> - }
> -
> - ptr = (char*)ptr + offset + sub_offset;
> - if(mem->flags & CL_MEM_USE_HOST_PTR) {
> - assert(mem->host_ptr);
> - //only calc ptr here, will do memcpy in enqueue
> - *mem_ptr = (char *)mem->host_ptr + offset + sub_offset;
> - } else {
> - *mem_ptr = ptr;
> - }
> - /* Record the mapped address. */
> - if (!mem->mapped_ptr_sz) {
> - mem->mapped_ptr_sz = 16;
> - mem->mapped_ptr = (cl_mapped_ptr *)malloc(
> - sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz);
> - if (!mem->mapped_ptr) {
> - cl_mem_unmap_auto(mem);
> - err = CL_OUT_OF_HOST_MEMORY;
> - goto error;
> - }
> - memset(mem->mapped_ptr, 0, mem->mapped_ptr_sz *
> sizeof(cl_mapped_ptr));
> - slot = 0;
> - } else {
> - int i = 0;
> - for (; i < mem->mapped_ptr_sz; i++) {
> - if (mem->mapped_ptr[i].ptr == NULL) {
> - slot = i;
> - break;
> - }
> - }
> - if (i == mem->mapped_ptr_sz) {
> - cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
> - sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz * 2);
> - if (!new_ptr) {
> - cl_mem_unmap_auto(mem);
> - err = CL_OUT_OF_HOST_MEMORY;
> - goto error;
> - }
> - memset(new_ptr, 0, 2 * mem->mapped_ptr_sz *
> sizeof(cl_mapped_ptr));
> - memcpy(new_ptr, mem->mapped_ptr,
> - mem->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> - slot = mem->mapped_ptr_sz;
> - mem->mapped_ptr_sz *= 2;
> - free(mem->mapped_ptr);
> - mem->mapped_ptr = new_ptr;
> - }
> - }
> - assert(slot != -1);
> - mem->mapped_ptr[slot].ptr = *mem_ptr;
> - mem->mapped_ptr[slot].v_ptr = ptr;
> - mem->mapped_ptr[slot].size = size;
> - if(origin) {
> - assert(region);
> - mem->mapped_ptr[slot].origin[0] = origin[0];
> - mem->mapped_ptr[slot].origin[1] = origin[1];
> - mem->mapped_ptr[slot].origin[2] = origin[2];
> - mem->mapped_ptr[slot].region[0] = region[0];
> - mem->mapped_ptr[slot].region[1] = region[1];
> - mem->mapped_ptr[slot].region[2] = region[2];
> - }
> - mem->map_ref++;
> -error:
> - if (err != CL_SUCCESS)
> - *mem_ptr = NULL;
> - return err;
> -}
> -
> -void *
> -clEnqueueMapBuffer(cl_command_queue command_queue,
> - cl_mem buffer,
> - cl_bool blocking_map,
> - cl_map_flags map_flags,
> - size_t offset,
> - size_t size,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event,
> - cl_int * errcode_ret)
> -{
> - cl_int err = CL_SUCCESS;
> - void *ptr = NULL;
> - void *mem_ptr = NULL;
> - enqueue_data *data, no_wait_data = { 0 };
> -
> - CHECK_QUEUE(command_queue);
> - CHECK_MEM(buffer);
> - if (command_queue->ctx != buffer->ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> -
> - if (!size || offset + size > buffer->size) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if ((map_flags & CL_MAP_READ &&
> - buffer->flags & (CL_MEM_HOST_WRITE_ONLY |
> CL_MEM_HOST_NO_ACCESS)) ||
> - (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)
> &&
> - buffer->flags & (CL_MEM_HOST_READ_ONLY |
> CL_MEM_HOST_NO_ACCESS)))
> - {
> - err = CL_INVALID_OPERATION;
> - goto error;
> - }
> -
> -#ifdef HAS_CMRT
> - if (command_queue->cmrt_event != NULL)
> - cmrt_wait_for_task_finished(command_queue);
> -#endif
> -
> - TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, buffer->ctx);
> -
> - data = &no_wait_data;
> - data->type = EnqueueMapBuffer;
> - data->mem_obj = buffer;
> - data->offset = offset;
> - data->size = size;
> - data->ptr = ptr;
> - data->unsync_map = 1;
> - if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
> - data->write_map = 1;
> -
> - if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
> - event, data, CL_COMMAND_MAP_BUFFER) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> - data->unsync_map = 0;
> - err = cl_enqueue_handle(event ? *event : NULL, data);
> - if (err != CL_SUCCESS)
> - goto error;
> - ptr = data->ptr;
> - if(event) cl_event_set_status(*event, CL_COMPLETE);
> - } else {
> - if (buffer->is_userptr)
> - ptr = buffer->host_ptr;
> - else {
> - if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) {
> - err = CL_MAP_FAILURE;
> - goto error;
> - }
> - }
> - }
> - err = _cl_map_mem(buffer, ptr, &mem_ptr, offset, size, NULL, NULL);
> - if (err != CL_SUCCESS)
> - goto error;
> -
> -error:
> - if (errcode_ret)
> - *errcode_ret = err;
> - return mem_ptr;
> -}
> -
> -void *
> -clEnqueueMapImage(cl_command_queue command_queue,
> - cl_mem mem,
> - cl_bool blocking_map,
> - cl_map_flags map_flags,
> - const size_t * porigin,
> - const size_t * pregion,
> - size_t * image_row_pitch,
> - size_t * image_slice_pitch,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event,
> - cl_int * errcode_ret)
> -{
> - cl_int err = CL_SUCCESS;
> - void *ptr = NULL;
> - void *mem_ptr = NULL;
> - size_t offset = 0;
> - enqueue_data *data, no_wait_data = { 0 };
> -
> - CHECK_QUEUE(command_queue);
> - CHECK_IMAGE(mem, image);
> - FIXUP_IMAGE_REGION(image, pregion, region);
> - FIXUP_IMAGE_ORIGIN(image, porigin, origin);
> - if (command_queue->ctx != mem->ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> -
> - if (!origin || !region || origin[0] + region[0] > image->w || origin[1] +
> region[1] > image->h || origin[2] + region[2] > image->depth) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (!image_row_pitch || (image->slice_pitch && !image_slice_pitch)) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if ((map_flags & CL_MAP_READ &&
> - mem->flags & (CL_MEM_HOST_WRITE_ONLY |
> CL_MEM_HOST_NO_ACCESS)) ||
> - (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)
> &&
> - mem->flags & (CL_MEM_HOST_READ_ONLY |
> CL_MEM_HOST_NO_ACCESS)))
> - {
> - err = CL_INVALID_OPERATION;
> - goto error;
> - }
> -
> -#ifdef HAS_CMRT
> - if (command_queue->cmrt_event != NULL)
> - cmrt_wait_for_task_finished(command_queue);
> -#endif
> -
> - TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, mem->ctx);
> -
> - data = &no_wait_data;
> - data->type = EnqueueMapImage;
> - data->mem_obj = mem;
> - data->origin[0] = origin[0]; data->origin[1] = origin[1]; data->origin[2] =
> origin[2];
> - data->region[0] = region[0]; data->region[1] = region[1]; data->region[2]
> = region[2];
> - data->ptr = ptr;
> - data->unsync_map = 1;
> - if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
> - data->write_map = 1;
> -
> - if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
> - event, data, CL_COMMAND_MAP_IMAGE) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> - data->unsync_map = 0;
> - err = cl_enqueue_handle(event ? *event : NULL, data);
> - if (err != CL_SUCCESS)
> - goto error;
> - ptr = data->ptr;
> - if(event) cl_event_set_status(*event, CL_COMPLETE);
> - } else {
> - if ((ptr = cl_mem_map_gtt_unsync(mem)) == NULL) {
> - err = CL_MAP_FAILURE;
> - goto error;
> - }
> - }
> -
> - if(mem->flags & CL_MEM_USE_HOST_PTR) {
> - if (image_slice_pitch)
> - *image_slice_pitch = image->host_slice_pitch;
> - *image_row_pitch = image->host_row_pitch;
> -
> - offset = image->bpp*origin[0] + image->host_row_pitch*origin[1] +
> image->host_slice_pitch*origin[2];
> - } else {
> - if (image_slice_pitch)
> - *image_slice_pitch = image->slice_pitch;
> - if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
> - *image_row_pitch = image->slice_pitch;
> - else
> - *image_row_pitch = image->row_pitch;
> -
> - offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2];
> - }
> - err = _cl_map_mem(mem, ptr, &mem_ptr, offset, 0, origin, region);
> -
> -error:
> - if (errcode_ret)
> - *errcode_ret = err;
> - return mem_ptr; //TODO: map and unmap first
> -}
> -
> -cl_int
> -clEnqueueUnmapMemObject(cl_command_queue command_queue,
> - cl_mem memobj,
> - void * mapped_ptr,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event)
> -{
> - cl_int err = CL_SUCCESS;
> - enqueue_data *data, no_wait_data = { 0 };
> -
> - CHECK_QUEUE(command_queue);
> - CHECK_MEM(memobj);
> - if (command_queue->ctx != memobj->ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> -
> - TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, memobj->ctx);
> -
> - data = &no_wait_data;
> - data->type = EnqueueUnmapMemObject;
> - data->mem_obj = memobj;
> - data->ptr = mapped_ptr;
> -
> - if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
> - event, data, CL_COMMAND_UNMAP_MEM_OBJECT) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> - err = cl_enqueue_handle(event ? *event : NULL, data);
> - if(event) cl_event_set_status(*event, CL_COMPLETE);
> - }
> -
> -error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueMigrateMemObjects(cl_command_queue command_queue,
> - cl_uint num_mem_objects,
> - const cl_mem * mem_objects,
> - cl_mem_migration_flags flags,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event)
> -{
> - /* So far, we just support 1 device and no subdevice. So all the command
> queues
> - belong to the small context. There is no need to migrate the mem objects
> by now. */
> - cl_int err = CL_SUCCESS;
> - cl_uint i = 0;
> - enqueue_data *data, defer_enqueue_data = { 0 };
> -
> - if (!flags & CL_MIGRATE_MEM_OBJECT_HOST)
> - CHECK_QUEUE(command_queue);
> -
> - if (num_mem_objects == 0 || mem_objects == NULL) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - if (flags && flags & ~(CL_MIGRATE_MEM_OBJECT_HOST |
> - CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - for (i = 0; i < num_mem_objects; i++) {
> - CHECK_MEM(mem_objects[i]);
> - if (mem_objects[i]->ctx != command_queue->ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> - }
> -
> - /* really nothing to do, fill the event. */
> - TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, command_queue->ctx);
> - data = &defer_enqueue_data;
> - data->type = EnqueueMigrateMemObj;
> -
> - if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
> - event, data, CL_COMMAND_READ_BUFFER) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> - err = cl_enqueue_handle(event ? *event : NULL, data);
> - if(event) cl_event_set_status(*event, CL_COMPLETE);
> - }
> -
> -error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueNDRangeKernel(cl_command_queue command_queue,
> - cl_kernel kernel,
> - cl_uint work_dim,
> - const size_t * global_work_offset,
> - const size_t * global_work_size,
> - const size_t * local_work_size,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event)
> -{
> - size_t fixed_global_off[] = {0,0,0};
> - size_t fixed_global_sz[] = {1,1,1};
> - size_t fixed_local_sz[] = {1,1,1};
> - cl_int err = CL_SUCCESS;
> - cl_uint i;
> - enqueue_data *data, no_wait_data = { 0 };
> -
> - CHECK_QUEUE(command_queue);
> - CHECK_KERNEL(kernel);
> -
> - /* Check number of dimensions we have */
> - if (UNLIKELY(work_dim == 0 || work_dim > 3)) {
> - err = CL_INVALID_WORK_DIMENSION;
> - goto error;
> - }
> -
> - /* We need a work size per dimension */
> - if (UNLIKELY(global_work_size == NULL)) {
> - err = CL_INVALID_GLOBAL_WORK_SIZE;
> - goto error;
> - }
> -
> - if (kernel->vme) {
> - if (work_dim != 2) {
> - err = CL_INVALID_WORK_DIMENSION;
> - goto error;
> - }
> - if (local_work_size != NULL) {
> - err = CL_INVALID_WORK_GROUP_SIZE;
> - goto error;
> - }
> - }
> -
> - if (global_work_offset != NULL)
> - for (i = 0; i < work_dim; ++i) {
> - if (UNLIKELY(global_work_offset[i] + global_work_size[i] > (size_t)-1)) {
> - err = CL_INVALID_GLOBAL_OFFSET;
> - goto error;
> - }
> - }
> -
> - /* Local sizes must be non-null and divide global sizes */
> - if (local_work_size != NULL)
> - for (i = 0; i < work_dim; ++i)
> - if (UNLIKELY(local_work_size[i] == 0 || global_work_size[i] %
> local_work_size[i])) {
> - err = CL_INVALID_WORK_GROUP_SIZE;
> - goto error;
> - }
> -
> - /* Queue and kernel must share the same context */
> - assert(kernel->program);
> - if (command_queue->ctx != kernel->program->ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto error;
> - }
> -
> -#ifdef HAS_CMRT
> - if (kernel->cmrt_kernel != NULL) {
> - err = cmrt_enqueue(command_queue, kernel, global_work_size,
> local_work_size);
> - goto error;
> - }
> -#endif
> -
> - /* XXX No event right now */
> - //FATAL_IF(num_events_in_wait_list > 0, "Events are not supported");
> - //FATAL_IF(event_wait_list != NULL, "Events are not supported");
> - //FATAL_IF(event != NULL, "Events are not supported");
> -
> - if (local_work_size != NULL) {
> - for (i = 0; i < work_dim; ++i)
> - fixed_local_sz[i] = local_work_size[i];
> - } else {
> - if (kernel->vme) {
> - fixed_local_sz[0] = 16;
> - fixed_local_sz[1] = 1;
> - } else {
> - uint j, maxDimSize = 64 /* from 64? */, maxGroupSize = 256;
> //MAX_WORK_GROUP_SIZE may too large
> - size_t realGroupSize = 1;
> - for (i = 0; i< work_dim; i++) {
> - for (j = maxDimSize; j > 1; j--) {
> - if (global_work_size[i] % j == 0 && j <= maxGroupSize) {
> - fixed_local_sz[i] = j;
> - maxGroupSize = maxGroupSize /j;
> - maxDimSize = maxGroupSize > maxDimSize ? maxDimSize :
> maxGroupSize;
> - break; //choose next work_dim
> - }
> - }
> - realGroupSize *= fixed_local_sz[i];
> - }
> -
> - //in a loop of conformance test (such as test_api
> repeated_setup_cleanup), in each loop:
> - //create a new context, a new command queue, and uses
> 'globalsize[0]=1000, localsize=NULL' to enqueu kernel
> - //it triggers the following message for many times.
> - //to avoid too many messages, only print it for the first time of the
> process.
> - //just use static variable since it doesn't matter to print a few times at
> multi-thread case.
> - static int warn_no_good_localsize = 1;
> - if (realGroupSize % 8 != 0 && warn_no_good_localsize) {
> - warn_no_good_localsize = 0;
> - DEBUGP(DL_WARNING, "unable to find good values for
> local_work_size[i], please provide local_work_size[] explicitly, you can find
> good values with trial-and-error method.");
> - }
> - }
> - }
> -
> - if (kernel->vme) {
> - fixed_global_sz[0] = (global_work_size[0]+15) / 16 * 16;
> - fixed_global_sz[1] = (global_work_size[1]+15) / 16;
> - } else {
> - for (i = 0; i < work_dim; ++i)
> - fixed_global_sz[i] = global_work_size[i];
> - }
> - if (global_work_offset != NULL)
> - for (i = 0; i < work_dim; ++i)
> - fixed_global_off[i] = global_work_offset[i];
> -
> - if (kernel->compile_wg_sz[0] || kernel->compile_wg_sz[1] || kernel->compile_wg_sz[2]) {
> - if (fixed_local_sz[0] != kernel->compile_wg_sz[0]
> - || fixed_local_sz[1] != kernel->compile_wg_sz[1]
> - || fixed_local_sz[2] != kernel->compile_wg_sz[2])
> - {
> - err = CL_INVALID_WORK_GROUP_SIZE;
> - goto error;
> - }
> - }
> -
> - /* Do device specific checks are enqueue the kernel */
> - err = cl_command_queue_ND_range(command_queue,
> - kernel,
> - work_dim,
> - fixed_global_off,
> - fixed_global_sz,
> - fixed_local_sz);
> - if(err != CL_SUCCESS)
> - goto error;
> -
> - data = &no_wait_data;
> - data->type = EnqueueNDRangeKernel;
> - data->queue = command_queue;
> -
> - TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, command_queue->ctx);
> - if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
> - event, data, CL_COMMAND_NDRANGE_KERNEL) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> - if (event && (*event)->type != CL_COMMAND_USER
> - && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
> - cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
> - }
> -
> - err = cl_command_queue_flush(command_queue);
> - }
> -
> -error:
> - if(b_output_kernel_perf)
> - {
> - if(kernel->program->build_opts != NULL)
> - time_end(command_queue->ctx, cl_kernel_get_name(kernel), kernel->program->build_opts, command_queue);
> - else
> - time_end(command_queue->ctx, cl_kernel_get_name(kernel), "",
> command_queue);
> - }
> -
> - return err;
> -}
> -
> -cl_int
> -clEnqueueTask(cl_command_queue command_queue,
> - cl_kernel kernel,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event)
> -{
> - const size_t global_size[3] = {1, 0, 0};
> - const size_t local_size[3] = {1, 0, 0};
> -
> - return clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
> global_size, local_size,
> - num_events_in_wait_list, event_wait_list, event);
> -}
> -
> -cl_int
> -clEnqueueNativeKernel(cl_command_queue command_queue,
> - void (*user_func)(void *),
> - void * args,
> - size_t cb_args,
> - cl_uint num_mem_objects,
> - const cl_mem * mem_list,
> - const void ** args_mem_loc,
> - cl_uint num_events_in_wait_list,
> - const cl_event * event_wait_list,
> - cl_event * event)
> -{
> - cl_int err = CL_SUCCESS;
> - void *new_args = NULL;
> - enqueue_data *data, no_wait_data = { 0 };
> - cl_int i;
> -
> -  if(user_func == NULL ||
> -     (args == NULL && cb_args > 0) ||
> -     (args == NULL && num_mem_objects == 0) ||
> -     (args != NULL && cb_args == 0) ||
> -     (num_mem_objects > 0 && (mem_list == NULL || args_mem_loc == NULL)) ||
> -     (num_mem_objects == 0 && (mem_list != NULL || args_mem_loc != NULL))) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> -  //Per spec, we need to copy the args
> - if (cb_args)
> - {
> - new_args = malloc(cb_args);
> - if (!new_args)
> - {
> - err = CL_OUT_OF_HOST_MEMORY;
> - goto error;
> - }
> - memcpy(new_args, args, cb_args);
> -
> - for (i=0; i<num_mem_objects; ++i)
> - {
> - CHECK_MEM(mem_list[i]);
> -      args_mem_loc[i] = new_args + (args_mem_loc[i] - args);   //relocate into the new args copy
> - }
> - }
> -
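The pointer rebasing in the loop above is the subtle part of the spec-mandated argument copy: each args_mem_loc[i] points into the caller's args block, so after the memcpy it has to be shifted to the same byte offset inside the copy. A standalone sketch of just that step (copy_args_and_rebase is my name, not part of the patch):

    #include <stdlib.h>
    #include <string.h>

    /* Copy an argument block and rebase interior pointers so they
     * point into the copy. Each locs[i] must point somewhere inside
     * [args, args + cb_args). */
    static void *copy_args_and_rebase(const void *args, size_t cb_args,
                                      const void **locs, size_t n)
    {
      char *copy = malloc(cb_args);
      size_t i;
      if (copy == NULL)
        return NULL;
      memcpy(copy, args, cb_args);
      for (i = 0; i < n; i++) {
        size_t off = (size_t)((const char *)locs[i] - (const char *)args);
        locs[i] = copy + off; /* same offset, new base */
      }
      return copy;
    }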
> -  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx);
> -
> - data = &no_wait_data;
> - data->type = EnqueueNativeKernel;
> - data->mem_list = mem_list;
> - data->ptr = new_args;
> - data->size = cb_args;
> - data->offset = (size_t)num_mem_objects;
> - data->const_ptr = args_mem_loc;
> - data->user_func = user_func;
> -
> -  if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
> -                   event, data, CL_COMMAND_NATIVE_KERNEL) == CL_ENQUEUE_EXECUTE_IMM) {
> - err = cl_enqueue_handle(event ? *event : NULL, data);
> - if(event) cl_event_set_status(*event, CL_COMPLETE);
> - }
> -
> -error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueMarker(cl_command_queue command_queue,
> - cl_event *event)
> -{
> - cl_int err = CL_SUCCESS;
> - CHECK_QUEUE(command_queue);
> - if(event == NULL) {
> - err = CL_INVALID_VALUE;
> - goto error;
> - }
> -
> - cl_event_marker_with_wait_list(command_queue, 0, NULL, event);
> -error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueMarkerWithWaitList(cl_command_queue command_queue,
> - cl_uint num_events_in_wait_list,
> - const cl_event *event_wait_list,
> - cl_event *event)
> -{
> - cl_int err = CL_SUCCESS;
> - CHECK_QUEUE(command_queue);
> -
> -  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx);
> -
> -  cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event);
> -error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueWaitForEvents(cl_command_queue command_queue,
> - cl_uint num_events,
> - const cl_event * event_list)
> -{
> - cl_int err = CL_SUCCESS;
> - CHECK_QUEUE(command_queue);
> - err = clWaitForEvents(num_events, event_list);
> -
> -error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueBarrier(cl_command_queue command_queue)
> -{
> - cl_int err = CL_SUCCESS;
> - CHECK_QUEUE(command_queue);
> -
> - cl_event_barrier_with_wait_list(command_queue, 0, NULL, NULL);
> -
> -error:
> - return err;
> -}
> -
> -cl_int
> -clEnqueueBarrierWithWaitList(cl_command_queue command_queue,
> - cl_uint num_events_in_wait_list,
> - const cl_event *event_wait_list,
> - cl_event *event)
> -{
> - cl_int err = CL_SUCCESS;
> - CHECK_QUEUE(command_queue);
> -
> -  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx);
> -
> -  cl_event_barrier_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event);
> -error:
> - return err;
> -}
> -
> #define EXTFUNC(x) \
> if (strcmp(#x, func_name) == 0) \
> return (void *)x;
> diff --git a/src/cl_api_kernel.c b/src/cl_api_kernel.c
> index a1075d7..ef494e6 100644
> --- a/src/cl_api_kernel.c
> +++ b/src/cl_api_kernel.c
> @@ -130,10 +130,19 @@ clEnqueueNDRangeKernel(cl_command_queue
> command_queue,
> }
> realGroupSize *= fixed_local_sz[i];
> }
> - if (realGroupSize % 8 != 0)
> +
> +      //in a loop of the conformance test (such as test_api repeated_setup_cleanup), each iteration
> +      //creates a new context and a new command queue, and uses 'globalsize[0]=1000, localsize=NULL' to enqueue the kernel;
> +      //this triggers the following message many times.
> +      //to avoid too many messages, only print it the first time in the process.
> +      //just use a static variable, since printing a few extra times in the multi-thread case does not matter.
> + static int warn_no_good_localsize = 1;
> + if (realGroupSize % 8 != 0 && warn_no_good_localsize) {
> + warn_no_good_localsize = 0;
>         DEBUGP(DL_WARNING, "unable to find good values for local_work_size[i], please provide\n"
> -                           " local_work_size[] explicitly, you can find good values with\n"
> -                           " trial-and-error method.");
> + " local_work_size[] explicitly, you can find good values with\n"
> + " trial-and-error method.");
> + }
> }
> }
>
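For reference, the heuristic above is a plain divisor search: for each dimension take the largest j <= 64 that divides the global size and still fits in the remaining 256-thread group budget. A self-contained sketch (pick_local_sizes is my name; the 64/256 limits are taken from the code above):

    #include <stdio.h>

    static size_t pick_local_sizes(const size_t *gsz, unsigned dims, size_t *lsz)
    {
      size_t max_dim = 64, budget = 256, total = 1, j;
      unsigned i;
      for (i = 0; i < dims; i++) {
        lsz[i] = 1;
        for (j = max_dim; j > 1; j--) {
          if (gsz[i] % j == 0 && j <= budget) {
            lsz[i] = j;          /* largest divisor that fits */
            budget /= j;         /* shrink remaining group budget */
            if (budget < max_dim)
              max_dim = budget;
            break;
          }
        }
        total *= lsz[i];
      }
      return total; /* caller warns if total % 8 != 0 */
    }

    int main(void)
    {
      size_t g[1] = {1000}, l[1];
      size_t total = pick_local_sizes(g, 1, l);
      printf("local=%zu total=%zu\n", l[0], total); /* local=50 total=50 */
      return 0;
    }

With globalsize[0]=1000 the largest divisor <= 64 is 50, which is not a multiple of 8, so the warning fires on every iteration of repeated_setup_cleanup; hence the one-shot static flag.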
> @@ -253,10 +262,10 @@ clEnqueueNativeKernel(cl_command_queue
> command_queue,
>
>   //Per spec, we need to copy the args
> if (cb_args) {
> - new_args = CL_MALLOC(cb_args);
> + new_args = cl_malloc(cb_args);
> if (num_mem_objects) {
> - new_args_mem_loc = CL_MALLOC(sizeof(void *) * num_mem_objects);
> - new_mem_list = CL_MALLOC(sizeof(cl_mem) * num_mem_objects);
> + new_args_mem_loc = cl_malloc(sizeof(void *) * num_mem_objects);
> + new_mem_list = cl_malloc(sizeof(cl_mem) * num_mem_objects);
> memcpy(new_mem_list, mem_list, sizeof(cl_mem) *
> num_mem_objects);
> }
>
> @@ -320,11 +329,11 @@ clEnqueueNativeKernel(cl_command_queue
> command_queue,
>
> if (err != CL_SUCCESS) {
> if (new_args)
> - CL_FREE(new_args);
> + cl_free(new_args);
> if (new_mem_list)
> - CL_FREE(new_mem_list);
> + cl_free(new_mem_list);
> if (new_args_mem_loc)
> - CL_FREE(new_args_mem_loc);
> + cl_free(new_args_mem_loc);
> }
>
> if (err == CL_SUCCESS && event) {
> diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
> index 8d3c6b0..54a487c 100644
> --- a/src/cl_command_queue.c
> +++ b/src/cl_command_queue.c
> @@ -45,14 +45,16 @@ cl_command_queue_new(cl_context ctx)
> assert(ctx);
> TRY_ALLOC_NO_ERR (queue, CALLOC(struct _cl_command_queue));
> CL_OBJECT_INIT_BASE(queue, CL_OBJECT_COMMAND_QUEUE_MAGIC);
> + cl_command_queue_init_enqueue(queue);
>
> - queue->cmrt_event = NULL;
> if ((queue->thread_data = cl_thread_data_create()) == NULL) {
> goto error;
> }
>
> /* Append the command queue in the list */
> cl_context_add_queue(ctx, queue);
> + queue->ctx = ctx;
> + queue->cmrt_event = NULL;
>
> exit:
> return queue;
> @@ -69,6 +71,8 @@ cl_command_queue_delete(cl_command_queue
> queue)
> if (CL_OBJECT_DEC_REF(queue) > 1)
> return;
>
> + cl_command_queue_destroy_enqueue(queue);
> +
> #ifdef HAS_CMRT
> if (queue->cmrt_event != NULL)
> cmrt_destroy_event(queue);
> @@ -76,7 +80,7 @@ cl_command_queue_delete(cl_command_queue
> queue)
>
> // If there is a list of valid events, we need to give them
> // a chance to call the call-back function.
> - cl_event_update_last_events(queue,1);
> + //cl_event_update_last_events(queue,1);
>
> cl_thread_data_destroy(queue);
> queue->thread_data = NULL;
> @@ -112,10 +116,9 @@ set_image_info(char *curbe,
> }
>
> LOCAL cl_int
> -cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k)
> +cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k,
> cl_gpgpu gpgpu)
> {
> uint32_t i;
> - GET_QUEUE_THREAD_GPGPU(queue);
>
> for (i = 0; i < k->image_sz; i++) {
> int id = k->images[i].arg_idx;
> @@ -149,9 +152,9 @@
> cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k)
> }
>
> LOCAL cl_int
> -cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
> +cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k,
> cl_gpgpu gpgpu)
> {
> - GET_QUEUE_THREAD_GPGPU(queue);
> + //GET_QUEUE_THREAD_GPGPU(queue);
>
> /* Bind all user buffers (given by clSetKernelArg) */
> uint32_t i;
> @@ -175,7 +178,8 @@
> cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
> return CL_SUCCESS;
> }
>
> -extern cl_int cl_command_queue_ND_range_gen7(cl_command_queue,
> cl_kernel, uint32_t, const size_t *, const size_t *, const size_t *);
> +extern cl_int cl_command_queue_ND_range_gen7(cl_command_queue,
> cl_kernel, cl_event,
> + uint32_t, const size_t *, const size_t *, const size_t *);
>
> static cl_int
> cl_kernel_check_args(cl_kernel k)
> @@ -190,6 +194,7 @@ cl_kernel_check_args(cl_kernel k)
> LOCAL cl_int
> cl_command_queue_ND_range(cl_command_queue queue,
> cl_kernel k,
> + cl_event event,
> const uint32_t work_dim,
> const size_t *global_wk_off,
> const size_t *global_wk_sz,
> @@ -203,8 +208,10 @@
> cl_command_queue_ND_range(cl_command_queue queue,
> /* Check that the user did not forget any argument */
> TRY (cl_kernel_check_args, k);
>
> +
> if (ver == 7 || ver == 75 || ver == 8 || ver == 9)
> - TRY (cl_command_queue_ND_range_gen7, queue, k, work_dim,
> global_wk_off, global_wk_sz, local_wk_sz);
> + TRY (cl_command_queue_ND_range_gen7, queue, k, event,
> + work_dim, global_wk_off, global_wk_sz, local_wk_sz);
> else
> FATAL ("Unknown Gen Device");
>
> @@ -213,7 +220,7 @@ error:
> }
>
> LOCAL int
> -cl_command_queue_flush_gpgpu(cl_command_queue queue, cl_gpgpu
> gpgpu)
> +cl_command_queue_flush_gpgpu(cl_gpgpu gpgpu)
> {
> void* printf_info = cl_gpgpu_get_printf_info(gpgpu);
> void* profiling_info;
> @@ -246,15 +253,15 @@ cl_command_queue_flush(cl_command_queue
> queue)
> {
> int err;
> GET_QUEUE_THREAD_GPGPU(queue);
> - err = cl_command_queue_flush_gpgpu(queue, gpgpu);
> + err = cl_command_queue_flush_gpgpu(gpgpu);
>   // We now keep a list of uncompleted events and check if they complete
>   // on every flush. This makes sure all created events get a chance to
>   // update their status, so callback functions and references can be handled.
> - cl_event_update_last_events(queue,0);
> + //cl_event_update_last_events(queue,0);
>
> cl_event current_event = get_current_event(queue);
> if (current_event && err == CL_SUCCESS) {
> - err = cl_event_flush(current_event);
> + //err = cl_event_flush(current_event);
> set_current_event(queue, NULL);
> }
> cl_invalid_thread_gpgpu(queue);
> @@ -265,7 +272,7 @@ LOCAL cl_int
> cl_command_queue_finish(cl_command_queue queue)
> {
> cl_gpgpu_sync(cl_get_thread_batch_buf(queue));
> - cl_event_update_last_events(queue,1);
> + //cl_event_update_last_events(queue,1);
> return CL_SUCCESS;
> }
>
> @@ -337,72 +344,69 @@
> cl_command_queue_remove_event(cl_command_queue queue, cl_event
> event)
> queue->wait_events_num -= 1;
> }
>
> -#define DEFAULT_WAIT_EVENTS_SIZE 16
> LOCAL void
> cl_command_queue_insert_barrier_event(cl_command_queue queue,
> cl_event event)
> {
> - cl_int i=0;
> - cl_event *new_list;
> + cl_int i = 0;
> +
> + cl_event_add_ref(event);
>
> assert(queue != NULL);
> - if(queue->barrier_events == NULL) {
> - queue->barrier_events_size = DEFAULT_WAIT_EVENTS_SIZE;
> -    TRY_ALLOC_NO_ERR (queue->barrier_events, CALLOC_ARRAY(cl_event, queue->barrier_events_size));
> + CL_OBJECT_LOCK(queue);
> +
> + if (queue->barrier_events == NULL) {
> + queue->barrier_events_size = 4;
> +    queue->barrier_events = cl_calloc(queue->barrier_events_size, sizeof(cl_event));
> + assert(queue->barrier_events);
> }
>
> - for(i=0; i<queue->barrier_events_num; i++) {
> - if(queue->barrier_events[i] == event)
> -      return;   //already in barrier_events, no need to insert again
> + for (i = 0; i<queue->barrier_events_num; i++) {
> + assert(queue->barrier_events[i] != event);
> }
>
> if(queue->barrier_events_num < queue->barrier_events_size) {
> queue->barrier_events[queue->barrier_events_num++] = event;
> + CL_OBJECT_UNLOCK(queue);
> return;
> }
>
> - //barrier_events_num == barrier_events_size, array is full
> +  /* Array is full, double its size. */
> queue->barrier_events_size *= 2;
> -  TRY_ALLOC_NO_ERR (new_list, CALLOC_ARRAY(cl_event, queue->barrier_events_size));
> -  memcpy(new_list, queue->barrier_events, sizeof(cl_event)*queue->barrier_events_num);
> - cl_free(queue->barrier_events);
> - queue->barrier_events = new_list;
> - queue->barrier_events[queue->barrier_events_num++] = event;
> - return;
> + queue->barrier_events = cl_realloc(queue->barrier_events,
> + queue->barrier_events_size * sizeof(cl_event));
> + assert(queue->barrier_events);
>
> -exit:
> + queue->barrier_events[queue->barrier_events_num++] = event;
> + CL_OBJECT_UNLOCK(queue);
> return;
> -error:
> - if(queue->barrier_events)
> - cl_free(queue->barrier_events);
> - queue->barrier_events = NULL;
> - queue->barrier_events_size = 0;
> - queue->barrier_events_num = 0;
> - goto exit;
> -
> }
>
> LOCAL void
> cl_command_queue_remove_barrier_event(cl_command_queue queue,
> cl_event event)
> {
> - cl_int i=0;
> + cl_int i = 0;
> + assert(queue != NULL);
>
> - if(queue->barrier_events_num == 0)
> - return;
> + CL_OBJECT_LOCK(queue);
>
> - for(i=0; i<queue->barrier_events_num; i++) {
> + assert(queue->barrier_events_num > 0);
> + assert(queue->barrier_events);
> +
> + for(i = 0; i < queue->barrier_events_num; i++) {
> if(queue->barrier_events[i] == event)
> break;
> }
> + assert(i < queue->barrier_events_num); // Must find it.
>
> - if(i == queue->barrier_events_num)
> - return;
> -
> - if(i == queue->barrier_events_num - 1) {
> + if(i == queue->barrier_events_num - 1) { // The last one.
> queue->barrier_events[i] = NULL;
> } else {
> - for(; i<queue->barrier_events_num-1; i++) {
> + for(; i < queue->barrier_events_num - 1; i++) { // Move forward.
> queue->barrier_events[i] = queue->barrier_events[i+1];
> }
> }
> queue->barrier_events_num -= 1;
> + CL_OBJECT_UNLOCK(queue);
> +
> + cl_event_delete(event);
> }
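The new insert path drops the old TRY_ALLOC/goto unwinding in favour of an amortized-doubling array guarded by the queue lock; note that allocation failure is now an assert rather than a cleanup path. Stripped of the locking and refcounting, the growth policy is just this (a sketch; ptr_array is my name and stands in for queue->barrier_events):

    #include <stdlib.h>
    #include <assert.h>

    typedef struct {
      void **items;
      int num, size;
    } ptr_array;

    static void ptr_array_append(ptr_array *a, void *item)
    {
      if (a->items == NULL) {
        a->size = 4; /* same initial size as the patch */
        a->items = calloc(a->size, sizeof(void *));
        assert(a->items);
      }
      if (a->num == a->size) {
        a->size *= 2; /* double when full */
        a->items = realloc(a->items, a->size * sizeof(void *));
        assert(a->items);
      }
      a->items[a->num++] = item;
    }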
> diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
> index 34886f8..470cafb 100644
> --- a/src/cl_command_queue.h
> +++ b/src/cl_command_queue.h
> @@ -81,6 +81,7 @@ extern void
> cl_command_queue_add_ref(cl_command_queue);
> /* Map ND range kernel from OCL API */
> extern cl_int cl_command_queue_ND_range(cl_command_queue queue,
> cl_kernel ker,
> + cl_event event,
> const uint32_t work_dim,
> const size_t *global_work_offset,
> const size_t *global_work_size,
> @@ -93,16 +94,16 @@ extern cl_int
> cl_command_queue_set_report_buffer(cl_command_queue, cl_mem);
> extern cl_int cl_command_queue_flush(cl_command_queue);
>
> /* Flush for the specified gpgpu */
> -extern int cl_command_queue_flush_gpgpu(cl_command_queue,
> cl_gpgpu);
> +extern int cl_command_queue_flush_gpgpu(cl_gpgpu);
>
> /* Wait for the completion of the command queue */
> extern cl_int cl_command_queue_finish(cl_command_queue);
>
> /* Bind all the surfaces in the GPGPU state */
> -extern cl_int cl_command_queue_bind_surface(cl_command_queue,
> cl_kernel);
> +extern cl_int cl_command_queue_bind_surface(cl_command_queue,
> cl_kernel, cl_gpgpu);
>
> /* Bind all the image surfaces in the GPGPU state */
> -extern cl_int cl_command_queue_bind_image(cl_command_queue,
> cl_kernel);
> +extern cl_int cl_command_queue_bind_image(cl_command_queue,
> cl_kernel, cl_gpgpu);
>
> /* Insert a user event to command's wait_events */
> extern void cl_command_queue_insert_event(cl_command_queue,
> cl_event);
> diff --git a/src/cl_command_queue_enqueue.c
> b/src/cl_command_queue_enqueue.c
> index 1848d50..7bc6dd3 100644
> --- a/src/cl_command_queue_enqueue.c
> +++ b/src/cl_command_queue_enqueue.c
> @@ -18,7 +18,7 @@
> */
>
> #include "cl_command_queue.h"
> -#include "cl_event_new.h"
> +#include "cl_event.h"
> #include "cl_alloc.h"
> #include <stdio.h>
>
> @@ -203,7 +203,7 @@
> cl_command_queue_record_in_queue_events(cl_command_queue queue,
> cl_uint *list_nu
> }
> assert(event_num > 0);
>
> - enqueued_list = CL_CALLOC(event_num, sizeof(cl_event));
> + enqueued_list = cl_calloc(event_num, sizeof(cl_event));
> assert(enqueued_list);
>
> i = 0;
> @@ -265,7 +265,7 @@ cl_command_queue_wait_flush(cl_command_queue
> queue)
> cl_event_delete(enqueued_list[i]);
> }
> if (enqueued_list)
> - CL_FREE(enqueued_list);
> + cl_free(enqueued_list);
>
> return CL_SUCCESS;
> }
> @@ -315,7 +315,7 @@
> cl_command_queue_wait_finish(cl_command_queue queue)
> cl_event_delete(enqueued_list[i]);
> }
> if (enqueued_list)
> - CL_FREE(enqueued_list);
> + cl_free(enqueued_list);
>
> return CL_SUCCESS;
> }
> diff --git a/src/cl_command_queue_gen7.c
> b/src/cl_command_queue_gen7.c
> index b6a5920..5ad3b8b 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -23,6 +23,7 @@
> #include "cl_kernel.h"
> #include "cl_device_id.h"
> #include "cl_mem.h"
> +#include "cl_event.h"
> #include "cl_utils.h"
> #include "cl_alloc.h"
>
> @@ -123,12 +124,12 @@ error:
> }
>
> static int
> -cl_upload_constant_buffer(cl_command_queue queue, cl_kernel ker)
> +cl_upload_constant_buffer(cl_command_queue queue, cl_kernel ker,
> cl_gpgpu gpgpu)
> {
> /* calculate constant buffer size
> * we need raw_size & aligned_size
> */
> - GET_QUEUE_THREAD_GPGPU(queue);
> + //GET_QUEUE_THREAD_GPGPU(queue);
> int32_t arg;
> size_t offset = 0;
> uint32_t raw_size = 0, aligned_size =0;
> @@ -331,12 +332,14 @@ cl_alloc_printf(cl_gpgpu gpgpu, cl_kernel ker, void*
> printf_info, int printf_num
> LOCAL cl_int
> cl_command_queue_ND_range_gen7(cl_command_queue queue,
> cl_kernel ker,
> + cl_event event,
> const uint32_t work_dim,
> const size_t *global_wk_off,
> const size_t *global_wk_sz,
> const size_t *local_wk_sz)
> {
> - GET_QUEUE_THREAD_GPGPU(queue);
> + //GET_QUEUE_THREAD_GPGPU(queue);
> + cl_gpgpu gpgpu = cl_gpgpu_new(queue->ctx->drv);
> cl_context ctx = queue->ctx;
> char *final_curbe = NULL; /* Includes them and one sub-buffer per group
> */
> cl_gpgpu_kernel kernel;
> @@ -403,9 +406,9 @@
> cl_command_queue_ND_range_gen7(cl_command_queue queue,
> }
>
> /* Bind user buffers */
> - cl_command_queue_bind_surface(queue, ker);
> + cl_command_queue_bind_surface(queue, ker, gpgpu);
> /* Bind user images */
> -  if(UNLIKELY(err = cl_command_queue_bind_image(queue, ker) != CL_SUCCESS))
> +  if(UNLIKELY(err = cl_command_queue_bind_image(queue, ker, gpgpu) != CL_SUCCESS))
> return err;
> /* Bind all samplers */
> if (ker->vme)
> @@ -419,7 +422,7 @@
> cl_command_queue_ND_range_gen7(cl_command_queue queue,
> /* Bind a stack if needed */
> cl_bind_stack(gpgpu, ker);
>
> - if (cl_upload_constant_buffer(queue, ker) != 0)
> + if (cl_upload_constant_buffer(queue, ker, gpgpu) != 0)
> goto error;
>
> cl_gpgpu_states_setup(gpgpu, &kernel);
> @@ -440,7 +443,7 @@
> cl_command_queue_ND_range_gen7(cl_command_queue queue,
> batch_sz = cl_kernel_compute_batch_sz(ker);
> if (cl_gpgpu_batch_reset(gpgpu, batch_sz) != 0)
> goto error;
> - cl_set_thread_batch_buf(queue, cl_gpgpu_ref_batch_buf(gpgpu));
> + //cl_set_thread_batch_buf(queue, cl_gpgpu_ref_batch_buf(gpgpu));
> cl_gpgpu_batch_start(gpgpu);
>
> /* Issue the GPGPU_WALKER command */
> @@ -448,6 +451,10 @@
> cl_command_queue_ND_range_gen7(cl_command_queue queue,
>
> /* Close the batch buffer and submit it */
> cl_gpgpu_batch_end(gpgpu, 0);
> +
> + event->exec_data.gpgpu = gpgpu;
> + event->exec_data.type = EnqueueNDRangeKernel;
> +
> return CL_SUCCESS;
>
> error:
> diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
> index 54c0ffa..ac29ebe 100644
> --- a/src/cl_enqueue.c
> +++ b/src/cl_enqueue.c
> @@ -16,93 +16,101 @@
> *
> * Author: Rong Yang <rong.r.yang at intel.com>
> */
> -#include <stdio.h>
> -#include <string.h>
> -#include <assert.h>
> -#include <pthread.h>
>
> +//#include "cl_image.h"
> #include "cl_enqueue.h"
> -#include "cl_image.h"
> #include "cl_driver.h"
> #include "cl_event.h"
> #include "cl_command_queue.h"
> #include "cl_utils.h"
> +#include "cl_alloc.h"
> +#include <stdio.h>
> +#include <string.h>
> +#include <assert.h>
> +#include <pthread.h>
>
> -
> -cl_int cl_enqueue_read_buffer(enqueue_data* data)
> +static cl_int
> +cl_enqueue_read_buffer(enqueue_data *data, cl_int status)
> {
> cl_int err = CL_SUCCESS;
> cl_mem mem = data->mem_obj;
> +
> + if (status != CL_COMPLETE)
> + return err;
> +
> assert(mem->type == CL_MEM_BUFFER_TYPE ||
> mem->type == CL_MEM_SUBBUFFER_TYPE);
> - struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
> + struct _cl_mem_buffer *buffer = (struct _cl_mem_buffer *)mem;
>   //cl_buffer_get_subdata is sometimes very slow in the linux kernel on skl and chv,
>   //and it happens randomly. So temporarily disable it and use map/copy/unmap to read.
>   //Should re-enable it after finding the root cause.
> if (0 && !mem->is_userptr) {
> if (cl_buffer_get_subdata(mem->bo, data->offset + buffer->sub_offset,
> - data->size, data->ptr) != 0)
> + data->size, data->ptr) != 0)
> err = CL_MAP_FAILURE;
> } else {
> - void* src_ptr = cl_mem_map_auto(mem, 0);
> + void *src_ptr = cl_mem_map_auto(mem, 0);
> if (src_ptr == NULL)
> err = CL_MAP_FAILURE;
> else {
>       //sometimes the application invokes read buffer instead of map buffer, even if userptr is enabled;
>       //memcpy is not necessary in this case
> - if (data->ptr != (char*)src_ptr + data->offset + buffer->sub_offset)
> - memcpy(data->ptr, (char*)src_ptr + data->offset + buffer->sub_offset,
> data->size);
> + if (data->ptr != (char *)src_ptr + data->offset + buffer->sub_offset)
> + memcpy(data->ptr, (char *)src_ptr + data->offset + buffer->sub_offset,
> data->size);
> cl_mem_unmap_auto(mem);
> }
> }
> return err;
> }
>
> -cl_int cl_enqueue_read_buffer_rect(enqueue_data* data)
> +static cl_int
> +cl_enqueue_read_buffer_rect(enqueue_data *data, cl_int status)
> {
> cl_int err = CL_SUCCESS;
> - void* src_ptr;
> - void* dst_ptr;
> + void *src_ptr;
> + void *dst_ptr;
>
> - const size_t* origin = data->origin;
> - const size_t* host_origin = data->host_origin;
> - const size_t* region = data->region;
> + const size_t *origin = data->origin;
> + const size_t *host_origin = data->host_origin;
> + const size_t *region = data->region;
>
> cl_mem mem = data->mem_obj;
> +
> + if (status != CL_COMPLETE)
> + return err;
> +
> assert(mem->type == CL_MEM_BUFFER_TYPE ||
> mem->type == CL_MEM_SUBBUFFER_TYPE);
> - struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
> + struct _cl_mem_buffer *buffer = (struct _cl_mem_buffer *)mem;
>
> if (!(src_ptr = cl_mem_map_auto(mem, 0))) {
> err = CL_MAP_FAILURE;
> goto error;
> }
>
> - size_t offset = origin[0] + data->row_pitch*origin[1] + data-
> >slice_pitch*origin[2];
> - src_ptr = (char*)src_ptr + offset + buffer->sub_offset;
> -
> - offset = host_origin[0] + data->host_row_pitch*host_origin[1] + data-
> >host_slice_pitch*host_origin[2];
> - dst_ptr = (char *)data->ptr + offset;
> -
> - if (data->row_pitch == region[0] && data->row_pitch == data-
> >host_row_pitch &&
> - (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && data-
> >slice_pitch == data->host_slice_pitch)))
> - {
> - memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] :
> data->slice_pitch*region[2]);
> - }
> - else {
> - cl_uint y, z;
> - for (z = 0; z < region[2]; z++) {
> - const char* src = src_ptr;
> - char* dst = dst_ptr;
> - for (y = 0; y < region[1]; y++) {
> - memcpy(dst, src, region[0]);
> - src += data->row_pitch;
> - dst += data->host_row_pitch;
> - }
> - src_ptr = (char*)src_ptr + data->slice_pitch;
> - dst_ptr = (char*)dst_ptr + data->host_slice_pitch;
> - }
> - }
> + size_t offset = origin[0] + data->row_pitch * origin[1] + data->slice_pitch *
> origin[2];
> + src_ptr = (char *)src_ptr + offset + buffer->sub_offset;
> +
> +  offset = host_origin[0] + data->host_row_pitch * host_origin[1] + data->host_slice_pitch * host_origin[2];
> + dst_ptr = (char *)data->ptr + offset;
> +
> +  if (data->row_pitch == region[0] && data->row_pitch == data->host_row_pitch &&
> +      (region[2] == 1 || (data->slice_pitch == region[0] * region[1] && data->slice_pitch == data->host_slice_pitch))) {
> +    memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch * region[1] : data->slice_pitch * region[2]);
> + } else {
> + cl_uint y, z;
> + for (z = 0; z < region[2]; z++) {
> + const char *src = src_ptr;
> + char *dst = dst_ptr;
> + for (y = 0; y < region[1]; y++) {
> + memcpy(dst, src, region[0]);
> + src += data->row_pitch;
> + dst += data->host_row_pitch;
> + }
> + src_ptr = (char *)src_ptr + data->slice_pitch;
> + dst_ptr = (char *)dst_ptr + data->host_slice_pitch;
> + }
> + }
>
> err = cl_mem_unmap_auto(mem);
>
> @@ -110,75 +118,80 @@ error:
> return err;
> }
>
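Both rect paths use the usual pitched-3D addressing, offset = x + row_pitch * y + slice_pitch * z, and fall back to a row-by-row copy only when the pitches are not tight. A tiny worked example with made-up pitches:

    #include <stdio.h>

    int main(void)
    {
      size_t row_pitch = 1024, slice_pitch = 1024 * 256;
      size_t origin[3] = {16, 2, 1}; /* x in bytes, y in rows, z in slices */
      size_t offset = origin[0] + row_pitch * origin[1] + slice_pitch * origin[2];
      printf("offset = %zu\n", offset); /* 16 + 2048 + 262144 = 264208 */
      return 0;
    }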
> -cl_int cl_enqueue_write_buffer(enqueue_data *data)
> +static cl_int
> +cl_enqueue_write_buffer(enqueue_data *data, cl_int status)
> {
> cl_int err = CL_SUCCESS;
> cl_mem mem = data->mem_obj;
> assert(mem->type == CL_MEM_BUFFER_TYPE ||
> mem->type == CL_MEM_SUBBUFFER_TYPE);
> - struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
> + struct _cl_mem_buffer *buffer = (struct _cl_mem_buffer *)mem;
> +
> + if (status != CL_COMPLETE)
> + return err;
>
> if (mem->is_userptr) {
> - void* dst_ptr = cl_mem_map_auto(mem, 1);
> + void *dst_ptr = cl_mem_map_auto(mem, 1);
> if (dst_ptr == NULL)
> err = CL_MAP_FAILURE;
> else {
> - memcpy((char*)dst_ptr + data->offset + buffer->sub_offset, data-
> >const_ptr, data->size);
> + memcpy((char *)dst_ptr + data->offset + buffer->sub_offset, data-
> >const_ptr, data->size);
> cl_mem_unmap_auto(mem);
> }
> - }
> - else {
> + } else {
> if (cl_buffer_subdata(mem->bo, data->offset + buffer->sub_offset,
> - data->size, data->const_ptr) != 0)
> + data->size, data->const_ptr) != 0)
> err = CL_MAP_FAILURE;
> }
>
> return err;
> }
>
> -cl_int cl_enqueue_write_buffer_rect(enqueue_data *data)
> +static cl_int
> +cl_enqueue_write_buffer_rect(enqueue_data *data, cl_int status)
> {
> cl_int err = CL_SUCCESS;
> - void* src_ptr;
> - void* dst_ptr;
> + void *src_ptr;
> + void *dst_ptr;
>
> - const size_t* origin = data->origin;
> - const size_t* host_origin = data->host_origin;
> - const size_t* region = data->region;
> + const size_t *origin = data->origin;
> + const size_t *host_origin = data->host_origin;
> + const size_t *region = data->region;
>
> cl_mem mem = data->mem_obj;
> assert(mem->type == CL_MEM_BUFFER_TYPE ||
> mem->type == CL_MEM_SUBBUFFER_TYPE);
> - struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
> + struct _cl_mem_buffer *buffer = (struct _cl_mem_buffer *)mem;
> +
> + if (status != CL_COMPLETE)
> + return err;
>
> if (!(dst_ptr = cl_mem_map_auto(mem, 1))) {
> err = CL_MAP_FAILURE;
> goto error;
> }
>
> - size_t offset = origin[0] + data->row_pitch*origin[1] + data-
> >slice_pitch*origin[2];
> + size_t offset = origin[0] + data->row_pitch * origin[1] + data->slice_pitch *
> origin[2];
> dst_ptr = (char *)dst_ptr + offset + buffer->sub_offset;
>
> - offset = host_origin[0] + data->host_row_pitch*host_origin[1] + data-
> >host_slice_pitch*host_origin[2];
> - src_ptr = (char*)data->const_ptr + offset;
> +  offset = host_origin[0] + data->host_row_pitch * host_origin[1] + data->host_slice_pitch * host_origin[2];
> + src_ptr = (char *)data->const_ptr + offset;
>
> if (data->row_pitch == region[0] && data->row_pitch == data-
> >host_row_pitch &&
> - (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && data-
> >slice_pitch == data->host_slice_pitch)))
> - {
> - memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] :
> data->slice_pitch*region[2]);
> - }
> - else {
> +      (region[2] == 1 || (data->slice_pitch == region[0] * region[1] && data->slice_pitch == data->host_slice_pitch))) {
> +    memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch * region[1] : data->slice_pitch * region[2]);
> + } else {
> cl_uint y, z;
> for (z = 0; z < region[2]; z++) {
> - const char* src = src_ptr;
> - char* dst = dst_ptr;
> + const char *src = src_ptr;
> + char *dst = dst_ptr;
> for (y = 0; y < region[1]; y++) {
> memcpy(dst, src, region[0]);
> src += data->host_row_pitch;
> dst += data->row_pitch;
> }
> - src_ptr = (char*)src_ptr + data->host_slice_pitch;
> - dst_ptr = (char*)dst_ptr + data->slice_pitch;
> + src_ptr = (char *)src_ptr + data->host_slice_pitch;
> + dst_ptr = (char *)dst_ptr + data->slice_pitch;
> }
> }
>
> @@ -188,16 +201,19 @@ error:
> return err;
> }
>
> -
> -cl_int cl_enqueue_read_image(enqueue_data *data)
> +static cl_int
> +cl_enqueue_read_image(enqueue_data *data, cl_int status)
> {
> cl_int err = CL_SUCCESS;
> - void* src_ptr;
> + void *src_ptr;
>
> cl_mem mem = data->mem_obj;
> CHECK_IMAGE(mem, image);
> - const size_t* origin = data->origin;
> - const size_t* region = data->region;
> + const size_t *origin = data->origin;
> + const size_t *region = data->region;
> +
> + if (status != CL_COMPLETE)
> + return err;
>
> if (!(src_ptr = cl_mem_map_auto(mem, 0))) {
> err = CL_MAP_FAILURE;
> @@ -208,40 +224,42 @@ cl_int cl_enqueue_read_image(enqueue_data
> *data)
> src_ptr = (char*)src_ptr + offset;
>
> if (!origin[0] && region[0] == image->w && data->row_pitch == image-
> >row_pitch &&
> - (region[2] == 1 || (!origin[1] && region[1] == image->h && data-
> >slice_pitch == image->slice_pitch)))
> - {
> - memcpy(data->ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] :
> data->slice_pitch*region[2]);
> - }
> - else {
> +      (region[2] == 1 || (!origin[1] && region[1] == image->h && data->slice_pitch == image->slice_pitch))) {
> +    memcpy(data->ptr, src_ptr, region[2] == 1 ? data->row_pitch * region[1] : data->slice_pitch * region[2]);
> + } else {
> cl_uint y, z;
> for (z = 0; z < region[2]; z++) {
> - const char* src = src_ptr;
> - char* dst = data->ptr;
> + const char *src = src_ptr;
> + char *dst = data->ptr;
> for (y = 0; y < region[1]; y++) {
> - memcpy(dst, src, image->bpp*region[0]);
> + memcpy(dst, src, image->bpp * region[0]);
> src += image->row_pitch;
> dst += data->row_pitch;
> }
> - src_ptr = (char*)src_ptr + image->slice_pitch;
> - data->ptr = (char*)data->ptr + data->slice_pitch;
> + src_ptr = (char *)src_ptr + image->slice_pitch;
> + data->ptr = (char *)data->ptr + data->slice_pitch;
> }
> }
>
> - err = cl_mem_unmap_auto(mem);
> + err = cl_mem_unmap_auto(mem);
>
> error:
> return err;
> -
> }
>
> -cl_int cl_enqueue_write_image(enqueue_data *data)
> +static cl_int
> +cl_enqueue_write_image(enqueue_data *data, cl_int status)
> {
> cl_int err = CL_SUCCESS;
> - void* dst_ptr;
> + void *dst_ptr;
>
> cl_mem mem = data->mem_obj;
> +
> CHECK_IMAGE(mem, image);
>
> + if (status != CL_COMPLETE)
> + return err;
> +
> if (!(dst_ptr = cl_mem_map_auto(mem, 1))) {
> err = CL_MAP_FAILURE;
> goto error;
> @@ -255,45 +273,57 @@ cl_int cl_enqueue_write_image(enqueue_data
> *data)
>
> error:
> return err;
> -
> }
>
> -cl_int cl_enqueue_map_buffer(enqueue_data *data)
> +static cl_int
> +cl_enqueue_map_buffer(enqueue_data *data, cl_int status)
> {
> void *ptr = NULL;
> cl_int err = CL_SUCCESS;
> cl_mem mem = data->mem_obj;
> assert(mem->type == CL_MEM_BUFFER_TYPE ||
> mem->type == CL_MEM_SUBBUFFER_TYPE);
> - struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
> + struct _cl_mem_buffer *buffer = (struct _cl_mem_buffer *)mem;
>
> - if (mem->is_userptr)
> - ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0);
> - else {
> - if(data->unsync_map == 1)
> -      //because clEnqueueMapBuffer used an unsync map, force map_gtt here
> - ptr = cl_mem_map_gtt(mem);
> - else
> + if (status == CL_SUBMITTED) {
> + if (buffer->base.is_userptr) {
> + ptr = buffer->base.host_ptr;
> + } else {
> + if ((ptr = cl_mem_map_gtt_unsync(&buffer->base)) == NULL) {
> + err = CL_MAP_FAILURE;
> + return err;
> + }
> + }
> + data->ptr = ptr;
> + } else if (status == CL_COMPLETE) {
> + if (mem->is_userptr)
> ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0);
> - }
> + else {
> + if (data->unsync_map == 1)
> +        //because clEnqueueMapBuffer used an unsync map, force map_gtt here
> + ptr = cl_mem_map_gtt(mem);
> + else
> + ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0);
> + }
>
> - if (ptr == NULL) {
> - err = CL_MAP_FAILURE;
> - goto error;
> - }
> - data->ptr = ptr;
> + if (ptr == NULL) {
> + err = CL_MAP_FAILURE;
> + return err;
> + }
> + data->ptr = ptr;
>
> - if((mem->flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr) {
> - assert(mem->host_ptr);
> - ptr = (char*)ptr + data->offset + buffer->sub_offset;
> - memcpy(mem->host_ptr + data->offset + buffer->sub_offset, ptr, data-
> >size);
> + if ((mem->flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr) {
> + assert(mem->host_ptr);
> + ptr = (char *)ptr + data->offset + buffer->sub_offset;
> +      memcpy(mem->host_ptr + data->offset + buffer->sub_offset, ptr, data->size);
> + }
> }
>
> -error:
> return err;
> }
>
> -cl_int cl_enqueue_map_image(enqueue_data *data)
> +static cl_int
> +cl_enqueue_map_image(enqueue_data *data, cl_int status)
> {
> cl_int err = CL_SUCCESS;
> cl_mem mem = data->mem_obj;
> @@ -301,46 +331,59 @@ cl_int cl_enqueue_map_image(enqueue_data
> *data)
> size_t row_pitch = 0;
> CHECK_IMAGE(mem, image);
>
> - if(data->unsync_map == 1)
> -    //because clEnqueueMapBuffer used an unsync map, force map_gtt here
> - ptr = cl_mem_map_gtt(mem);
> - else
> - ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0);
> + if (status == CL_SUBMITTED) {
> + if ((ptr = cl_mem_map_gtt_unsync(mem)) == NULL) {
> + err = CL_MAP_FAILURE;
> + goto error;
> + }
> + data->ptr = ptr;
> + } else if (status == CL_COMPLETE) {
> + if (data->unsync_map == 1)
> +      //because clEnqueueMapBuffer used an unsync map, force map_gtt here
> + ptr = cl_mem_map_gtt(mem);
> + else
> + ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0);
> +
> + if (ptr == NULL) {
> + err = CL_MAP_FAILURE;
> + goto error;
> + }
>
> - if (ptr == NULL) {
> - err = CL_MAP_FAILURE;
> - goto error;
> - }
> - data->ptr = (char*)ptr + image->offset;
> - if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
> - row_pitch = image->slice_pitch;
> - else
> - row_pitch = image->row_pitch;
> -
> - if(mem->flags & CL_MEM_USE_HOST_PTR) {
> - assert(mem->host_ptr);
> - if (!mem->is_userptr)
> -      //src and dst need to add the offset inside cl_mem_copy_image_region
> -      cl_mem_copy_image_region(data->origin, data->region,
> -          mem->host_ptr, image->host_row_pitch, image->host_slice_pitch,
> -          data->ptr, row_pitch, image->slice_pitch, image, CL_TRUE, CL_TRUE);
> + data->ptr = (char*)ptr + image->offset;
> + if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
> + row_pitch = image->slice_pitch;
> + else
> + row_pitch = image->row_pitch;
> +
> + if(mem->flags & CL_MEM_USE_HOST_PTR) {
> + assert(mem->host_ptr);
> + if (!mem->is_userptr)
> +        //src and dst need to add the offset inside cl_mem_copy_image_region
> +        cl_mem_copy_image_region(data->origin, data->region,
> +                                 mem->host_ptr, image->host_row_pitch, image->host_slice_pitch,
> +                                 data->ptr, row_pitch, image->slice_pitch, image, CL_TRUE, CL_TRUE);
> + }
> }
>
> error:
> return err;
> }
>
> -cl_int cl_enqueue_unmap_mem_object(enqueue_data *data)
> +static cl_int
> +cl_enqueue_unmap_mem_object(enqueue_data *data, cl_int status)
> {
> cl_int err = CL_SUCCESS;
> int i, j;
> size_t mapped_size = 0;
> size_t origin[3], region[3];
> - void * v_ptr = NULL;
> - void * mapped_ptr = data->ptr;
> + void *v_ptr = NULL;
> + void *mapped_ptr = data->ptr;
> cl_mem memobj = data->mem_obj;
> size_t row_pitch = 0;
>
> + if (status != CL_COMPLETE)
> + return err;
> +
> assert(memobj->mapped_ptr_sz >= memobj->map_ref);
> INVALID_VALUE_IF(!mapped_ptr);
> for (i = 0; i < memobj->mapped_ptr_sz; i++) {
> @@ -348,7 +391,7 @@ cl_int
> cl_enqueue_unmap_mem_object(enqueue_data *data)
> memobj->mapped_ptr[i].ptr = NULL;
> mapped_size = memobj->mapped_ptr[i].size;
> v_ptr = memobj->mapped_ptr[i].v_ptr;
> - for(j=0; j<3; j++) {
> + for (j = 0; j < 3; j++) {
> region[j] = memobj->mapped_ptr[i].region[j];
> origin[j] = memobj->mapped_ptr[i].origin[j];
> memobj->mapped_ptr[i].region[j] = 0;
> @@ -364,10 +407,10 @@ cl_int
> cl_enqueue_unmap_mem_object(enqueue_data *data)
> INVALID_VALUE_IF(i == memobj->mapped_ptr_sz);
>
> if (memobj->flags & CL_MEM_USE_HOST_PTR) {
> - if(memobj->type == CL_MEM_BUFFER_TYPE ||
> - memobj->type == CL_MEM_SUBBUFFER_TYPE) {
> + if (memobj->type == CL_MEM_BUFFER_TYPE ||
> + memobj->type == CL_MEM_SUBBUFFER_TYPE) {
> assert(mapped_ptr >= memobj->host_ptr &&
> - mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size);
> + mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size);
> /* Sync the data. */
> if (!memobj->is_userptr)
> memcpy(v_ptr, mapped_ptr, mapped_size);
> @@ -381,8 +424,8 @@ cl_int
> cl_enqueue_unmap_mem_object(enqueue_data *data)
> if (!memobj->is_userptr)
>        //v_ptr already includes the offset, host_ptr does not.
> cl_mem_copy_image_region(origin, region, v_ptr, row_pitch, image-
> >slice_pitch,
> - memobj->host_ptr, image->host_row_pitch, image-
> >host_slice_pitch,
> - image, CL_FALSE, CL_TRUE);
> + memobj->host_ptr, image->host_row_pitch, image-
> >host_slice_pitch,
> + image, CL_FALSE, CL_TRUE);
> }
> } else {
> assert(v_ptr == mapped_ptr);
> @@ -391,24 +434,24 @@ cl_int
> cl_enqueue_unmap_mem_object(enqueue_data *data)
> cl_mem_unmap_auto(memobj);
>
> /* shrink the mapped slot. */
> - if (memobj->mapped_ptr_sz/2 > memobj->map_ref) {
> + if (memobj->mapped_ptr_sz / 2 > memobj->map_ref) {
> int j = 0;
> cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
> - sizeof(cl_mapped_ptr) * (memobj->mapped_ptr_sz/2));
> + sizeof(cl_mapped_ptr) * (memobj->mapped_ptr_sz / 2));
> if (!new_ptr) {
> /* Just do nothing. */
> goto error;
> }
> - memset(new_ptr, 0, (memobj->mapped_ptr_sz/2) *
> sizeof(cl_mapped_ptr));
> + memset(new_ptr, 0, (memobj->mapped_ptr_sz / 2) *
> sizeof(cl_mapped_ptr));
>
> for (i = 0; i < memobj->mapped_ptr_sz; i++) {
> if (memobj->mapped_ptr[i].ptr) {
> new_ptr[j] = memobj->mapped_ptr[i];
> j++;
> - assert(j < memobj->mapped_ptr_sz/2);
> + assert(j < memobj->mapped_ptr_sz / 2);
> }
> }
> - memobj->mapped_ptr_sz = memobj->mapped_ptr_sz/2;
> + memobj->mapped_ptr_sz = memobj->mapped_ptr_sz / 2;
> free(memobj->mapped_ptr);
> memobj->mapped_ptr = new_ptr;
> }
> @@ -417,7 +460,8 @@ error:
> return err;
> }
>
> -cl_int cl_enqueue_native_kernel(enqueue_data *data)
> +static cl_int
> +cl_enqueue_native_kernel(enqueue_data *data, cl_int status)
> {
> cl_int err = CL_SUCCESS;
> cl_uint num_mem_objects = (cl_uint)data->offset;
> @@ -425,18 +469,19 @@ cl_int cl_enqueue_native_kernel(enqueue_data
> *data)
> const void **args_mem_loc = (const void **)data->const_ptr;
> cl_uint i;
>
> - for (i=0; i<num_mem_objects; ++i)
> - {
> - const cl_mem buffer = mem_list[i];
> - CHECK_MEM(buffer);
> + if (status != CL_COMPLETE)
> + return err;
> +
> + for (i = 0; i < num_mem_objects; ++i) {
> + const cl_mem buffer = mem_list[i];
> + CHECK_MEM(buffer);
>
> - *((void **)args_mem_loc[i]) = cl_mem_map_auto(buffer, 0);
> + *((void **)args_mem_loc[i]) = cl_mem_map_auto(buffer, 0);
> }
> data->user_func(data->ptr);
>
> - for (i=0; i<num_mem_objects; ++i)
> - {
> - cl_mem_unmap_auto(mem_list[i]);
> + for (i = 0; i < num_mem_objects; ++i) {
> + cl_mem_unmap_auto(mem_list[i]);
> }
>
> free(data->ptr);
> @@ -444,46 +489,115 @@ error:
> return err;
> }
>
> -cl_int cl_enqueue_handle(cl_event event, enqueue_data* data)
> +static cl_int
> +cl_enqueue_ndrange(enqueue_data *data, cl_int status)
> {
> - /* if need profiling, add the submit timestamp here. */
> - if (event && event->type != CL_COMMAND_USER
> - && event->queue->props & CL_QUEUE_PROFILING_ENABLE) {
> - cl_event_get_timestamp(event, CL_PROFILING_COMMAND_SUBMIT);
> + cl_int err = CL_SUCCESS;
> +
> + if (status == CL_SUBMITTED) {
> + err = cl_command_queue_flush_gpgpu(data->gpgpu);
> + } else if (status == CL_COMPLETE) {
> + void *batch_buf = cl_gpgpu_ref_batch_buf(data->gpgpu);
> + cl_gpgpu_sync(batch_buf);
> + cl_gpgpu_unref_batch_buf(batch_buf);
> + /* Finished, we can release the gpgpu now. */
> + cl_gpgpu_delete(data->gpgpu);
> + data->gpgpu = NULL;
> }
>
> - switch(data->type) {
> - case EnqueueReadBuffer:
> - return cl_enqueue_read_buffer(data);
> - case EnqueueReadBufferRect:
> - return cl_enqueue_read_buffer_rect(data);
> - case EnqueueWriteBuffer:
> - return cl_enqueue_write_buffer(data);
> - case EnqueueWriteBufferRect:
> - return cl_enqueue_write_buffer_rect(data);
> - case EnqueueReadImage:
> - return cl_enqueue_read_image(data);
> - case EnqueueWriteImage:
> - return cl_enqueue_write_image(data);
> - case EnqueueMapBuffer:
> - return cl_enqueue_map_buffer(data);
> - case EnqueueMapImage:
> - return cl_enqueue_map_image(data);
> - case EnqueueUnmapMemObject:
> - return cl_enqueue_unmap_mem_object(data);
> - case EnqueueCopyBufferRect:
> - case EnqueueCopyBuffer:
> - case EnqueueCopyImage:
> - case EnqueueCopyBufferToImage:
> - case EnqueueCopyImageToBuffer:
> - case EnqueueNDRangeKernel:
> - case EnqueueFillBuffer:
> - case EnqueueFillImage:
> - return cl_event_flush(event);
> - case EnqueueNativeKernel:
> - return cl_enqueue_native_kernel(data);
> - case EnqueueMigrateMemObj:
> - default:
> - return CL_SUCCESS;
> + return err;
> +}
> +
> +static cl_int
> +cl_enqueue_marker_or_barrier(enqueue_data *data, cl_int status)
> +{
> + return CL_COMPLETE;
> +}
> +
> +LOCAL void
> +cl_enqueue_delete(enqueue_data *data)
> +{
> + if (data == NULL)
> + return;
> +
> + if (data->type == EnqueueCopyBufferRect ||
> + data->type == EnqueueCopyBuffer ||
> + data->type == EnqueueCopyImage ||
> + data->type == EnqueueCopyBufferToImage ||
> + data->type == EnqueueCopyImageToBuffer ||
> + data->type == EnqueueNDRangeKernel ||
> + data->type == EnqueueFillBuffer ||
> + data->type == EnqueueFillImage) {
> + if (data->gpgpu) {
> + cl_gpgpu_delete(data->gpgpu);
> + data->gpgpu = NULL;
> + }
> + return;
> + }
> +
> + if (data->type == EnqueueNativeKernel) {
> + if (data->mem_list) {
> + cl_free((void*)data->mem_list);
> + data->mem_list = NULL;
> + }
> + if (data->ptr) {
> + cl_free((void*)data->ptr);
> + data->ptr = NULL;
> + }
> + if (data->const_ptr) {
> + cl_free((void*)data->const_ptr);
> + data->const_ptr = NULL;
> + }
> + }
> +}
> +
> +LOCAL cl_int
> +cl_enqueue_handle(enqueue_data *data, cl_int status)
> +{
> + /* if need profiling, add the submit timestamp here. */
> + // if (event && event->event_type != CL_COMMAND_USER &&
> + // event->queue->props & CL_QUEUE_PROFILING_ENABLE) {
> +  //   cl_event_get_timestamp(event, CL_PROFILING_COMMAND_SUBMIT);
> + // }
> +
> + switch (data->type) {
> + case EnqueueReturnSuccesss:
> + return CL_SUCCESS;
> + case EnqueueReadBuffer:
> + return cl_enqueue_read_buffer(data, status);
> + case EnqueueReadBufferRect:
> + return cl_enqueue_read_buffer_rect(data, status);
> + case EnqueueWriteBuffer:
> + return cl_enqueue_write_buffer(data, status);
> + case EnqueueWriteBufferRect:
> + return cl_enqueue_write_buffer_rect(data, status);
> + case EnqueueReadImage:
> + return cl_enqueue_read_image(data, status);
> + case EnqueueWriteImage:
> + return cl_enqueue_write_image(data, status);
> + case EnqueueMapBuffer:
> + return cl_enqueue_map_buffer(data, status);
> + case EnqueueMapImage:
> + return cl_enqueue_map_image(data, status);
> + case EnqueueUnmapMemObject:
> + return cl_enqueue_unmap_mem_object(data, status);
> + case EnqueueMarker:
> + case EnqueueBarrier:
> + return cl_enqueue_marker_or_barrier(data, status);
> + case EnqueueCopyBufferRect:
> + case EnqueueCopyBuffer:
> + case EnqueueCopyImage:
> + case EnqueueCopyBufferToImage:
> + case EnqueueCopyImageToBuffer:
> + case EnqueueNDRangeKernel:
> + case EnqueueFillBuffer:
> + case EnqueueFillImage:
> + //return cl_event_flush(event);
> + return cl_enqueue_ndrange(data, status);
> + case EnqueueNativeKernel:
> + return cl_enqueue_native_kernel(data, status);
> + case EnqueueMigrateMemObj:
> + default:
> + return CL_SUCCESS;
> }
> }
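With the new status argument, every handler is driven twice by whoever drains the queue: once at submit time and once at completion. The caller is not in this hunk, but presumably the worker in cl_command_queue_enqueue.c does something of this shape (a sketch against the patch's headers, not the actual Beignet code):

    static cl_int execute_command(cl_event e)
    {
      cl_int err;

      /* submit phase, e.g. flush the batch buffer for an ND-range */
      err = cl_enqueue_handle(&e->exec_data, CL_SUBMITTED);
      if (err != CL_SUCCESS)
        return err;
      cl_event_set_status(e, CL_SUBMITTED);

      /* completion phase, e.g. sync the batch and copy results back */
      err = cl_enqueue_handle(&e->exec_data, CL_COMPLETE);
      if (err != CL_SUCCESS)
        return err;
      cl_event_set_status(e, CL_COMPLETE);
      return CL_SUCCESS;
    }

This also explains the map-buffer/map-image paths above: at CL_SUBMITTED they hand out an early pointer via the unsynchronized GTT map, and only at CL_COMPLETE do the full map and the USE_HOST_PTR copy.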
> diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h
> index 09305af..f8fff9d 100644
> --- a/src/cl_enqueue.h
> +++ b/src/cl_enqueue.h
> @@ -24,7 +24,8 @@
> #include "CL/cl.h"
>
> typedef enum {
> - EnqueueReadBuffer = 0,
> +  EnqueueReturnSuccesss = 0, /* For some cases, we have nothing to do; just return SUCCESS. */
> + EnqueueReadBuffer,
> EnqueueReadBufferRect,
> EnqueueWriteBuffer,
> EnqueueWriteBufferRect,
> @@ -49,26 +50,29 @@ typedef enum {
> } enqueue_type;
>
> typedef struct _enqueue_data {
> - enqueue_type type; /* Command type */
> - cl_mem mem_obj; /* Enqueue's cl_mem */
> - cl_command_queue queue; /* Command queue */
> - size_t offset; /* Mem object's offset */
> - size_t size; /* Size */
> - size_t origin[3]; /* Origin */
> - size_t host_origin[3]; /* Origin */
> - size_t region[3]; /* Region */
> - size_t row_pitch; /* Row pitch */
> - size_t slice_pitch; /* Slice pitch */
> -  size_t           host_row_pitch;    /* Host row pitch, used in read/write buffer rect */
> -  size_t           host_slice_pitch;  /* Host slice pitch, used in read/write buffer rect */
> - const void * const_ptr; /* Const ptr for memory read */
> - void * ptr; /* Ptr for write and return value */
> - const cl_mem* mem_list; /* mem_list of clEnqueueNativeKernel */
> -  uint8_t          unsync_map;        /* Indicate the clEnqueueMapBuffer/Image is unsync map */
> -  uint8_t          write_map;         /* Indicate if the clEnqueueMapBuffer is write enable */
> - void (*user_func)(void *); /* pointer to a host-callable user function */
> + enqueue_type type; /* Command type */
> + cl_mem mem_obj; /* Enqueue's cl_mem */
> + cl_command_queue queue; /* Command queue */
> + size_t offset; /* Mem object's offset */
> + size_t size; /* Size */
> + size_t origin[3]; /* Origin */
> + size_t host_origin[3]; /* Origin */
> + size_t region[3]; /* Region */
> + size_t row_pitch; /* Row pitch */
> + size_t slice_pitch; /* Slice pitch */
> +  size_t host_row_pitch;      /* Host row pitch, used in read/write buffer rect */
> +  size_t host_slice_pitch;    /* Host slice pitch, used in read/write buffer rect */
> + const void *const_ptr; /* Const ptr for memory read */
> + void *ptr; /* Ptr for write and return value */
> + const cl_mem *mem_list; /* mem_list of clEnqueueNativeKernel */
> +  uint8_t unsync_map;         /* Indicates the clEnqueueMapBuffer/Image is an unsync map */
> +  uint8_t write_map;          /* Indicates if the clEnqueueMapBuffer is write enabled */
> + void (*user_func)(void *); /* pointer to a host-callable user function */
> + cl_gpgpu gpgpu;
> } enqueue_data;
>
> /* Do real enqueue commands */
> -cl_int cl_enqueue_handle(cl_event event, enqueue_data* data);
> +extern cl_int cl_enqueue_handle(enqueue_data *data, cl_int status);
> +extern void cl_enqueue_delete(enqueue_data *data);
> +
> #endif /* __CL_ENQUEUE_H__ */
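To make the data flow concrete: an API entry fills one enqueue_data describing the command and stores it in the event's exec_data. For a blocking read it could look like this (illustrative values; assumes the patch's headers plus <string.h>):

    static cl_int describe_and_run_read(cl_command_queue queue, cl_mem buf,
                                        size_t offset, size_t size, void *host_dst)
    {
      enqueue_data data;
      memset(&data, 0, sizeof(data));
      data.type = EnqueueReadBuffer;
      data.queue = queue;
      data.mem_obj = buf;
      data.offset = offset;
      data.size = size;
      data.ptr = host_dst;
      /* reads do all their work at completion; the CL_SUBMITTED
       * phase is a no-op for this command type */
      return cl_enqueue_handle(&data, CL_COMPLETE);
    }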
> diff --git a/src/cl_event.c b/src/cl_event.c
> index 6c7c2e0..4acd619 100644
> --- a/src/cl_event.c
> +++ b/src/cl_event.c
> @@ -14,750 +14,615 @@
> * You should have received a copy of the GNU Lesser General Public
> * License along with this library. If not, see <http://www.gnu.org/licenses/>.
> *
> - * Author: Rong Yang <rong.r.yang at intel.com>
> */
>
> #include "cl_event.h"
> #include "cl_context.h"
> -#include "cl_utils.h"
> -#include "cl_alloc.h"
> -#include "cl_khr_icd.h"
> -#include "cl_kernel.h"
> #include "cl_command_queue.h"
> -
> -#include <assert.h>
> +#include "cl_alloc.h"
> +#include <string.h>
> #include <stdio.h>
>
> -void cl_event_update_last_events(cl_command_queue queue, int wait)
> +LOCAL cl_int
> +cl_event_get_timestamp(cl_event event, cl_profiling_info param_name)
> {
> - cl_event last_event = get_last_event(queue);
> - if(!last_event) return;
> - cl_event next, now;
> - now = last_event;
> - while(now){
> -    next = now->last_next; //fetch next first, in case updating the status modifies the list
> - cl_event_update_status(now,wait);//update event status
> - now = next;
> - }
> + // TODO:
> + return CL_INVALID_VALUE;
> }
>
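Matching the note at the top of this mail, the timestamp query is left as a TODO. Purely as an illustration of the intended shape (the real implementation is up to junyan; the index mapping and clock source here are my guesses), a CPU-side version could record monotonic nanoseconds into event->timestamp[]:

    #include <time.h>

    static cl_int cl_event_get_timestamp_sketch(cl_event event, cl_profiling_info param)
    {
      struct timespec ts;
      /* QUEUED..END are consecutive enum values; map them to slots 0..3,
       * which is how the delta helpers below index the array */
      if (param < CL_PROFILING_COMMAND_QUEUED || param > CL_PROFILING_COMMAND_END)
        return CL_INVALID_VALUE;
      if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0)
        return CL_INVALID_VALUE;
      event->timestamp[param - CL_PROFILING_COMMAND_QUEUED] =
          (cl_ulong)ts.tv_sec * 1000000000ull + ts.tv_nsec;
      return CL_SUCCESS;
    }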
> -void cl_event_insert_last_events(cl_command_queue queue,cl_event
> event)
> +LOCAL cl_ulong
> +cl_event_get_timestamp_delta(cl_ulong start_timestamp, cl_ulong
> end_timestamp)
> {
> - if(!event) return;
> - cl_event last_event = get_last_event(queue);
> - if(last_event){
> - cl_event now = last_event;
> - while(now->last_next)
> - now = now->last_next;
> - now->last_next = event;
> - event->last_prev = now;
> + cl_ulong ret_val;
> +
> + if (end_timestamp > start_timestamp) {
> + ret_val = end_timestamp - start_timestamp;
> + } else {
> +    /* if the start timestamp is greater than the end timestamp, set the return value to max */
> + ret_val = ((cl_ulong)1 << 32);
> }
> - else set_last_event(queue,event);
> +
> + return ret_val;
> }
>
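The delta helper clamps instead of underflowing when the end stamp is below the start stamp (e.g. a 32-bit hardware counter wrapping), returning a fixed 1<<32. Both branches in one quick check (assumes cl_event.h from this patch is on the include path):

    #include <assert.h>

    int main(void)
    {
      assert(cl_event_get_timestamp_delta(100, 350) == 250);                 /* normal */
      assert(cl_event_get_timestamp_delta(350, 100) == ((cl_ulong)1 << 32)); /* wrapped */
      return 0;
    }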
> -static inline cl_bool
> -cl_event_is_gpu_command_type(cl_command_type type)
> +LOCAL cl_ulong
> +cl_event_get_start_timestamp(cl_event event)
> {
> - switch(type) {
> - case CL_COMMAND_COPY_BUFFER:
> - case CL_COMMAND_FILL_BUFFER:
> - case CL_COMMAND_COPY_IMAGE:
> - case CL_COMMAND_COPY_IMAGE_TO_BUFFER:
> - case CL_COMMAND_COPY_BUFFER_TO_IMAGE:
> - case CL_COMMAND_COPY_BUFFER_RECT:
> - case CL_COMMAND_TASK:
> - case CL_COMMAND_NDRANGE_KERNEL:
> - return CL_TRUE;
> - default:
> - return CL_FALSE;
> - }
> + cl_ulong ret_val;
> +
> +  ret_val = cl_event_get_timestamp_delta(event->timestamp[0], event->timestamp[2]);
> +
> + return ret_val;
> }
>
> -int cl_event_flush(cl_event event)
> +LOCAL cl_ulong
> +cl_event_get_end_timestamp(cl_event event)
> {
> - int err = CL_SUCCESS;
> - if(!event) {
> - err = CL_INVALID_VALUE;
> - return err;
> - }
> + cl_ulong ret_val;
>
> - assert(event->gpgpu_event != NULL);
> - if (event->gpgpu) {
> - err = cl_command_queue_flush_gpgpu(event->queue, event->gpgpu);
> - cl_gpgpu_delete(event->gpgpu);
> - event->gpgpu = NULL;
> - }
> - cl_gpgpu_event_flush(event->gpgpu_event);
> - cl_event_insert_last_events(event->queue,event);
> - return err;
> +  ret_val = cl_event_get_timestamp_delta(event->timestamp[0], event->timestamp[3]);
> +
> + return ret_val;
> }
>
> -cl_event cl_event_new(cl_context ctx, cl_command_queue queue,
> cl_command_type type, cl_bool emplict)
> +LOCAL void
> +cl_event_add_ref(cl_event event)
> {
> - cl_event event = NULL;
> - GET_QUEUE_THREAD_GPGPU(queue);
> + assert(event);
> + CL_OBJECT_INC_REF(event);
> +}
>
> -  /* Allocate and initialize the structure itself */
> - TRY_ALLOC_NO_ERR (event, CALLOC(struct _cl_event));
> - CL_OBJECT_INIT_BASE(event, CL_OBJECT_EVENT_MAGIC);
> +LOCAL cl_int
> +cl_event_get_status(cl_event event)
> +{
> + cl_int ret;
> +
> + assert(event);
> + CL_OBJECT_LOCK(event);
> + ret = event->status;
> + CL_OBJECT_UNLOCK(event);
> + return ret;
> +}
> +
> +static cl_event
> +cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type,
> + cl_uint num_events, cl_event *event_list)
> +{
> + cl_event e = cl_calloc(1, sizeof(_cl_event));
> + if (e == NULL)
> + return NULL;
> +
> + CL_OBJECT_INIT_BASE(e, CL_OBJECT_EVENT_MAGIC);
>
> /* Append the event in the context event list */
> - cl_context_add_event(ctx, event);
> -
> - /* Initialize all members and create GPGPU event object */
> - event->queue = queue;
> - event->type = type;
> - event->gpgpu_event = NULL;
> - if(type == CL_COMMAND_USER) {
> - event->status = CL_SUBMITTED;
> + cl_context_add_event(ctx, e);
> + e->queue = queue;
> +
> + list_init(&e->callbacks);
> + list_init(&e->enqueue_node);
> +
> +  assert(type >= CL_COMMAND_NDRANGE_KERNEL && type <= CL_COMMAND_FILL_IMAGE);
> + e->event_type = type;
> + if (type == CL_COMMAND_USER) {
> + e->status = CL_SUBMITTED;
> + } else {
> + e->status = CL_QUEUED;
> }
> - else {
> - event->status = CL_QUEUED;
> - if(cl_event_is_gpu_command_type(event->type))
> - event->gpgpu_event = cl_gpgpu_event_new(gpgpu);
> +
> + if (type == CL_COMMAND_USER) {
> + assert(queue == NULL);
> }
> - cl_event_add_ref(event); //dec when complete
> - event->user_cb = NULL;
> - event->enqueue_cb = NULL;
> - event->waits_head = NULL;
> - event->emplict = emplict;
> -
> -exit:
> - return event;
> -error:
> - cl_event_delete(event);
> - event = NULL;
> - goto exit;
> +
> + e->depend_events = event_list;
> + e->depend_event_num = num_events;
> + return e;
> }
>
> -void cl_event_delete(cl_event event)
> +LOCAL void
> +cl_event_delete(cl_event event)
> {
> + int i;
> + cl_event_user_callback cb;
> +
> if (UNLIKELY(event == NULL))
> return;
>
> - cl_event_update_status(event, 0);
> -
> if (CL_OBJECT_DEC_REF(event) > 1)
> return;
>
> -  /* Call all user callbacks that haven't executed yet */
> -  cl_event_call_callback(event, CL_COMPLETE, CL_TRUE); // CL_COMPLETE status forces all not-yet-executed callbacks to run
> + cl_enqueue_delete(&event->exec_data);
>
> - /* delete gpgpu event object */
> - if(event->gpgpu_event)
> - cl_gpgpu_event_delete(event->gpgpu_event);
> + assert(list_empty(&event->enqueue_node));
>
> - /* Remove it from the list */
> - cl_context_remove_event(event->ctx, event);
> + if (event->depend_events) {
> + assert(event->depend_event_num);
> + for (i = 0; i < event->depend_event_num; i++) {
> + cl_event_delete(event->depend_events[i]);
> + }
> + cl_free(event->depend_events);
> + }
>
> - if (event->gpgpu) {
> -    fprintf(stderr, "Warning: an event is deleted with a pending enqueued task.\n");
> - cl_gpgpu_delete(event->gpgpu);
> - event->gpgpu = NULL;
> + /* Free all the callbacks. Last ref, no need to lock. */
> + while (!list_empty(&event->callbacks)) {
> + cb = list_entry(event->callbacks.next, _cl_event_user_callback, node);
> + list_del(&cb->node);
> + cl_free(cb);
> }
>
> + /* Remove it from the list */
> + assert(event->ctx);
> + cl_context_remove_event(event->ctx, event);
> +
> CL_OBJECT_DESTROY_BASE(event);
> cl_free(event);
> }
>
> -void cl_event_add_ref(cl_event event)
> +LOCAL cl_event
> +cl_event_create(cl_context ctx, cl_command_queue queue, cl_uint num_events,
> +                const cl_event *event_list, cl_command_type type, cl_int *errcode_ret)
> {
> - assert(event);
> - CL_OBJECT_INC_REF(event);
> -}
> + cl_event e = NULL;
> + cl_event *depend_events = NULL;
> + cl_int err = CL_SUCCESS;
> + cl_uint total_events = 0;
> + int i;
>
> -cl_int cl_event_set_callback(cl_event event ,
> - cl_int command_exec_callback_type,
> - EVENT_NOTIFY pfn_notify,
> - void* user_data)
> -{
> - assert(event);
> - assert(pfn_notify);
> + assert(ctx);
>
> - cl_int err = CL_SUCCESS;
> - user_callback *cb;
> - TRY_ALLOC(cb, CALLOC(user_callback));
> -
> - cb->pfn_notify = pfn_notify;
> - cb->user_data = user_data;
> - cb->status = command_exec_callback_type;
> - cb->executed = CL_FALSE;
> -
> -
> - // It is possible that the event enqueued is already completed.
> - // clEnqueueReadBuffer can be synchronous and when the callback
> - // is registered after, it still needs to get executed.
> -  CL_OBJECT_LOCK(event); // Thread safety required: operations on event->status can be made from many different threads
> - if(event->status <= command_exec_callback_type) {
> - /* Call user callback */
> -    CL_OBJECT_UNLOCK(event); // pfn_notify can call clFunctions that use the event_lock, and from here it's not required
> - cb->pfn_notify(event, event->status, cb->user_data);
> - cl_free(cb);
> - } else {
> - // Enqueue to callback list
> - cb->next = event->user_cb;
> - event->user_cb = cb;
> - CL_OBJECT_UNLOCK(event);
> - }
> + do {
> + if (event_list)
> + assert(num_events);
>
> -exit:
> - return err;
> -error:
> - err = CL_OUT_OF_HOST_MEMORY;
> - cl_free(cb);
> - goto exit;
> -};
> -
> -cl_int cl_event_check_waitlist(cl_uint num_events_in_wait_list,
> - const cl_event *event_wait_list,
> - cl_event *event,cl_context ctx)
> -{
> - cl_int err = CL_SUCCESS;
> - cl_int i;
> - /* check the event_wait_list and num_events_in_wait_list */
> - if((event_wait_list == NULL) &&
> - (num_events_in_wait_list > 0))
> - goto error;
> -
> - if ((event_wait_list != NULL) &&
> - (num_events_in_wait_list == 0)){
> - goto error;
> - }
> + if (queue == NULL) {
> + assert(type == CL_COMMAND_USER);
> + assert(event_list == NULL);
> + assert(num_events == 0);
>
> - /* check the event and context */
> - for(i=0; i<num_events_in_wait_list; i++) {
> - CHECK_EVENT(event_wait_list[i]);
> - if(event_wait_list[i]->status < CL_COMPLETE) {
> - err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> - goto exit;
> - }
> - if(event && event == &event_wait_list[i])
> - goto error;
> - if(event_wait_list[i]->ctx != ctx) {
> - err = CL_INVALID_CONTEXT;
> - goto exit;
> - }
> - }
> + e = cl_event_new(ctx, queue, type, 0, NULL);
> + if (e == NULL) {
> + err = CL_OUT_OF_HOST_MEMORY;
> + break;
> + }
> + } else {
> + CL_OBJECT_LOCK(queue);
> + total_events = queue->barrier_events_num + num_events;
> +
> + if (total_events) {
> + depend_events = cl_calloc(total_events, sizeof(cl_event));
> + if (depend_events == NULL) {
> + CL_OBJECT_UNLOCK(queue);
> + err = CL_OUT_OF_HOST_MEMORY;
> + break;
> + }
> + }
>
> -exit:
> - return err;
> -error:
> - err = CL_INVALID_EVENT_WAIT_LIST; //reset error
> - goto exit;
> -}
> + /* Add all the barrier events as depend events. */
> + for (i = 0; i < queue->barrier_events_num; i++) {
> + assert(CL_EVENT_IS_BARRIER(queue->barrier_events[i]));
> + cl_event_add_ref(queue->barrier_events[i]);
> + depend_events[num_events + i] = queue->barrier_events[i];
> + }
>
> -cl_int cl_event_wait_events(cl_uint num_events_in_wait_list, const
> cl_event *event_wait_list,
> - cl_command_queue queue)
> -{
> - cl_int i;
> + CL_OBJECT_UNLOCK(queue);
>
> - /* Check whether wait user events */
> - for(i=0; i<num_events_in_wait_list; i++) {
> - if(event_wait_list[i]->status <= CL_COMPLETE)
> - continue;
> + for (i = 0; i < num_events; i++) {
> + assert(event_list[i]);
> + assert(event_list[i]->ctx == ctx);
> + assert(CL_OBJECT_IS_EVENT(event_list[i]));
> + cl_event_add_ref(event_list[i]);
> + depend_events[i] = event_list[i];
> + }
>
> - /* Need wait on user event, return and do enqueue defer */
> - if((event_wait_list[i]->type == CL_COMMAND_USER) ||
> - (event_wait_list[i]->enqueue_cb &&
> - (event_wait_list[i]->enqueue_cb->wait_user_events != NULL))){
> - return CL_ENQUEUE_EXECUTE_DEFER;
> - }
> - }
> + if (depend_events)
> + assert(total_events);
>
> - if(queue && queue->barrier_events_num )
> - return CL_ENQUEUE_EXECUTE_DEFER;
> + e = cl_event_new(ctx, queue, type, total_events, depend_events);
> + if (e == NULL) {
> + err = CL_OUT_OF_HOST_MEMORY;
> + break;
> + }
> + depend_events = NULL;
> + }
> + } while (0);
>
> - /* Non user events or all user event finished, wait all enqueue events
> finish */
> - for(i=0; i<num_events_in_wait_list; i++) {
> - if(event_wait_list[i]->status <= CL_COMPLETE)
> - continue;
> + if (err != CL_SUCCESS) {
> + if (depend_events) {
> + for (i = 0; i < total_events; i++) {
> + cl_event_delete(depend_events[i]);
> + }
> + cl_free(depend_events);
> + }
>
> - //enqueue callback haven't finish, in another thread, wait
> - if(event_wait_list[i]->enqueue_cb != NULL)
> - return CL_ENQUEUE_EXECUTE_DEFER;
> - if(event_wait_list[i]->gpgpu_event)
> - cl_gpgpu_event_update_status(event_wait_list[i]->gpgpu_event, 1);
> - cl_event_set_status(event_wait_list[i], CL_COMPLETE); //Execute user's callback
> + // If depend_events was handed to the event, creation must have succeeded.
> + assert(e->depend_events == NULL);
> + cl_event_delete(e);
> }
> - return CL_ENQUEUE_EXECUTE_IMM;
> +
> + if (errcode_ret)
> + *errcode_ret = err;
> +
> + return e;
> }
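
Worth noting for reviewers: cl_event_create snapshots the queue's barrier events
plus the caller's wait list into a single depend_events array, and the new event
takes ownership of it. From the API side this is the usual wait-list pattern; a
minimal host-side sketch, where the queue, buffer and dependency events are
assumed to be valid, pre-existing objects:

    #include <CL/cl.h>

    /* Sketch: ev_read's dependencies are fixed at enqueue time, which is
       exactly when cl_event_create copies the wait list. */
    static cl_int read_after(cl_command_queue queue, cl_mem buf, size_t size,
                             void *host_ptr, cl_event dep0, cl_event dep1,
                             cl_event *ev_read)
    {
      cl_event deps[2] = {dep0, dep1};
      return clEnqueueReadBuffer(queue, buf, CL_FALSE, 0, size, host_ptr,
                                 2, deps, ev_read);
    }
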
>
> -void cl_event_new_enqueue_callback(cl_event event,
> - enqueue_data *data,
> - cl_uint num_events_in_wait_list,
> - const cl_event *event_wait_list)
> +LOCAL cl_int
> +cl_event_set_callback(cl_event event, cl_int exec_type, cl_event_notify_cb pfn_notify, void *user_data)
> {
> - enqueue_callback *cb, *node;
> - user_event *user_events, *u_ev;
> - cl_command_queue queue = event ? event->queue : NULL;
> - cl_int i;
> cl_int err = CL_SUCCESS;
> + cl_event_user_callback cb;
> + cl_bool exec_imm = CL_FALSE;
>
> - /* Allocate and initialize the structure itself */
> - TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback));
> - cb->num_events = 0;
> - TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, num_events_in_wait_list));
> - for(i=0; i<num_events_in_wait_list; i++) {
> - //user event will insert to cb->wait_user_events, need not in wait list, avoid ref twice
> - if(event_wait_list[i]->type != CL_COMMAND_USER) {
> - cb->wait_list[cb->num_events++] = event_wait_list[i];
> - cl_event_add_ref(event_wait_list[i]); //add defer enqueue's wait event reference
> - }
> - }
> - cb->event = event;
> - cb->next = NULL;
> - cb->wait_user_events = NULL;
> -
> - if(queue && queue->barrier_events_num > 0) {
> - for(i=0; i<queue->barrier_events_num; i++) {
> - /* Insert the enqueue_callback to user event list */
> - node = queue->wait_events[i]->waits_head;
> - if(node == NULL)
> - queue->wait_events[i]->waits_head = cb;
> - else{
> - while((node != cb) && node->next)
> - node = node->next;
> - if(node == cb) //wait on dup user event
> - continue;
> - node->next = cb;
> - }
> + assert(event);
> + assert(pfn_notify);
>
> - /* Insert the user event to enqueue_callback's wait_user_events */
> - TRY(cl_event_insert_user_event, &cb->wait_user_events, queue->wait_events[i]);
> - cl_event_add_ref(queue->wait_events[i]);
> + do {
> + cb = cl_calloc(1, sizeof(_cl_event_user_callback));
> + if (cb == NULL) {
> + err = CL_OUT_OF_HOST_MEMORY;
> + break;
> }
> - }
>
> - /* Find out all user events that in event_wait_list wait */
> - for(i=0; i<num_events_in_wait_list; i++) {
> - if(event_wait_list[i]->status <= CL_COMPLETE)
> - continue;
> -
> - if(event_wait_list[i]->type == CL_COMMAND_USER) {
> - /* Insert the enqueue_callback to user event list */
> - node = event_wait_list[i]->waits_head;
> - if(node == NULL)
> - event_wait_list[i]->waits_head = cb;
> - else {
> - while((node != cb) && node->next)
> - node = node->next;
> - if(node == cb) //wait on dup user event
> - continue;
> - node->next = cb;
> - }
> - /* Insert the user event to enqueue_callback's wait_user_events */
> - TRY(cl_event_insert_user_event, &cb->wait_user_events, event_wait_list[i]);
> - cl_event_add_ref(event_wait_list[i]);
> - if(queue)
> - cl_command_queue_insert_event(queue, event_wait_list[i]);
> - if(queue && data->type == EnqueueBarrier){
> - cl_command_queue_insert_barrier_event(queue, event_wait_list[i]);
> - }
> - } else if(event_wait_list[i]->enqueue_cb != NULL) {
> - user_events = event_wait_list[i]->enqueue_cb->wait_user_events;
> - while(user_events != NULL) {
> - /* Insert the enqueue_callback to user event's waits_tail */
> - node = user_events->event->waits_head;
> - if(node == NULL)
> - event_wait_list[i]->waits_head = cb;
> - else{
> - while((node != cb) && node->next)
> - node = node->next;
> - if(node == cb) { //wait on dup user event
> - user_events = user_events->next;
> - continue;
> - }
> - node->next = cb;
> - }
> -
> - /* Insert the user event to enqueue_callback's wait_user_events */
> - TRY(cl_event_insert_user_event, &cb->wait_user_events, user_events->event);
> - cl_event_add_ref(user_events->event);
> - if(queue)
> - cl_command_queue_insert_event(event->queue, user_events->event);
> - if(queue && data->type == EnqueueBarrier){
> - cl_command_queue_insert_barrier_event(event->queue, user_events->event);
> - }
> - user_events = user_events->next;
> - }
> - }
> - }
> - if(event != NULL && event->queue != NULL && event->gpgpu_event != NULL) {
> - event->gpgpu = cl_thread_gpgpu_take(event->queue);
> - data->ptr = (void *)event->gpgpu_event;
> - }
> - cb->data = *data;
> - if(event)
> - event->enqueue_cb = cb;
> -
> -exit:
> - return;
> -error:
> - if(cb) {
> - while(cb->wait_user_events) {
> - u_ev = cb->wait_user_events;
> - cb->wait_user_events = cb->wait_user_events->next;
> - cl_event_delete(u_ev->event);
> - cl_free(u_ev);
> + list_init(&cb->node);
> + cb->pfn_notify = pfn_notify;
> + cb->user_data = user_data;
> + cb->status = exec_type;
> + cb->executed = CL_FALSE;
> +
> + CL_OBJECT_LOCK(event);
> + if (event->status > exec_type) {
> + list_add_tail(&cb->node, &event->callbacks);
> + cb = NULL;
> + } else {
> + /* The status has already reached this point; call it immediately. */
> + exec_imm = CL_TRUE;
> }
> - for(i=0; i<cb->num_events; i++) {
> - if(cb->wait_list[i]) {
> - cl_event_delete(cb->wait_list[i]);
> - }
> - }
> - cl_free(cb);
> - }
> - goto exit;
> -}
> + CL_OBJECT_UNLOCK(event);
>
> -void cl_event_call_callback(cl_event event, cl_int status, cl_bool free_cb) {
> - user_callback *user_cb = NULL;
> - user_callback *queue_cb = NULL; // For thread safety, we create a queue that holds user_callback's pfn_notify contents
> - user_callback *temp_cb = NULL;
> - user_cb = event->user_cb;
> - CL_OBJECT_LOCK(event);
> - while(user_cb) {
> - if(user_cb->status >= status
> - && user_cb->executed == CL_FALSE) { // Added check to not execute a callback when it was already handled
> - user_cb->executed = CL_TRUE;
> - temp_cb = cl_malloc(sizeof(user_callback));
> - if(!temp_cb) {
> - break; // Out of memory
> - }
> - temp_cb->pfn_notify = user_cb->pfn_notify; // Minor struct copy to call ppfn_notify out of the pthread_mutex
> - temp_cb->user_data = user_cb->user_data;
> - if(free_cb) {
> - cl_free(user_cb);
> - }
> - if(!queue_cb) {
> - queue_cb = temp_cb;
> - queue_cb->next = NULL;
> - } else { // Enqueue First
> - temp_cb->next = queue_cb;
> - queue_cb = temp_cb;
> - }
> + if (exec_imm) {
> + cb->pfn_notify(event, event->status, cb->user_data);
> }
> - user_cb = user_cb->next;
> - }
> - CL_OBJECT_UNLOCK(event);
>
> - // Calling the callbacks outside of the event_lock is required because the callback can call cl_api functions and get deadlocked
> - while(queue_cb) { // For each callback queued, actually execute the callback
> - queue_cb->pfn_notify(event, event->status, queue_cb->user_data);
> - temp_cb = queue_cb;
> - queue_cb = queue_cb->next;
> - cl_free(temp_cb);
> - }
> + } while (0);
> +
> + if (cb)
> + cl_free(cb);
> +
> + return err;
> }
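
Since the immediate-call path above is subtle (a callback registered after the
event already completed still fires, just synchronously), here is how it looks
from the public API; the helper name and the printf payload are illustrative
only:

    #include <CL/cl.h>
    #include <stdio.h>

    static void CL_CALLBACK on_complete(cl_event e, cl_int status, void *user_data)
    {
      /* May run synchronously inside clSetEventCallback if the event has
         already reached CL_COMPLETE, or later from another thread. */
      printf("event %p finished, status %d\n", (void *)e, status);
    }

    static cl_int watch(cl_event ev)
    {
      return clSetEventCallback(ev, CL_COMPLETE, on_complete, NULL);
    }
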
>
> -void cl_event_set_status(cl_event event, cl_int status)
> +LOCAL cl_int
> +cl_event_set_status(cl_event event, cl_int status)
> {
> - cl_int ret, i;
> - cl_event evt;
> + list_head tmp_callbacks;
> + list_head *n;
> + list_head *pos;
> + cl_bool notify_queue = CL_FALSE;
> + cl_event_user_callback cb;
> +
> + assert(event);
>
> CL_OBJECT_LOCK(event);
> - if(status >= event->status) {
> + if (event->status <= CL_COMPLETE) { // Already set to error or completed
> CL_OBJECT_UNLOCK(event);
> - return;
> + return CL_INVALID_OPERATION;
> }
> - if(event->status <= CL_COMPLETE) {
> - event->status = status; //have done enqueue before or doing in another thread
> - CL_OBJECT_UNLOCK(event);
> - return;
> +
> + if (CL_EVENT_IS_USER(event)) {
> + assert(event->status != CL_RUNNING && event->status != CL_QUEUED);
> + } else {
> + assert(event->queue); // Must belong to some queue.
> }
>
> - if(status <= CL_COMPLETE) {
> - if(event->enqueue_cb) {
> - if(status == CL_COMPLETE) {
> - cl_enqueue_handle(event, &event->enqueue_cb->data);
> - if(event->gpgpu_event)
> - cl_gpgpu_event_update_status(event->gpgpu_event, 1); //now set complet, need refine
> - } else {
> - if(event->gpgpu_event) {
> - // Error then cancel the enqueued event.
> - cl_gpgpu_delete(event->gpgpu);
> - event->gpgpu = NULL;
> - }
> - }
> + if (status >= event->status) { // Should never go back.
> + CL_OBJECT_UNLOCK(event);
> + return CL_INVALID_OPERATION;
> + }
>
> - event->status = status; //Change the event status after enqueue and befor unlock
> + event->status = status;
>
> + /* Call all the callbacks. */
> + if (!list_empty(&event->callbacks)) {
> + do {
> + status = event->status;
> + list_init(&tmp_callbacks);
> + list_replace(&event->callbacks, &tmp_callbacks);
> + list_init(&event->callbacks);
> + /* Call all the callbacks without lock. */
> CL_OBJECT_UNLOCK(event);
> - for(i=0; i<event->enqueue_cb->num_events; i++)
> - cl_event_delete(event->enqueue_cb->wait_list[i]);
> +
> + list_for_each_safe(pos, n, &tmp_callbacks)
> + {
> + cb = list_entry(pos, _cl_event_user_callback, node);
> +
> + assert(cb->executed == CL_FALSE);
> +
> + if (cb->status < status)
> + continue;
> +
> + list_del(&cb->node);
> + cb->executed = CL_TRUE;
> + cb->pfn_notify(event, status, cb->user_data);
> + cl_free(cb);
> + }
> +
> CL_OBJECT_LOCK(event);
>
> - if(event->enqueue_cb->wait_list)
> - cl_free(event->enqueue_cb->wait_list);
> - cl_free(event->enqueue_cb);
> - event->enqueue_cb = NULL;
> - }
> + // Set back the uncalled callbacks.
> + list_splice_tail(&tmp_callbacks, &event->callbacks);
> +
> + /* The status may have changed while unlocked; check again. */
> + } while (status != event->status);
> }
> - if(event->status >= status) //maybe changed in other threads
> - event->status = status;
> +
> + /* Wake up all waiters on the status change. */
> + CL_OBJECT_NOTIFY_COND(event);
> +
> + if (event->status <= CL_COMPLETE) {
> + notify_queue = CL_TRUE;
> + }
> +
> CL_OBJECT_UNLOCK(event);
>
> - /* Call user callback */
> - cl_event_call_callback(event, status, CL_FALSE);
> + /* Need to notify all the command queues within the same context. */
> + if (notify_queue) {
> + cl_command_queue *q_list = NULL;
> + cl_uint queue_num = 0;
> + int i = 0;
> + int cookie = 0;
> +
> + /* First, remove it from the queue's barrier list. */
> + if (CL_EVENT_IS_BARRIER(event)) {
> + assert(event->queue);
> + cl_command_queue_remove_barrier_event(event->queue, event);
> + }
>
> - if(event->type == CL_COMMAND_USER) {
> - /* Check all defer enqueue */
> - enqueue_callback *cb, *enqueue_cb = event->waits_head;
> - while(enqueue_cb) {
> - /* Remove this user event in enqueue_cb, update the header if needed. */
> - cl_event_remove_user_event(&enqueue_cb->wait_user_events,
> event);
> - cl_event_delete(event);
> + /* Then, notify all the queues within the same context. */
> + CL_OBJECT_LOCK(event->ctx);
> + do {
> + queue_num = event->ctx->queue_num;
> + cookie = event->ctx->queue_cookie;
> +
> + if (queue_num > 0) {
> + q_list = cl_calloc(queue_num, sizeof(cl_command_queue));
> + assert(q_list);
> + i = 0;
> + list_for_each(pos, &event->ctx->queues)
> + {
> + q_list[i] = (cl_command_queue)(list_entry(pos, _cl_base_object, node));
> + assert(i < queue_num);
> + i++;
> + }
>
> - /* Still wait on other user events */
> - if(enqueue_cb->wait_user_events != NULL) {
> - enqueue_cb = enqueue_cb->next;
> - continue;
> - }
> + CL_OBJECT_UNLOCK(event->ctx); // Update status without context lock.
>
> - //remove user event frome enqueue_cb's ctx
> - cl_command_queue_remove_event(enqueue_cb->event->queue, event);
> - cl_command_queue_remove_barrier_event(enqueue_cb->event->queue, event);
> -
> - /* All user events complete, now wait enqueue events */
> - ret = cl_event_wait_events(enqueue_cb->num_events, enqueue_cb->wait_list, enqueue_cb->event->queue);
> - assert(ret != CL_ENQUEUE_EXECUTE_DEFER);
> - ret = ~ret;
> - cb = enqueue_cb;
> - enqueue_cb = enqueue_cb->next;
> -
> - /* Call the pending operation */
> - evt = cb->event;
> - /* TODO: if this event wait on several events, one event's
> - status is error, the others is complete, what's the status
> - of this event? Can't find the description in OpenCL spec.
> - Simply update to latest finish wait event.*/
> - cl_event_set_status(cb->event, status);
> - if(evt->emplict == CL_FALSE) {
> - cl_event_delete(evt);
> + for (i = 0; i < queue_num; i++) {
> + cl_command_queue_notify(q_list[i]);
> + }
> +
> + CL_OBJECT_LOCK(event->ctx); // Lock again.
> + } else {
> + /* No queue? Just do nothing. */
> }
> - }
> - event->waits_head = NULL;
> - }
>
> - if(event->status <= CL_COMPLETE){
> - /* Maintain the last_list when event completed*/
> - if (event->last_prev)
> - event->last_prev->last_next = event->last_next;
> - if (event->last_next)
> - event->last_next->last_prev = event->last_prev;
> - if(event->queue && get_last_event(event->queue) == event)
> - set_last_event(event->queue, event->last_next);
> - event->last_prev = NULL;
> - event->last_next = NULL;
> - cl_event_delete(event);
> + } while (cookie != event->ctx->queue_cookie); // Some queues may have been added while unlocked.
> + CL_OBJECT_UNLOCK(event->ctx);
> +
> + if (q_list)
> + cl_free(q_list);
> }
> +
> + return CL_SUCCESS;
> }
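
The lock dance above (detach the callback list under the lock, invoke entries
without the lock, splice the uncalled ones back, then re-check the status) is
the core trick here. A stripped-down sketch of the same pattern in plain
pthreads, with all types hypothetical and simplified to pop one entry at a
time instead of splicing a whole list:

    #include <pthread.h>

    struct cb { struct cb *next; void (*fn)(int status, void *arg); void *arg; };
    struct ev { pthread_mutex_t lock; int status; struct cb *cbs; };

    static void set_status_and_fire(struct ev *e, int new_status)
    {
      pthread_mutex_lock(&e->lock);
      e->status = new_status;
      while (e->cbs) {
        struct cb *head = e->cbs;       /* detach one entry under the lock */
        e->cbs = head->next;
        pthread_mutex_unlock(&e->lock);
        head->fn(new_status, head->arg); /* user code runs unlocked, so it may
                                            call back into the API safely;
                                            entries stay owned by their
                                            registrar */
        pthread_mutex_lock(&e->lock);    /* re-acquire before the next pop */
      }
      pthread_mutex_unlock(&e->lock);
    }
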
>
> -void cl_event_update_status(cl_event event, int wait)
> +LOCAL cl_int
> +cl_event_wait_for_event_ready(const cl_event event)
> {
> - if(event->status <= CL_COMPLETE)
> - return;
> - if((event->gpgpu_event) &&
> - (cl_gpgpu_event_update_status(event->gpgpu_event, wait) == command_complete))
> - cl_event_set_status(event, CL_COMPLETE);
> + assert(CL_OBJECT_IS_EVENT(event));
> + return cl_event_wait_for_events_list(event->depend_event_num, event->depend_events);
> }
>
> -cl_int cl_event_marker_with_wait_list(cl_command_queue queue,
> - cl_uint num_events_in_wait_list,
> - const cl_event *event_wait_list,
> - cl_event* event)
> +LOCAL cl_int
> +cl_event_wait_for_events_list(cl_uint num_events, const cl_event *event_list)
> {
> - enqueue_data data = { 0 };
> + int i;
> cl_event e;
> + cl_int ret = CL_SUCCESS;
>
> - e = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE);
> - if(e == NULL)
> - return CL_OUT_OF_HOST_MEMORY;
> -
> - if(event != NULL ){
> - *event = e;
> - }
> + for (i = 0; i < num_events; i++) {
> + e = event_list[i];
> + assert(e);
> + assert(CL_OBJECT_IS_EVENT(e));
>
> -//enqueues a marker command which waits for either a list of events to complete, or if the list is
> -//empty it waits for all commands previously enqueued in command_queue to complete before it completes.
> - if(num_events_in_wait_list > 0){
> - if(cl_event_wait_events(num_events_in_wait_list, event_wait_list, queue) == CL_ENQUEUE_EXECUTE_DEFER) {
> - data.type = EnqueueMarker;
> - cl_event_new_enqueue_callback(event?*event:NULL, &data, num_events_in_wait_list, event_wait_list);
> - return CL_SUCCESS;
> + CL_OBJECT_LOCK(e);
> + while (e->status > CL_COMPLETE) {
> + CL_OBJECT_WAIT_ON_COND(e);
> }
> - } else if(queue->wait_events_num > 0) {
> - data.type = EnqueueMarker;
> - cl_event_new_enqueue_callback(event?*event:NULL, &data, queue->wait_events_num, queue->wait_events);
> - return CL_SUCCESS;
> + /* If an error happened, return it. */
> + if (e->status < CL_COMPLETE) {
> + ret = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> + }
> + CL_OBJECT_UNLOCK(e);
> }
>
> - cl_event_update_last_events(queue,1);
> -
> - cl_event_set_status(e, CL_COMPLETE);
> - return CL_SUCCESS;
> + return ret;
> }
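
Blocking waits now reduce to a condition wait per event;
CL_OBJECT_WAIT_ON_COND/CL_OBJECT_NOTIFY_COND presumably map onto a condition
variable. A minimal equivalent in plain pthreads, types hypothetical:

    #include <pthread.h>

    struct waitable { pthread_mutex_t lock; pthread_cond_t cond; int status; };

    /* Block until the status drops to CL_COMPLETE (0) or below; a negative
       return is an error status, mirroring the loop above. */
    static int wait_complete(struct waitable *w)
    {
      int ret;
      pthread_mutex_lock(&w->lock);
      while (w->status > 0)
        pthread_cond_wait(&w->cond, &w->lock);
      ret = w->status;
      pthread_mutex_unlock(&w->lock);
      return ret;
    }
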
>
> -cl_int cl_event_barrier_with_wait_list(cl_command_queue queue,
> - cl_uint num_events_in_wait_list,
> - const cl_event *event_wait_list,
> - cl_event* event)
> +LOCAL cl_int
> +cl_event_check_waitlist(cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
> + cl_event *event, cl_context ctx)
> {
> - enqueue_data data = { 0 };
> - cl_event e;
> -
> - e = cl_event_new(queue->ctx, queue, CL_COMMAND_BARRIER, CL_TRUE);
> - if(e == NULL)
> - return CL_OUT_OF_HOST_MEMORY;
> + cl_int err = CL_SUCCESS;
> + cl_int i;
>
> - if(event != NULL ){
> - *event = e;
> - }
> -//enqueues a barrier command which waits for either a list of events to complete, or if the list is
> -//empty it waits for all commands previously enqueued in command_queue to complete before it completes.
> - if(num_events_in_wait_list > 0){
> - if(cl_event_wait_events(num_events_in_wait_list, event_wait_list, queue) == CL_ENQUEUE_EXECUTE_DEFER) {
> - data.type = EnqueueBarrier;
> - cl_event_new_enqueue_callback(e, &data, num_events_in_wait_list, event_wait_list);
> - return CL_SUCCESS;
> + do {
> + /* check the event_wait_list and num_events_in_wait_list */
> + if ((event_wait_list == NULL) && (num_events_in_wait_list > 0)) {
> + err = CL_INVALID_EVENT_WAIT_LIST;
> + break;
> }
> - } else if(queue->wait_events_num > 0) {
> - data.type = EnqueueBarrier;
> - cl_event_new_enqueue_callback(e, &data, queue->wait_events_num, queue->wait_events);
> - return CL_SUCCESS;
> - }
>
> - cl_event_update_last_events(queue,1);
> + if ((event_wait_list != NULL) && (num_events_in_wait_list == 0)) {
> + err = CL_INVALID_EVENT_WAIT_LIST;
> + break;
> + }
>
> - cl_event_set_status(e, CL_COMPLETE);
> - return CL_SUCCESS;
> -}
> + /* check the event and context */
> + for (i = 0; i < num_events_in_wait_list; i++) {
> + if (event_wait_list[i] == NULL || !CL_OBJECT_IS_EVENT(event_wait_list[i])) {
> + err = CL_INVALID_EVENT;
> + break;
> + }
>
> -cl_ulong cl_event_get_cpu_timestamp(cl_ulong *cpu_time)
> -{
> - struct timespec ts;
> + if (cl_event_get_status(event_wait_list[i]) < CL_COMPLETE) {
> + err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> + break;
> + }
>
> - if(clock_gettime(CLOCK_MONOTONIC_RAW,&ts) != 0){
> - printf("CPU Timmer error\n");
> - return CL_FALSE;
> - }
> - *cpu_time = (1000000000.0) * (cl_ulong) ts.tv_sec + (cl_ulong) ts.tv_nsec;
> + if (event == event_wait_list + i) { /* The out event points into the wait list itself */
> + err = CL_INVALID_EVENT_WAIT_LIST;
> + break;
> + }
>
> - return CL_SUCCESS;
> -}
> + /* check all belong to same context. */
> + if (ctx == NULL) {
> + ctx = event_wait_list[i]->ctx;
> + }
> + if (event_wait_list[i]->ctx != ctx) {
> + err = CL_INVALID_CONTEXT;
> + break;
> + }
> + }
>
> -cl_int cl_event_get_queued_cpu_timestamp(cl_event event)
> -{
> - cl_int ret_val;
> + if (err != CL_SUCCESS)
> + break;
>
> - ret_val = cl_event_get_cpu_timestamp(&event->queued_timestamp);
> + } while (0);
>
> - return ret_val;
> + return err;
> }
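
For the record, the two shape checks at the top correspond to errors a caller
can trigger directly; a quick sketch against the public API (the queue and the
event argument are assumed valid):

    #include <CL/cl.h>
    #include <assert.h>

    static void waitlist_shape_errors(cl_command_queue queue, cl_event some_ev)
    {
      /* count > 0 but list NULL, then list non-NULL but count == 0 */
      assert(clEnqueueMarkerWithWaitList(queue, 1, NULL, NULL)
             == CL_INVALID_EVENT_WAIT_LIST);
      assert(clEnqueueMarkerWithWaitList(queue, 0, &some_ev, NULL)
             == CL_INVALID_EVENT_WAIT_LIST);
    }
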
>
> -cl_ulong cl_event_get_timestamp_delta(cl_ulong start_timestamp,cl_ulong end_timestamp)
> +LOCAL void
> +cl_event_exec(cl_event event, cl_int exec_status)
> {
> - cl_ulong ret_val;
> + /* We are MT-safe here: no one should call this at the same time, so no lock is needed. */
> + cl_int ret = CL_SUCCESS;
> + cl_int status = cl_event_get_status(event);
> + cl_int depend_status;
>
> - if(end_timestamp > start_timestamp){
> - ret_val = end_timestamp - start_timestamp;
> - }
> - else {
> - /*if start time stamp is greater than end timstamp then set ret value to max*/
> - ret_val = ((cl_ulong) 1 << 32);
> + if (status < CL_COMPLETE || status <= exec_status) {
> + return;
> }
>
> - return ret_val;
> -}
> -
> -cl_ulong cl_event_get_start_timestamp(cl_event event)
> -{
> - cl_ulong ret_val;
> + depend_status = cl_event_is_ready(event);
> + assert(depend_status <= CL_COMPLETE);
> + if (depend_status < CL_COMPLETE) { // Error happened, cancel exec.
> + ret = cl_event_set_status(event, depend_status);
> + return;
> + }
>
> - ret_val = cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[2]);
> + /* Do the according thing based on event type. */
> + ret = cl_enqueue_handle(&event->exec_data, exec_status);
>
> - return ret_val;
> + if (ret != CL_SUCCESS) {
> + assert(ret < 0);
> + DEBUGP(DL_WARNING, "Exec event %p error, type is %d, error staus
> is %d",
> + event, event->event_type, ret);
> + ret = cl_event_set_status(event, ret);
> + assert(ret == CL_SUCCESS);
> + } else {
> + ret = cl_event_set_status(event, exec_status);
> + assert(ret == CL_SUCCESS);
> + }
> }
>
> -cl_ulong cl_event_get_end_timestamp(cl_event event)
> +/* 0 means ready, >0 means not ready, <0 means error. */
> +LOCAL cl_int
> +cl_event_is_ready(cl_event event)
> {
> - cl_ulong ret_val;
> -
> - ret_val = cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[3]);
> + int i;
> + int status;
>
> - return ret_val;
> -}
> + for (i = 0; i < event->depend_event_num; i++) {
> + status = cl_event_get_status(event->depend_events[i]);
>
> -cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name)
> -{
> - cl_ulong ret_val = 0;
> - GET_QUEUE_THREAD_GPGPU(event->queue);
> -
> - if (!event->gpgpu_event) {
> - cl_gpgpu_event_get_gpu_cur_timestamp(gpgpu, &ret_val);
> - event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val;
> - return CL_SUCCESS;
> + if (status != CL_COMPLETE) {
> + return status;
> + }
> }
>
> - if(param_name == CL_PROFILING_COMMAND_SUBMIT ||
> - param_name == CL_PROFILING_COMMAND_QUEUED) {
> - cl_gpgpu_event_get_gpu_cur_timestamp(gpgpu, &ret_val);
> - event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val;
> - return CL_SUCCESS;
> - } else if(param_name == CL_PROFILING_COMMAND_START) {
> - cl_gpgpu_event_get_exec_timestamp(gpgpu, event->gpgpu_event, 0, &ret_val);
> - event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val;
> - return CL_SUCCESS;
> - } else if (param_name == CL_PROFILING_COMMAND_END) {
> - cl_gpgpu_event_get_exec_timestamp(gpgpu, event->gpgpu_event, 1, &ret_val);
> - event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val;
> - return CL_SUCCESS;
> - }
> - return CL_INVALID_VALUE;
> + return CL_COMPLETE;
> }
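
How the two helpers above are meant to pair up from a queue worker is easiest
to see in a schematic fragment; the surrounding iteration over enqueued events
is elided and not part of this hunk:

    cl_int st = cl_event_is_ready(e);
    if (st <= CL_COMPLETE) {
      /* Ready, or a dependency failed: cl_event_exec re-checks readiness
         itself and either runs the enqueue work or cancels with the error. */
      cl_event_exec(e, CL_COMPLETE);
    }
    /* st > CL_COMPLETE: dependencies still pending; retry after the next
       cl_command_queue_notify. */
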
>
> -cl_int cl_event_insert_user_event(user_event** p_u_ev, cl_event event)
> +LOCAL cl_event
> +cl_event_create_marker_or_barrier(cl_command_queue queue, cl_uint num_events_in_wait_list,
> + const cl_event *event_wait_list, cl_bool is_barrier, cl_int *error)
> {
> - user_event * u_iter = *p_u_ev;
> - user_event * u_ev;
> -
> - while(u_iter)
> - {
> - if(u_iter->event == event)
> - return CL_SUCCESS;
> - u_iter = u_iter->next;
> - }
> + cl_event e = NULL;
> + cl_int err = CL_SUCCESS;
> + cl_command_type type = CL_COMMAND_MARKER;
> + enqueue_type eq_type = EnqueueMarker;
>
> - TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
> - u_ev->event = event;
> - u_ev->next = *p_u_ev;
> - *p_u_ev = u_ev;
> + if (is_barrier) {
> + type = CL_COMMAND_BARRIER;
> + eq_type = EnqueueBarrier;
> + }
>
> + if (event_wait_list) {
> + assert(num_events_in_wait_list > 0);
>
> - return CL_SUCCESS;
> -error:
> - return CL_FALSE;
> -}
> + e = cl_event_create(queue->ctx, queue, num_events_in_wait_list,
> + event_wait_list, type, &err);
> + if (err != CL_SUCCESS) {
> + *error = err;
> + return NULL;
> + }
> + } else { /* The marker depends on all events in the queue now. */
> + cl_command_queue_enqueue_worker worker = &queue->worker;
> + cl_uint i;
> + cl_uint event_num;
> + cl_event *depend_events;
> +
> + CL_OBJECT_LOCK(queue);
> +
> + /* First, wait for the command queue to retire all events in execution. */
> + while (1) {
> + if (worker->quit) { // queue already destroyed?
> + CL_OBJECT_UNLOCK(queue);
> + *error = CL_INVALID_COMMAND_QUEUE;
> + return NULL;
> + }
>
> -cl_int cl_event_remove_user_event(user_event** p_u_ev, cl_event event)
> -{
> - user_event * u_iter = *p_u_ev;
> - user_event * u_prev = *p_u_ev;
> -
> - while(u_iter){
> - if(u_iter->event == event ){
> - if(u_iter == *p_u_ev){
> - *p_u_ev = u_iter->next;
> - }else{
> - u_prev->next = u_iter->next;
> + if (worker->in_exec_status != CL_COMPLETE) {
> + CL_OBJECT_WAIT_ON_COND(queue);
> + continue;
> }
> - cl_free(u_iter);
> +
> break;
> }
> - u_prev = u_iter;
> - u_iter = u_iter->next;
> +
> + event_num = 0;
> + depend_events = NULL;
> + if (!list_empty(&worker->enqueued_events)) {
> + depend_events = cl_command_queue_record_in_queue_events(queue, &event_num);
> + }
> +
> + CL_OBJECT_UNLOCK(queue);
> +
> + e = cl_event_create(queue->ctx, queue, event_num, depend_events, type, &err);
> +
> + for (i = 0; i < event_num; i++) { // unref the temporary event list
> + cl_event_delete(depend_events[i]);
> + }
> + if (depend_events)
> + cl_free(depend_events);
> +
> + if (err != CL_SUCCESS) {
> + *error = err;
> + return NULL;
> + }
> }
>
> - return CL_SUCCESS;
> + e->exec_data.type = eq_type;
> + *error = CL_SUCCESS;
> + return e;
> }
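
Host-side, the empty-wait-list branch is what gives the marker its "wait for
everything enqueued so far" semantics; a minimal usage sketch with an assumed
valid queue:

    #include <CL/cl.h>

    /* The marker completes only after every command already enqueued on the
       queue, per the else-branch above that snapshots the in-queue events. */
    static cl_int queue_sync_point(cl_command_queue queue, cl_event *marker)
    {
      return clEnqueueMarkerWithWaitList(queue, 0, NULL, marker);
    }
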
> diff --git a/src/cl_event.h b/src/cl_event.h
> index 85cd53e..f67299c 100644
> --- a/src/cl_event.h
> +++ b/src/cl_event.h
> @@ -14,111 +14,75 @@
> * You should have received a copy of the GNU Lesser General Public
> * License along with this library. If not, see <http://www.gnu.org/licenses/>.
> *
> - * Author: Benjamin Segovia <benjamin.segovia at intel.com>
> */
>
> -#ifndef __CL_EVENT_H__
> -#define __CL_EVENT_H__
> +#ifndef __CL_EVENT_H_
> +#define __CL_EVENT_H_
>
> #include <semaphore.h>
>
> #include "cl_base_object.h"
> -#include "cl_driver.h"
> #include "cl_enqueue.h"
> #include "CL/cl.h"
>
> -#define CL_ENQUEUE_EXECUTE_IMM 0
> -#define CL_ENQUEUE_EXECUTE_DEFER 1
> +typedef void(CL_CALLBACK *cl_event_notify_cb)(cl_event event, cl_int event_command_exec_status, void *user_data);
>
> -typedef struct _user_event {
> - cl_event event; /* The user event */
> - struct _user_event* next; /* Next user event in list */
> -} user_event;
> +typedef struct _cl_event_user_callback {
> + cl_int status; /* The execution status */
> + cl_bool executed; /* Indicate whether the callback has been called */
> + cl_event_notify_cb pfn_notify; /* Callback function */
> + void *user_data; /* Callback user data */
> + list_head node; /* Event callback list node */
> +} _cl_event_user_callback;
>
> -typedef struct _enqueue_callback {
> - cl_event event; /* The event relative this enqueue callback */
> - enqueue_data data; /* Hold all enqueue callback's infomation */
> - cl_uint num_events; /* num events in wait list */
> - cl_event* wait_list; /* All event wait list this callback wait on */
> - user_event* wait_user_events; /* The head of user event list the callback wait on */
> - struct _enqueue_callback* next; /* The next enqueue callback in wait list */
> -} enqueue_callback;
> +typedef _cl_event_user_callback *cl_event_user_callback;
>
> -typedef void (CL_CALLBACK *EVENT_NOTIFY)(cl_event event, cl_int event_command_exec_status, void *user_data);
> -
> -typedef struct _user_callback {
> - cl_int status; /* The execution status */
> - cl_bool executed; /* Indicat the callback function been called or not */
> - EVENT_NOTIFY pfn_notify; /* Callback function */
> - void* user_data; /* Callback user data */
> - struct _user_callback* next; /* Next event callback in list */
> -} user_callback;
> -
> -struct _cl_event {
> - _cl_base_object base;
> - cl_context ctx; /* The context associated with event */
> - cl_command_queue queue; /* The command queue associated with event */
> - cl_command_type type; /* The command type associated with event */
> - cl_int status; /* The execution status */
> - cl_gpgpu gpgpu; /* Current gpgpu, owned by this structure. */
> - cl_gpgpu_event gpgpu_event; /* The event object communicate with hardware */
> - user_callback* user_cb; /* The event callback functions */
> - enqueue_callback* enqueue_cb; /* This event's enqueue */
> - enqueue_callback* waits_head; /* The head of enqueues list wait on this event */
> - cl_bool emplict; /* Identify this event whether created by api emplict*/
> - cl_ulong timestamp[4];/* The time stamps for profiling. */
> - cl_ulong queued_timestamp;
> - cl_event last_next, last_prev;/* We need a list to monitor untouchable api event*/
> -};
> +typedef struct _cl_event {
> + _cl_base_object base;
> + cl_context ctx; /* The context associated with event */
> + cl_command_queue queue; /* The command queue associated with event */
> + cl_command_type event_type; /* Event type. */
> + cl_bool is_barrier; /* Is this event a barrier */
> + cl_int status; /* The execution status */
> + cl_event *depend_events; /* The events must complete before this. */
> + cl_uint depend_event_num; /* The depend events number. */
> + list_head callbacks; /* The event callback functions */
> + list_head enqueue_node; /* The node in the enqueue list. */
> + cl_ulong timestamp[4]; /* The time stamps for profiling. */
> + cl_ulong queued_timestamp;
> + enqueue_data exec_data; /* Context for executing this event. */
> +} _cl_event;
>
> #define CL_OBJECT_EVENT_MAGIC 0x8324a9f810ebf90fLL
> -#define CL_OBJECT_IS_EVENT(obj) (((cl_base_object)obj)->magic == CL_OBJECT_EVENT_MAGIC)
> +#define CL_OBJECT_IS_EVENT(obj) ((obj && \
> + ((cl_base_object)obj)->magic == CL_OBJECT_EVENT_MAGIC && \
> + CL_OBJECT_GET_REF(obj) >= 1))
> +
> +#define CL_EVENT_IS_MARKER(E) (E->event_type == CL_COMMAND_MARKER)
> +#define CL_EVENT_IS_BARRIER(E) (E->event_type == CL_COMMAND_BARRIER)
> +#define CL_EVENT_IS_USER(E) (E->event_type == CL_COMMAND_USER)
>
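
The CL_EVENT_IS_USER split matters because user events have a reduced status
machine (they never pass through CL_QUEUED/CL_RUNNING, as the asserts in
cl_event_set_status encode). From the API side that looks like the sketch
below; ctx is assumed valid and "gate" is a hypothetical name:

    #include <CL/cl.h>

    /* A user event starts at CL_SUBMITTED and is moved straight to
       CL_COMPLETE (or a negative error) by the application. */
    static cl_event make_gate(cl_context ctx, cl_int *err)
    {
      return clCreateUserEvent(ctx, err);
    }
    /* later, from any thread: clSetUserEventStatus(gate, CL_COMPLETE); */
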
> /* Create a new event object */
> -cl_event cl_event_new(cl_context, cl_command_queue, cl_command_type, cl_bool);
> -/* Unref the object and delete it if no more reference on it */
> -void cl_event_delete(cl_event);
> -/* Add one more reference to this object */
> -void cl_event_add_ref(cl_event);
> -/* Register a user callback function for specific commond execution status */
> -cl_int cl_event_set_callback(cl_event, cl_int, EVENT_NOTIFY, void *);
> -/* Execute the event's callback if the event's status supersedes the callback's status. Free the callback if specified */
> -void cl_event_call_callback(cl_event event, cl_int status, cl_bool free_cb);
> -/* Check events wait list for enqueue commonds */
> -cl_int cl_event_check_waitlist(cl_uint, const cl_event *, cl_event *, cl_context);
> -/* Wait the all events in wait list complete */
> -cl_int cl_event_wait_events(cl_uint, const cl_event *, cl_command_queue);
> -/* New a enqueue suspend task */
> -void cl_event_new_enqueue_callback(cl_event, enqueue_data *, cl_uint, const cl_event *);
> -/* Set the event status and call all callbacks */
> -void cl_event_set_status(cl_event, cl_int);
> -/* Check and update event status */
> -void cl_event_update_status(cl_event, cl_int);
> -/* Create the marker event */
> -cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const cl_event *, cl_event*);
> -/* Create the barrier event */
> -cl_int cl_event_barrier_with_wait_list(cl_command_queue, cl_uint, const cl_event *, cl_event*);
> -/* Get the cpu time */
> -cl_ulong cl_event_get_cpu_timestamp(cl_ulong *cpu_time);
> -/*Get the cpu time for queued*/
> -cl_int cl_event_get_queued_cpu_timestamp(cl_event event);
> -/*get timestamp delate between end and start*/
> -cl_ulong cl_event_get_timestamp_delta(cl_ulong start_timestamp,cl_ulong end_timestamp);
> -/*Get start time stamp*/
> -cl_ulong cl_event_get_start_timestamp(cl_event event);
> -/*Get end time stamp*/
> -cl_ulong cl_event_get_end_timestamp(cl_event event);
> -/* Do the event profiling */
> -cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name);
> -/* insert the user event */
> -cl_int cl_event_insert_user_event(user_event** p_u_ev, cl_event event);
> -/* remove the user event */
> -cl_int cl_event_remove_user_event(user_event** p_u_ev, cl_event event);
> -/* flush the event's pending gpgpu batch buffer and notify driver this gpgpu event has been flushed. */
> -cl_int cl_event_flush(cl_event event);
> -/* monitor or block wait all events in the last_event list */
> -void cl_event_update_last_events(cl_command_queue queuet, int wait);
> -/* insert the event into the last_event list in queue */
> -void cl_event_insert_last_events(cl_command_queue queue, cl_event event);
> +extern cl_event cl_event_create(cl_context ctx, cl_command_queue queue, cl_uint num_events,
> + const cl_event *event_list, cl_command_type type, cl_int *errcode_ret);
> +extern cl_int cl_event_check_waitlist(cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
> + cl_event* event, cl_context ctx);
> +extern void cl_event_exec(cl_event event, cl_int exec_status);
> +/* 0 means ready, >0 means not ready, <0 means error. */
> +extern cl_int cl_event_is_ready(cl_event event);
> +extern cl_int cl_event_get_status(cl_event event);
> +extern void cl_event_add_ref(cl_event event);
> +extern void cl_event_delete(cl_event event);
> +extern cl_int cl_event_set_status(cl_event event, cl_int status);
> +extern cl_int cl_event_set_callback(cl_event event, cl_int exec_type,
> + cl_event_notify_cb pfn_notify, void *user_data);
> +extern cl_int cl_event_wait_for_events_list(cl_uint num_events, const cl_event *event_list);
> +extern cl_int cl_event_wait_for_event_ready(cl_event event);
> +extern cl_ulong cl_event_get_timestamp_delta(cl_ulong start_timestamp, cl_ulong end_timestamp);
> +extern cl_ulong cl_event_get_start_timestamp(cl_event event);
> +extern cl_ulong cl_event_get_end_timestamp(cl_event event);
> +extern cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name);
> +extern cl_event cl_event_create_marker_or_barrier(cl_command_queue queue, cl_uint num_events_in_wait_list,
> + const cl_event *event_wait_list, cl_bool is_barrier, cl_int* error);
> #endif /* __CL_EVENT_H__ */
> -
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index 06a4d5a..333ffc9 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -28,6 +28,7 @@
> #include "cl_kernel.h"
> #include "cl_command_queue.h"
> #include "cl_cmrt.h"
> +#include "cl_enqueue.h"
>
> #include "CL/cl.h"
> #include "CL/cl_intel.h"
> @@ -1264,7 +1265,7 @@ cl_mem_add_ref(cl_mem mem)
> #define LOCAL_SZ_2 4
>
> LOCAL cl_int
> -cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
> +cl_mem_copy(cl_command_queue queue, cl_event event, cl_mem src_buf, cl_mem dst_buf,
> size_t src_offset, size_t dst_offset, size_t cb)
> {
> cl_int ret = CL_SUCCESS;
> @@ -1317,7 +1318,7 @@ cl_mem_copy(cl_command_queue queue,
> cl_mem src_buf, cl_mem dst_buf,
> cl_kernel_set_arg(ker, 2, sizeof(cl_mem), &dst_buf);
> cl_kernel_set_arg(ker, 3, sizeof(int), &dw_dst_offset);
> cl_kernel_set_arg(ker, 4, sizeof(int), &cb);
> - ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz,
> local_sz);
> + ret = cl_command_queue_ND_range(queue, ker, event, 1, global_off,
> global_sz, local_sz);
> cl_kernel_delete(ker);
> return ret;
> }
> @@ -1358,7 +1359,7 @@ cl_mem_copy(cl_command_queue queue,
> cl_mem src_buf, cl_mem dst_buf,
> cl_kernel_set_arg(ker, 4, sizeof(int), &dw_num);
> cl_kernel_set_arg(ker, 5, sizeof(int), &first_mask);
> cl_kernel_set_arg(ker, 6, sizeof(int), &last_mask);
> - ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz,
> local_sz);
> + ret = cl_command_queue_ND_range(queue, ker, event, 1, global_off,
> global_sz, local_sz);
> cl_kernel_delete(ker);
> return ret;
> }
> @@ -1388,7 +1389,7 @@ cl_mem_copy(cl_command_queue queue,
> cl_mem src_buf, cl_mem dst_buf,
> cl_kernel_set_arg(ker, 6, sizeof(int), &last_mask);
> cl_kernel_set_arg(ker, 7, sizeof(int), &shift);
> cl_kernel_set_arg(ker, 8, sizeof(int), &dw_mask);
> - ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz,
> local_sz);
> + ret = cl_command_queue_ND_range(queue, ker, event, 1, global_off,
> global_sz, local_sz);
> cl_kernel_delete(ker);
> return ret;
> }
> @@ -1420,7 +1421,7 @@ cl_mem_copy(cl_command_queue queue,
> cl_mem src_buf, cl_mem dst_buf,
> cl_kernel_set_arg(ker, 7, sizeof(int), &shift);
> cl_kernel_set_arg(ker, 8, sizeof(int), &dw_mask);
> cl_kernel_set_arg(ker, 9, sizeof(int), &src_less);
> - ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz,
> local_sz);
> + ret = cl_command_queue_ND_range(queue, ker, event, 1, global_off,
> global_sz, local_sz);
> cl_kernel_delete(ker);
> return ret;
> }
> @@ -1493,13 +1494,13 @@ cl_image_fill(cl_command_queue queue, const
> void * pattern, struct _cl_mem_image
> cl_kernel_set_arg(ker, 6, sizeof(cl_int), &origin[1]);
> cl_kernel_set_arg(ker, 7, sizeof(cl_int), &origin[2]);
>
> - ret = cl_command_queue_ND_range(queue, ker, 3, global_off, global_sz,
> local_sz);
> + ret = cl_command_queue_ND_range(queue, ker, NULL, 3, global_off,
> global_sz, local_sz);
> cl_kernel_delete(ker);
> return ret;
> }
>
> LOCAL cl_int
> -cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size,
> +cl_mem_fill(cl_command_queue queue, cl_event e, const void * pattern, size_t pattern_size,
> cl_mem buffer, size_t offset, size_t size)
> {
> cl_int ret = CL_SUCCESS;
> @@ -1596,13 +1597,13 @@ cl_mem_fill(cl_command_queue queue, const
> void * pattern, size_t pattern_size,
> if (is_128)
> cl_kernel_set_arg(ker, 4, pattern_size, pattern1);
>
> - ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz,
> local_sz);
> + ret = cl_command_queue_ND_range(queue, ker, e, 1, global_off,
> global_sz, local_sz);
> cl_kernel_delete(ker);
> return ret;
> }
>
> LOCAL cl_int
> -cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
> +cl_mem_copy_buffer_rect(cl_command_queue queue, cl_event event, cl_mem src_buf, cl_mem dst_buf,
> const size_t *src_origin, const size_t *dst_origin, const size_t *region,
> size_t src_row_pitch, size_t src_slice_pitch,
> size_t dst_row_pitch, size_t dst_slice_pitch) {
> @@ -1617,7 +1618,7 @@ cl_mem_copy_buffer_rect(cl_command_queue
> queue, cl_mem src_buf, cl_mem dst_buf,
> cl_int src_offset = src_origin[2]*src_slice_pitch +
> src_origin[1]*src_row_pitch + src_origin[0];
> cl_int dst_offset = dst_origin[2]*dst_slice_pitch +
> dst_origin[1]*dst_row_pitch + dst_origin[0];
> cl_int size = region[0]*region[1]*region[2];
> - ret = cl_mem_copy(queue, src_buf, dst_buf,src_offset, dst_offset, size);
> + ret = cl_mem_copy(queue, NULL, src_buf, dst_buf,src_offset, dst_offset,
> size);
> return ret;
> }
>
> @@ -1669,14 +1670,15 @@ cl_mem_copy_buffer_rect(cl_command_queue
> queue, cl_mem src_buf, cl_mem dst_buf,
> cl_kernel_set_arg(ker, 9, sizeof(cl_int), &dst_row_pitch);
> cl_kernel_set_arg(ker, 10, sizeof(cl_int), &dst_slice_pitch);
>
> - ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz,
> local_sz);
> + ret = cl_command_queue_ND_range(queue, ker, event, 1, global_off,
> global_sz, local_sz);
> cl_kernel_delete(ker);
> return ret;
> }
>
> LOCAL cl_int
> -cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image, struct _cl_mem_image* dst_image,
> - const size_t *src_origin, const size_t *dst_origin, const size_t *region) {
> +cl_mem_kernel_copy_image(cl_command_queue queue, cl_event event, struct _cl_mem_image* src_image,
> + struct _cl_mem_image* dst_image, const size_t *src_origin,
> + const size_t *dst_origin, const size_t *region) {
> cl_int ret;
> cl_kernel ker = NULL;
> size_t global_off[] = {0,0,0};
> @@ -1817,7 +1819,7 @@ cl_mem_kernel_copy_image(cl_command_queue
> queue, struct _cl_mem_image* src_image
> cl_kernel_set_arg(ker, 9, sizeof(cl_int), &dst_origin[1]);
> cl_kernel_set_arg(ker, 10, sizeof(cl_int), &dst_origin[2]);
>
> - ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz,
> local_sz);
> + ret = cl_command_queue_ND_range(queue, ker, event, 1, global_off,
> global_sz, local_sz);
>
> fail:
>
> @@ -1830,7 +1832,7 @@ fail:
> }
>
> LOCAL cl_int
> -cl_mem_copy_image_to_buffer(cl_command_queue queue, struct _cl_mem_image* image, cl_mem buffer,
> +cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_mem_image* image, cl_mem buffer,
> const size_t *src_origin, const size_t dst_offset, const size_t *region) {
> cl_int ret;
> cl_kernel ker = NULL;
> @@ -1919,7 +1921,7 @@
> cl_mem_copy_image_to_buffer(cl_command_queue queue, struct
> _cl_mem_image* image,
> cl_kernel_set_arg(ker, 7, sizeof(cl_int), &src_origin[2]);
> cl_kernel_set_arg(ker, 8, sizeof(cl_int), &kn_dst_offset);
>
> - ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz,
> local_sz);
> + ret = cl_command_queue_ND_range(queue, ker, event, 1, global_off,
> global_sz, local_sz);
>
> fail:
>
> @@ -1933,7 +1935,7 @@ fail:
>
>
> LOCAL cl_int
> -cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_mem buffer, struct _cl_mem_image* image,
> +cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event event, cl_mem buffer, struct _cl_mem_image* image,
> const size_t src_offset, const size_t *dst_origin, const size_t *region) {
> cl_int ret;
> cl_kernel ker = NULL;
> @@ -2019,7 +2021,7 @@
> cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_mem
> buffer, struct _cl_me
> cl_kernel_set_arg(ker, 7, sizeof(cl_int), &dst_origin[2]);
> cl_kernel_set_arg(ker, 8, sizeof(cl_int), &kn_src_offset);
>
> - ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz,
> local_sz);
> + ret = cl_command_queue_ND_range(queue, ker, event, 1, global_off,
> global_sz, local_sz);
> cl_kernel_delete(ker);
>
> image->intel_fmt = intel_fmt;
> @@ -2308,3 +2310,83 @@ error:
> mem = NULL;
> goto exit;
> }
> +
> +LOCAL cl_int
> +cl_mem_record_map_mem(cl_mem mem, void *ptr, void **mem_ptr, size_t offset,
> + size_t size, const size_t *origin, const size_t *region)
> +{
> + // TODO: Need to add MT safe logic.
> +
> + cl_int slot = -1;
> + int err = CL_SUCCESS;
> + size_t sub_offset = 0;
> +
> + if(mem->type == CL_MEM_SUBBUFFER_TYPE) {
> + struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
> + sub_offset = buffer->sub_offset;
> + }
> +
> + ptr = (char*)ptr + offset + sub_offset;
> + if(mem->flags & CL_MEM_USE_HOST_PTR) {
> + assert(mem->host_ptr);
> + // only compute the pointer here; the memcpy is done at enqueue time
> + *mem_ptr = (char *)mem->host_ptr + offset + sub_offset;
> + } else {
> + *mem_ptr = ptr;
> + }
> + /* Record the mapped address. */
> + if (!mem->mapped_ptr_sz) {
> + mem->mapped_ptr_sz = 16;
> + mem->mapped_ptr = (cl_mapped_ptr *)malloc(
> + sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz);
> + if (!mem->mapped_ptr) {
> + cl_mem_unmap_auto(mem);
> + err = CL_OUT_OF_HOST_MEMORY;
> + goto error;
> + }
> + memset(mem->mapped_ptr, 0, mem->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> + slot = 0;
> + } else {
> + int i = 0;
> + for (; i < mem->mapped_ptr_sz; i++) {
> + if (mem->mapped_ptr[i].ptr == NULL) {
> + slot = i;
> + break;
> + }
> + }
> + if (i == mem->mapped_ptr_sz) {
> + cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
> + sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz * 2);
> + if (!new_ptr) {
> + cl_mem_unmap_auto(mem);
> + err = CL_OUT_OF_HOST_MEMORY;
> + goto error;
> + }
> + memset(new_ptr, 0, 2 * mem->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> + memcpy(new_ptr, mem->mapped_ptr,
> + mem->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> + slot = mem->mapped_ptr_sz;
> + mem->mapped_ptr_sz *= 2;
> + free(mem->mapped_ptr);
> + mem->mapped_ptr = new_ptr;
> + }
> + }
> + assert(slot != -1);
> + mem->mapped_ptr[slot].ptr = *mem_ptr;
> + mem->mapped_ptr[slot].v_ptr = ptr;
> + mem->mapped_ptr[slot].size = size;
> + if(origin) {
> + assert(region);
> + mem->mapped_ptr[slot].origin[0] = origin[0];
> + mem->mapped_ptr[slot].origin[1] = origin[1];
> + mem->mapped_ptr[slot].origin[2] = origin[2];
> + mem->mapped_ptr[slot].region[0] = region[0];
> + mem->mapped_ptr[slot].region[1] = region[1];
> + mem->mapped_ptr[slot].region[2] = region[2];
> + }
> + mem->map_ref++;
> +error:
> + if (err != CL_SUCCESS)
> + *mem_ptr = NULL;
> + return err;
> +}
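
Each successful map records one slot through this helper and the matching
unmap releases it; from the public API the round trip looks like the sketch
below (queue, buffer and size assumed valid):

    #include <CL/cl.h>

    static cl_int map_roundtrip(cl_command_queue queue, cl_mem buf, size_t size)
    {
      cl_int err;
      void *p = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_WRITE,
                                   0, size, 0, NULL, NULL, &err);
      if (err != CL_SUCCESS)
        return err;
      /* ... fill p ... */
      return clEnqueueUnmapMemObject(queue, buf, p, 0, NULL, NULL);
    }
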
> diff --git a/src/cl_mem.h b/src/cl_mem.h
> index 9bb5c47..82f30f6 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -101,7 +101,17 @@ typedef struct _cl_mem {
> } _cl_mem;
>
> #define CL_OBJECT_MEM_MAGIC 0x381a27b9ee6504dfLL
> -#define CL_OBJECT_IS_MEM(obj) (((cl_base_object)obj)->magic == CL_OBJECT_MEM_MAGIC)
> +#define CL_OBJECT_IS_MEM(obj) ((obj && \
> + ((cl_base_object)obj)->magic == CL_OBJECT_MEM_MAGIC && \
> + CL_OBJECT_GET_REF(obj) >= 1))
> +#define CL_OBJECT_IS_IMAGE(mem) ((mem && \
> + ((cl_base_object)mem)->magic == CL_OBJECT_MEM_MAGIC && \
> + CL_OBJECT_GET_REF(mem) >= 1 && \
> + mem->type >= CL_MEM_IMAGE_TYPE))
> +#define CL_OBJECT_IS_BUFFER(mem) ((mem && \
> + ((cl_base_object)mem)->magic == CL_OBJECT_MEM_MAGIC && \
> + CL_OBJECT_GET_REF(mem) >= 1 && \
> + mem->type < CL_MEM_IMAGE_TYPE))
>
> struct _cl_mem_image {
> _cl_mem base;
> @@ -221,30 +231,30 @@ extern void cl_mem_gl_delete(struct
> _cl_mem_gl_image *);
> extern void cl_mem_add_ref(cl_mem);
>
> /* api clEnqueueCopyBuffer help function */
> -extern cl_int cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
> +extern cl_int cl_mem_copy(cl_command_queue queue, cl_event event, cl_mem src_buf, cl_mem dst_buf,
> size_t src_offset, size_t dst_offset, size_t cb);
>
> -extern cl_int cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size,
> +extern cl_int cl_mem_fill(cl_command_queue queue, cl_event e, const void * pattern, size_t pattern_size,
> cl_mem buffer, size_t offset, size_t size);
>
> extern cl_int cl_image_fill(cl_command_queue queue, const void * pattern, struct _cl_mem_image*,
> const size_t *, const size_t *);
>
> /* api clEnqueueCopyBufferRect help function */
> -extern cl_int cl_mem_copy_buffer_rect(cl_command_queue, cl_mem, cl_mem,
> +extern cl_int cl_mem_copy_buffer_rect(cl_command_queue, cl_event event, cl_mem, cl_mem,
> const size_t *, const size_t *, const size_t *,
> size_t, size_t, size_t, size_t);
>
> /* api clEnqueueCopyImage help function */
> -extern cl_int cl_mem_kernel_copy_image(cl_command_queue, struct _cl_mem_image*, struct _cl_mem_image*,
> - const size_t *, const size_t *, const size_t *);
> +extern cl_int cl_mem_kernel_copy_image(cl_command_queue, cl_event event, struct _cl_mem_image*,
> + struct _cl_mem_image*, const size_t *, const size_t *, const size_t *);
>
> /* api clEnqueueCopyImageToBuffer help function */
> -extern cl_int cl_mem_copy_image_to_buffer(cl_command_queue, struct _cl_mem_image*, cl_mem,
> +extern cl_int cl_mem_copy_image_to_buffer(cl_command_queue, cl_event, struct _cl_mem_image*, cl_mem,
> const size_t *, const size_t, const size_t *);
>
> /* api clEnqueueCopyBufferToImage help function */
> -extern cl_int cl_mem_copy_buffer_to_image(cl_command_queue, cl_mem, struct _cl_mem_image*,
> +extern cl_int cl_mem_copy_buffer_to_image(cl_command_queue, cl_event, cl_mem, struct _cl_mem_image*,
> const size_t, const size_t *, const size_t *);
>
> /* Directly map a memory object */
> @@ -318,5 +328,8 @@ extern cl_mem cl_mem_new_image_from_fd(cl_context ctx,
> size_t row_pitch,
> cl_int *errcode);
>
> +extern cl_int cl_mem_record_map_mem(cl_mem mem, void *ptr, void **mem_ptr, size_t offset,
> + size_t size, const size_t *origin, const size_t *region);
> +
> #endif /* __CL_MEM_H__ */
>
> --
> 2.7.4
>
>
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet