[Beignet] [PATCH 6/9] Modify all event-related functions to use the new event handle.
junyan.he at inbox.com
Wed Sep 21 09:47:21 UTC 2016
From: Junyan He <junyan.he at intel.com>
Rewrite cl_event and modify all event-related functions to use the
new event implementation. Events now cooperate with the command
queue's worker thread.
v2:
Fix a logic problem when event creation fails.
Signed-off-by: Junyan He <junyan.he at intel.com>
---
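Note (not part of the commit): the event/queue-thread cooperation described
above can be pictured with the minimal C sketch below. It is illustrative
only; the work_queue_t/cmd_t types, the pthread worker loop, and the defer()
helper are assumptions made for this note, not the code this patch adds. The
patch itself wires the new path up through cl_command_queue_init_enqueue()
and cl_command_queue_destroy_enqueue() (see the cl_command_queue.c hunk);
the sketch only shows the general producer/worker shape.

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

/* Hypothetical command record; the real code uses enqueue_data/cl_event. */
typedef struct cmd {
  void (*run)(void *arg); /* work executed on the queue's worker thread */
  void *arg;
  struct cmd *next;
} cmd_t;

/* Hypothetical per-queue work list: commands whose wait list is not yet
 * satisfied are pushed here and picked up by the worker thread. */
typedef struct {
  pthread_mutex_t lock;
  pthread_cond_t cond;
  cmd_t *head, *tail;
  bool quit;
} work_queue_t;

static void *worker(void *p)
{
  work_queue_t *q = p;
  for (;;) {
    pthread_mutex_lock(&q->lock);
    while (q->head == NULL && !q->quit)
      pthread_cond_wait(&q->cond, &q->lock);
    if (q->head == NULL) { /* quit requested and nothing left to run */
      pthread_mutex_unlock(&q->lock);
      return NULL;
    }
    cmd_t *c = q->head;
    q->head = c->next;
    if (q->head == NULL)
      q->tail = NULL;
    pthread_mutex_unlock(&q->lock);

    c->run(c->arg); /* here the real code would set the event to
                       CL_COMPLETE and fire its callbacks */
    free(c);
  }
}

/* Deferred path: instead of executing immediately, hand the command to
 * the worker thread (the CL_ENQUEUE_EXECUTE_DEFER case). */
static void defer(work_queue_t *q, void (*run)(void *), void *arg)
{
  cmd_t *c = malloc(sizeof(*c));
  if (c == NULL)
    return; /* a real implementation would report CL_OUT_OF_HOST_MEMORY */
  c->run = run;
  c->arg = arg;
  c->next = NULL;
  pthread_mutex_lock(&q->lock);
  if (q->tail != NULL)
    q->tail->next = c;
  else
    q->head = c;
  q->tail = c;
  pthread_cond_signal(&q->cond);
  pthread_mutex_unlock(&q->lock);
}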
src/CMakeLists.txt | 5 +
src/cl_api.c | 1888 +---------------------------------------
src/cl_api_kernel.c | 27 +-
src/cl_command_queue.c | 98 ++-
src/cl_command_queue.h | 7 +-
src/cl_command_queue_enqueue.c | 8 +-
src/cl_command_queue_gen7.c | 21 +-
src/cl_enqueue.c | 500 ++++++-----
src/cl_enqueue.h | 41 +-
src/cl_event.c | 1067 ++++++++++-------------
src/cl_event.h | 146 ++--
src/cl_mem.c | 118 ++-
src/cl_mem.h | 29 +-
13 files changed, 1070 insertions(+), 2885 deletions(-)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 009d057..626b43f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -68,6 +68,10 @@ set(OPENCL_SRC
${KERNEL_STR_FILES}
cl_base_object.c
cl_api.c
+ cl_api_mem.c
+ cl_api_kernel.c
+ cl_api_command_queue.c
+ cl_api_event.c
cl_alloc.c
cl_kernel.c
cl_program.c
@@ -85,6 +89,7 @@ set(OPENCL_SRC
cl_command_queue.c
cl_command_queue.h
cl_command_queue_gen7.c
+ cl_command_queue_enqueue.c
cl_thread.c
cl_driver.h
cl_driver.cpp
diff --git a/src/cl_api.c b/src/cl_api.c
index a2fee15..f8c48de 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -67,92 +67,6 @@ typedef intptr_t cl_device_partition_property;
return RET; \
} while(0)
-static inline cl_int
-handle_events(cl_command_queue queue, cl_int num, const cl_event *wait_list,
- cl_event* event, enqueue_data* data, cl_command_type type)
-{
- cl_int status = cl_event_wait_events(num, wait_list, queue);
- cl_event e = NULL;
- if(event != NULL || status == CL_ENQUEUE_EXECUTE_DEFER) {
- e = cl_event_new(queue->ctx, queue, type, event!=NULL);
-
- /* if need profiling, add the submit timestamp here. */
- if (e->type != CL_COMMAND_USER &&
- e->queue->props & CL_QUEUE_PROFILING_ENABLE) {
- cl_event_get_timestamp(e, CL_PROFILING_COMMAND_QUEUED);
- cl_event_get_queued_cpu_timestamp(e);
- }
-
- if(event != NULL)
- *event = e;
- if(status == CL_ENQUEUE_EXECUTE_DEFER) {
- cl_event_new_enqueue_callback(e, data, num, wait_list);
- }
- }
- set_current_event(queue, e);
- return status;
-}
-
-/* The following code checking overlap is from Appendix of openCL spec 1.1 */
-cl_bool check_copy_overlap(const size_t src_offset[3],
- const size_t dst_offset[3],
- const size_t region[3],
- size_t row_pitch, size_t slice_pitch)
-{
- const size_t src_min[] = {src_offset[0], src_offset[1], src_offset[2]};
- const size_t src_max[] = {src_offset[0] + region[0],
- src_offset[1] + region[1],
- src_offset[2] + region[2]};
- const size_t dst_min[] = {dst_offset[0], dst_offset[1], dst_offset[2]};
- const size_t dst_max[] = {dst_offset[0] + region[0],
- dst_offset[1] + region[1],
- dst_offset[2] + region[2]};
- // Check for overlap
- cl_bool overlap = CL_TRUE;
- unsigned i;
- size_t dst_start = dst_offset[2] * slice_pitch +
- dst_offset[1] * row_pitch + dst_offset[0];
- size_t dst_end = dst_start + (region[2] * slice_pitch +
- region[1] * row_pitch + region[0]);
- size_t src_start = src_offset[2] * slice_pitch +
- src_offset[1] * row_pitch + src_offset[0];
- size_t src_end = src_start + (region[2] * slice_pitch +
- region[1] * row_pitch + region[0]);
-
- for (i=0; i != 3; ++i) {
- overlap = overlap && (src_min[i] < dst_max[i])
- && (src_max[i] > dst_min[i]);
- }
-
- if (!overlap) {
- size_t delta_src_x = (src_offset[0] + region[0] > row_pitch) ?
- src_offset[0] + region[0] - row_pitch : 0;
- size_t delta_dst_x = (dst_offset[0] + region[0] > row_pitch) ?
- dst_offset[0] + region[0] - row_pitch : 0;
- if ( (delta_src_x > 0 && delta_src_x > dst_offset[0]) ||
- (delta_dst_x > 0 && delta_dst_x > src_offset[0]) ) {
- if ( (src_start <= dst_start && dst_start < src_end) ||
- (dst_start <= src_start && src_start < dst_end) )
- overlap = CL_TRUE;
- }
- if (region[2] > 1) {
- size_t src_height = slice_pitch / row_pitch;
- size_t dst_height = slice_pitch / row_pitch;
- size_t delta_src_y = (src_offset[1] + region[1] > src_height) ?
- src_offset[1] + region[1] - src_height : 0;
- size_t delta_dst_y = (dst_offset[1] + region[1] > dst_height) ?
- dst_offset[1] + region[1] - dst_height : 0;
- if ( (delta_src_y > 0 && delta_src_y > dst_offset[1]) ||
- (delta_dst_y > 0 && delta_dst_y > src_offset[1]) ) {
- if ( (src_start <= dst_start && dst_start < src_end) ||
- (dst_start <= src_start && src_start < dst_end) )
- overlap = CL_TRUE;
- }
- }
- }
- return overlap;
-}
-
static cl_int
cl_check_device_type(cl_device_type device_type)
{
@@ -448,16 +362,6 @@ error:
}
cl_int
-clReleaseCommandQueue(cl_command_queue command_queue)
-{
- cl_int err = CL_SUCCESS;
- CHECK_QUEUE (command_queue);
- cl_command_queue_delete(command_queue);
-error:
- return err;
-}
-
-cl_int
clGetCommandQueueInfo(cl_command_queue command_queue,
cl_command_queue_info param_name,
size_t param_value_size,
@@ -1369,26 +1273,6 @@ clGetKernelSubGroupInfoKHR(cl_kernel kernel,
}
cl_int
-clWaitForEvents(cl_uint num_events,
- const cl_event * event_list)
-{
- cl_int err = CL_SUCCESS;
- cl_context ctx = NULL;
-
- if(num_events > 0 && event_list)
- ctx = event_list[0]->ctx;
-
- TRY(cl_event_check_waitlist, num_events, event_list, NULL, ctx);
-
- while(cl_event_wait_events(num_events, event_list, NULL) == CL_ENQUEUE_EXECUTE_DEFER) {
- usleep(8000); //sleep 8ms to wait other thread
- }
-
-error:
- return err;
-}
-
-cl_int
clGetEventInfo(cl_event event,
cl_event_info param_name,
size_t param_value_size,
@@ -1403,9 +1287,9 @@ clGetEventInfo(cl_event event,
} else if (param_name == CL_EVENT_CONTEXT) {
FILL_GETINFO_RET (cl_context, 1, &event->ctx, CL_SUCCESS);
} else if (param_name == CL_EVENT_COMMAND_TYPE) {
- FILL_GETINFO_RET (cl_command_type, 1, &event->type, CL_SUCCESS);
+ FILL_GETINFO_RET (cl_command_type, 1, &event->event_type, CL_SUCCESS);
} else if (param_name == CL_EVENT_COMMAND_EXECUTION_STATUS) {
- cl_event_update_status(event, 0);
+ cl_event_get_status(event);
FILL_GETINFO_RET (cl_int, 1, &event->status, CL_SUCCESS);
} else if (param_name == CL_EVENT_REFERENCE_COUNT) {
cl_uint ref = CL_OBJECT_GET_REF(event);
@@ -1419,22 +1303,6 @@ error:
}
-cl_event
-clCreateUserEvent(cl_context context,
- cl_int * errcode_ret)
-{
- cl_int err = CL_SUCCESS;
- cl_event event = NULL;
- CHECK_CONTEXT(context);
-
- TRY_ALLOC(event, cl_event_new(context, NULL, CL_COMMAND_USER, CL_TRUE));
-
-error:
- if(errcode_ret)
- *errcode_ret = err;
- return event;
-}
-
cl_int
clRetainEvent(cl_event event)
{
@@ -1459,48 +1327,6 @@ error:
return err;
}
-cl_int
-clSetUserEventStatus(cl_event event,
- cl_int execution_status)
-{
- cl_int err = CL_SUCCESS;
-
- CHECK_EVENT(event);
- if(execution_status > CL_COMPLETE) {
- err = CL_INVALID_VALUE;
- goto error;
- }
- if(event->status != CL_SUBMITTED) {
- err = CL_INVALID_OPERATION;
- goto error;
- }
-
- cl_event_set_status(event, execution_status);
-error:
- return err;
-}
-
-cl_int
-clSetEventCallback(cl_event event,
- cl_int command_exec_callback_type,
- void (CL_CALLBACK * pfn_notify) (cl_event, cl_int, void *),
- void * user_data)
-{
- cl_int err = CL_SUCCESS;
-
- CHECK_EVENT(event);
- if((pfn_notify == NULL) ||
- (command_exec_callback_type > CL_SUBMITTED) ||
- (command_exec_callback_type < CL_COMPLETE)) {
- err = CL_INVALID_VALUE;
- goto error;
- }
- err = cl_event_set_callback(event, command_exec_callback_type, pfn_notify, user_data);
-
-error:
- return err;
-
-}
cl_int
clGetEventProfilingInfo(cl_event event,
@@ -1513,9 +1339,9 @@ clGetEventProfilingInfo(cl_event event,
cl_ulong ret_val;
CHECK_EVENT(event);
- cl_event_update_status(event, 0);
+ //cl_event_update_status(event, 0);
- if (event->type == CL_COMMAND_USER ||
+ if (event->event_type == CL_COMMAND_USER ||
!(event->queue->props & CL_QUEUE_PROFILING_ENABLE) ||
event->status != CL_COMPLETE) {
err = CL_PROFILING_INFO_NOT_AVAILABLE;
@@ -1552,1712 +1378,6 @@ error:
return err;
}
-cl_int
-clFlush(cl_command_queue command_queue)
-{
- /* have nothing to do now, as currently
- * clEnqueueNDRangeKernel will flush at
- * the end of each calling. we may need
- * to optimize it latter.*/
- return 0;
-}
-
-cl_int
-clFinish(cl_command_queue command_queue)
-{
- cl_int err = CL_SUCCESS;
-
- CHECK_QUEUE (command_queue);
-
-#ifdef HAS_CMRT
- if (command_queue->cmrt_event != NULL)
- return cmrt_wait_for_task_finished(command_queue);
-#endif
-
- err = cl_command_queue_finish(command_queue);
-
-error:
- return err;
-}
-
-cl_int
-clEnqueueReadBuffer(cl_command_queue command_queue,
- cl_mem buffer,
- cl_bool blocking_read,
- size_t offset,
- size_t size,
- void * ptr,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
-{
- cl_int err = CL_SUCCESS;
- enqueue_data *data, defer_enqueue_data = { 0 };
- CHECK_QUEUE(command_queue);
- CHECK_MEM(buffer);
- if (command_queue->ctx != buffer->ctx) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
-
- if (!ptr || !size || offset + size > buffer->size) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (buffer->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) {
- err = CL_INVALID_OPERATION;
- goto error;
- }
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);
-
- data = &defer_enqueue_data;
- data->type = EnqueueReadBuffer;
- data->mem_obj = buffer;
- data->ptr = ptr;
- data->offset = offset;
- data->size = size;
-
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_READ_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
- err = cl_enqueue_handle(event ? *event : NULL, data);
- if(event) cl_event_set_status(*event, CL_COMPLETE);
- }
-
-error:
- return err;
-}
-
-cl_int
-clEnqueueReadBufferRect(cl_command_queue command_queue,
- cl_mem buffer,
- cl_bool blocking_read,
- const size_t * buffer_origin,
- const size_t * host_origin,
- const size_t * region,
- size_t buffer_row_pitch,
- size_t buffer_slice_pitch,
- size_t host_row_pitch,
- size_t host_slice_pitch,
- void * ptr,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
-{
- cl_int err = CL_SUCCESS;
- enqueue_data *data, no_wait_data = { 0 };
-
- CHECK_QUEUE(command_queue);
- CHECK_MEM(buffer);
-
- if (command_queue->ctx != buffer->ctx) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
-
- if (buffer->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) {
- err = CL_INVALID_OPERATION;
- goto error;
- }
-
- if (!ptr || !region || region[0] == 0 || region[1] == 0 || region[2] == 0) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if(buffer_row_pitch == 0)
- buffer_row_pitch = region[0];
- if(buffer_slice_pitch == 0)
- buffer_slice_pitch = region[1] * buffer_row_pitch;
-
- if(host_row_pitch == 0)
- host_row_pitch = region[0];
- if(host_slice_pitch == 0)
- host_slice_pitch = region[1] * host_row_pitch;
-
- if (buffer_row_pitch < region[0] ||
- host_row_pitch < region[0]) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if ((buffer_slice_pitch < region[1] * buffer_row_pitch || buffer_slice_pitch % buffer_row_pitch != 0 ) ||
- (host_slice_pitch < region[1] * host_row_pitch || host_slice_pitch % host_row_pitch != 0 )) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if ((buffer_origin[2] + region[2] - 1) * buffer_slice_pitch
- + (buffer_origin[1] + region[1] - 1) * buffer_row_pitch
- + buffer_origin[0] + region[0] > buffer->size) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);
-
- data = &no_wait_data;
- data->type = EnqueueReadBufferRect;
- data->mem_obj = buffer;
- data->ptr = ptr;
- data->origin[0] = buffer_origin[0]; data->origin[1] = buffer_origin[1]; data->origin[2] = buffer_origin[2];
- data->host_origin[0] = host_origin[0]; data->host_origin[1] = host_origin[1]; data->host_origin[2] = host_origin[2];
- data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2];
- data->row_pitch = buffer_row_pitch;
- data->slice_pitch = buffer_slice_pitch;
- data->host_row_pitch = host_row_pitch;
- data->host_slice_pitch = host_slice_pitch;
-
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_READ_BUFFER_RECT) == CL_ENQUEUE_EXECUTE_IMM) {
- err = cl_enqueue_handle(event ? *event : NULL, data);
- if(event) cl_event_set_status(*event, CL_COMPLETE);
- }
-
- error:
- return err;
-}
-
-cl_int
-clEnqueueWriteBuffer(cl_command_queue command_queue,
- cl_mem buffer,
- cl_bool blocking_write,
- size_t offset,
- size_t size,
- const void * ptr,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
-{
- cl_int err = CL_SUCCESS;
- enqueue_data *data, no_wait_data = { 0 };
-
- CHECK_QUEUE(command_queue);
- CHECK_MEM(buffer);
- if (command_queue->ctx != buffer->ctx) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
-
- if (!ptr || !size || offset + size > buffer->size) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (buffer->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) {
- err = CL_INVALID_OPERATION;
- goto error;
- }
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);
-
- data = &no_wait_data;
- data->type = EnqueueWriteBuffer;
- data->mem_obj = buffer;
- data->const_ptr = ptr;
- data->offset = offset;
- data->size = size;
-
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_WRITE_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
- err = cl_enqueue_handle(event ? *event : NULL, data);
- if(event) cl_event_set_status(*event, CL_COMPLETE);
- }
-
- error:
- return err;
-}
-
-cl_int
-clEnqueueWriteBufferRect(cl_command_queue command_queue,
- cl_mem buffer,
- cl_bool blocking_write,
- const size_t * buffer_origin,
- const size_t * host_origin,
- const size_t * region,
- size_t buffer_row_pitch,
- size_t buffer_slice_pitch,
- size_t host_row_pitch,
- size_t host_slice_pitch,
- const void * ptr,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
-{
- cl_int err = CL_SUCCESS;
- enqueue_data *data, no_wait_data = { 0 };
-
- CHECK_QUEUE(command_queue);
- CHECK_MEM(buffer);
-
- if (command_queue->ctx != buffer->ctx) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
-
- if (buffer->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) {
- err = CL_INVALID_OPERATION;
- goto error;
- }
-
- if (!ptr || !region || region[0] == 0 || region[1] == 0 || region[2] == 0) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if(buffer_row_pitch == 0)
- buffer_row_pitch = region[0];
- if(buffer_slice_pitch == 0)
- buffer_slice_pitch = region[1] * buffer_row_pitch;
-
- if(host_row_pitch == 0)
- host_row_pitch = region[0];
- if(host_slice_pitch == 0)
- host_slice_pitch = region[1] * host_row_pitch;
-
- if (buffer_row_pitch < region[0] ||
- host_row_pitch < region[0]) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if ((buffer_slice_pitch < region[1] * buffer_row_pitch || buffer_slice_pitch % buffer_row_pitch != 0 ) ||
- (host_slice_pitch < region[1] * host_row_pitch || host_slice_pitch % host_row_pitch != 0 )) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if ((buffer_origin[2] + region[2] - 1) * buffer_slice_pitch
- + (buffer_origin[1] + region[1] - 1) * buffer_row_pitch
- + buffer_origin[0] + region[0] > buffer->size) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);
-
- data = &no_wait_data;
- data->type = EnqueueWriteBufferRect;
- data->mem_obj = buffer;
- data->const_ptr = ptr;
- data->origin[0] = buffer_origin[0]; data->origin[1] = buffer_origin[1]; data->origin[2] = buffer_origin[2];
- data->host_origin[0] = host_origin[0]; data->host_origin[1] = host_origin[1]; data->host_origin[2] = host_origin[2];
- data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2];
- data->row_pitch = buffer_row_pitch;
- data->slice_pitch = buffer_slice_pitch;
- data->host_row_pitch = host_row_pitch;
- data->host_slice_pitch = host_slice_pitch;
-
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_WRITE_BUFFER_RECT) == CL_ENQUEUE_EXECUTE_IMM) {
- err = cl_enqueue_handle(event ? *event : NULL, data);
- if(event) cl_event_set_status(*event, CL_COMPLETE);
- }
-
-error:
- return err;
-}
-
-cl_int
-clEnqueueFillImage(cl_command_queue command_queue,
- cl_mem image,
- const void * fill_color,
- const size_t * porigin,
- const size_t * pregion,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
-{
- cl_int err = CL_SUCCESS;
- enqueue_data *data, no_wait_data = { 0 };
-
- CHECK_QUEUE(command_queue);
- CHECK_IMAGE(image, src_image);
- FIXUP_IMAGE_REGION(src_image, pregion, region);
- FIXUP_IMAGE_ORIGIN(src_image, porigin, origin);
-
- if (command_queue->ctx != image->ctx) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
-
- if (fill_color == NULL) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (!origin || !region || origin[0] + region[0] > src_image->w || origin[1] + region[1] > src_image->h || origin[2] + region[2] > src_image->depth) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (origin[2] != 0 || region[2] != 1)){
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (src_image->image_type == CL_MEM_OBJECT_IMAGE1D && (origin[2] != 0 ||origin[1] != 0 || region[2] != 1 || region[1] != 1)){
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- err = cl_image_fill(command_queue, fill_color, src_image, origin, region);
- if (err) {
- goto error;
- }
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, image->ctx);
-
- data = &no_wait_data;
- data->type = EnqueueFillImage;
- data->queue = command_queue;
-
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_FILL_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
- if (event && (*event)->type != CL_COMMAND_USER
- && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
- cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
- }
-
- err = cl_command_queue_flush(command_queue);
- }
-
- if(b_output_kernel_perf)
- time_end(command_queue->ctx, "beignet internal kernel : cl_fill_image", "", command_queue);
-
- return 0;
-
- error:
- return err;
-}
-
-cl_int
-clEnqueueFillBuffer(cl_command_queue command_queue,
- cl_mem buffer,
- const void * pattern,
- size_t pattern_size,
- size_t offset,
- size_t size,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
-{
- cl_int err = CL_SUCCESS;
- enqueue_data *data, no_wait_data = { 0 };
- static size_t valid_sz[] = {1, 2, 4, 8, 16, 32, 64, 128};
- int i = 0;
-
- CHECK_QUEUE(command_queue);
- CHECK_MEM(buffer);
-
- if (command_queue->ctx != buffer->ctx) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
-
- if (offset + size > buffer->size) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (pattern == NULL) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- for (i = 0; i < sizeof(valid_sz) / sizeof(size_t); i++) {
- if (valid_sz[i] == pattern_size)
- break;
- }
- if (i == sizeof(valid_sz) / sizeof(size_t)) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (offset % pattern_size || size % pattern_size) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- err = cl_mem_fill(command_queue, pattern, pattern_size, buffer, offset, size);
- if (err) {
- goto error;
- }
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);
-
- data = &no_wait_data;
- data->type = EnqueueFillBuffer;
- data->queue = command_queue;
-
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_FILL_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
- if (event && (*event)->type != CL_COMMAND_USER
- && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
- cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
- }
-
- err = cl_command_queue_flush(command_queue);
- }
-
- if(b_output_kernel_perf)
- time_end(command_queue->ctx, "beignet internal kernel : cl_fill_buffer", "", command_queue);
-
- return 0;
-
- error:
- return err;
-}
-
-cl_int
-clEnqueueCopyBuffer(cl_command_queue command_queue,
- cl_mem src_buffer,
- cl_mem dst_buffer,
- size_t src_offset,
- size_t dst_offset,
- size_t cb,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
-{
- cl_int err = CL_SUCCESS;
- enqueue_data *data, no_wait_data = { 0 };
-
- CHECK_QUEUE(command_queue);
- CHECK_MEM(src_buffer);
- CHECK_MEM(dst_buffer);
-
- if (command_queue->ctx != src_buffer->ctx) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
-
- if (command_queue->ctx != dst_buffer->ctx) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
-
- if (src_offset + cb > src_buffer->size) {
- err = CL_INVALID_VALUE;
- goto error;
- }
- if (dst_offset + cb > dst_buffer->size) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- /* Check overlap */
- if (src_buffer == dst_buffer
- && (src_offset <= dst_offset && dst_offset <= src_offset + cb - 1)
- && (dst_offset <= src_offset && src_offset <= dst_offset + cb - 1)) {
- err = CL_MEM_COPY_OVERLAP;
- goto error;
- }
-
- /* Check sub overlap */
- if (src_buffer->type == CL_MEM_SUBBUFFER_TYPE && dst_buffer->type == CL_MEM_SUBBUFFER_TYPE ) {
- struct _cl_mem_buffer* src_b = (struct _cl_mem_buffer*)src_buffer;
- struct _cl_mem_buffer* dst_b = (struct _cl_mem_buffer*)dst_buffer;
- size_t src_sub_offset = src_b->sub_offset;
- size_t dst_sub_offset = dst_b->sub_offset;
-
- if ((src_offset + src_sub_offset <= dst_offset + dst_sub_offset
- && dst_offset + dst_sub_offset <= src_offset + src_sub_offset + cb - 1)
- && (dst_offset + dst_sub_offset <= src_offset + src_sub_offset
- && src_offset + src_sub_offset <= dst_offset + dst_sub_offset + cb - 1)) {
- err = CL_MEM_COPY_OVERLAP;
- goto error;
- }
- }
-
- err = cl_mem_copy(command_queue, src_buffer, dst_buffer, src_offset, dst_offset, cb);
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_buffer->ctx);
-
- data = &no_wait_data;
- data->type = EnqueueCopyBuffer;
- data->queue = command_queue;
-
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_COPY_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
- if (event && (*event)->type != CL_COMMAND_USER
- && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
- cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
- }
-
- err = cl_command_queue_flush(command_queue);
- }
-
- if(b_output_kernel_perf)
- time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy", "", command_queue);
-
- return 0;
-
-error:
- return err;
-}
-
-cl_int
-clEnqueueCopyBufferRect(cl_command_queue command_queue,
- cl_mem src_buffer,
- cl_mem dst_buffer,
- const size_t * src_origin,
- const size_t * dst_origin,
- const size_t * region,
- size_t src_row_pitch,
- size_t src_slice_pitch,
- size_t dst_row_pitch,
- size_t dst_slice_pitch,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
-{
- cl_int err = CL_SUCCESS;
- enqueue_data *data, no_wait_data = { 0 };
-
- CHECK_QUEUE(command_queue);
- CHECK_MEM(src_buffer);
- CHECK_MEM(dst_buffer);
-
- if ((command_queue->ctx != src_buffer->ctx) ||
- (command_queue->ctx != dst_buffer->ctx)) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
-
- if (!region || region[0] == 0 || region[1] == 0 || region[2] == 0) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if(src_row_pitch == 0)
- src_row_pitch = region[0];
- if(src_slice_pitch == 0)
- src_slice_pitch = region[1] * src_row_pitch;
-
- if(dst_row_pitch == 0)
- dst_row_pitch = region[0];
- if(dst_slice_pitch == 0)
- dst_slice_pitch = region[1] * dst_row_pitch;
-
- if (src_row_pitch < region[0] ||
- dst_row_pitch < region[0]) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if ((src_slice_pitch < region[1] * src_row_pitch || src_slice_pitch % src_row_pitch != 0 ) ||
- (dst_slice_pitch < region[1] * dst_row_pitch || dst_slice_pitch % dst_row_pitch != 0 )) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if ((src_origin[2] + region[2] - 1) * src_slice_pitch
- + (src_origin[1] + region[1] - 1) * src_row_pitch
- + src_origin[0] + region[0] > src_buffer->size
- ||(dst_origin[2] + region[2] - 1) * dst_slice_pitch
- + (dst_origin[1] + region[1] - 1) * dst_row_pitch
- + dst_origin[0] + region[0] > dst_buffer->size) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (src_buffer == dst_buffer && (src_row_pitch != dst_row_pitch || src_slice_pitch != dst_slice_pitch)) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (src_buffer == dst_buffer &&
- check_copy_overlap(src_origin, dst_origin, region, src_row_pitch, src_slice_pitch)) {
- err = CL_MEM_COPY_OVERLAP;
- goto error;
- }
-
- cl_mem_copy_buffer_rect(command_queue, src_buffer, dst_buffer, src_origin, dst_origin, region,
- src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch);
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_buffer->ctx);
-
- data = &no_wait_data;
- data->type = EnqueueCopyBufferRect;
- data->queue = command_queue;
-
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_COPY_BUFFER_RECT) == CL_ENQUEUE_EXECUTE_IMM) {
- if (event && (*event)->type != CL_COMMAND_USER
- && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
- cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
- }
-
- err = cl_command_queue_flush(command_queue);
- }
-
- if(b_output_kernel_perf)
- time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy_buffer_rect", "", command_queue);
-
-error:
- return err;
-}
-
-cl_int
-clEnqueueReadImage(cl_command_queue command_queue,
- cl_mem mem,
- cl_bool blocking_read,
- const size_t * porigin,
- const size_t * pregion,
- size_t row_pitch,
- size_t slice_pitch,
- void * ptr,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
-{
- cl_int err = CL_SUCCESS;
- enqueue_data *data, no_wait_data = { 0 };
-
- CHECK_QUEUE(command_queue);
- CHECK_IMAGE(mem, image);
- FIXUP_IMAGE_REGION(image, pregion, region);
- FIXUP_IMAGE_ORIGIN(image, porigin, origin);
- if (command_queue->ctx != mem->ctx) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
-
- if (!origin || !region || origin[0] + region[0] > image->w || origin[1] + region[1] > image->h || origin[2] + region[2] > image->depth) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (!row_pitch)
- row_pitch = image->bpp*region[0];
- else if (row_pitch < image->bpp*region[0]) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (image->slice_pitch) {
- if (!slice_pitch)
- slice_pitch = row_pitch*region[1];
- else if (slice_pitch < row_pitch*region[1]) {
- err = CL_INVALID_VALUE;
- goto error;
- }
- }
- else if (slice_pitch) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (!ptr) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (mem->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) {
- err = CL_INVALID_OPERATION;
- goto error;
- }
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, mem->ctx);
-
- data = &no_wait_data;
- data->type = EnqueueReadImage;
- data->mem_obj = mem;
- data->ptr = ptr;
- data->origin[0] = origin[0]; data->origin[1] = origin[1]; data->origin[2] = origin[2];
- data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2];
- data->row_pitch = row_pitch;
- data->slice_pitch = slice_pitch;
-
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_READ_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) {
- err = cl_enqueue_handle(event ? *event : NULL, data);
- if(event) cl_event_set_status(*event, CL_COMPLETE);
- }
-
-error:
- return err;
-}
-
-cl_int
-clEnqueueWriteImage(cl_command_queue command_queue,
- cl_mem mem,
- cl_bool blocking_write,
- const size_t * porigin,
- const size_t * pregion,
- size_t row_pitch,
- size_t slice_pitch,
- const void * ptr,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
-{
- cl_int err = CL_SUCCESS;
- enqueue_data *data, no_wait_data = { 0 };
-
- CHECK_QUEUE(command_queue);
- CHECK_IMAGE(mem, image);
- FIXUP_IMAGE_REGION(image, pregion, region);
- FIXUP_IMAGE_ORIGIN(image, porigin, origin);
- if (command_queue->ctx != mem->ctx) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
-
- if (!origin || !region || origin[0] + region[0] > image->w || origin[1] + region[1] > image->h || origin[2] + region[2] > image->depth) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (!row_pitch)
- row_pitch = image->bpp*region[0];
- else if (row_pitch < image->bpp*region[0]) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (image->slice_pitch) {
- if (!slice_pitch)
- slice_pitch = row_pitch*region[1];
- else if (slice_pitch < row_pitch*region[1]) {
- err = CL_INVALID_VALUE;
- goto error;
- }
- }
- else if (slice_pitch) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (!ptr) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (mem->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) {
- err = CL_INVALID_OPERATION;
- goto error;
- }
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, mem->ctx);
-
- data = &no_wait_data;
- data->type = EnqueueWriteImage;
- data->mem_obj = mem;
- data->const_ptr = ptr;
- data->origin[0] = origin[0]; data->origin[1] = origin[1]; data->origin[2] = origin[2];
- data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2];
- data->row_pitch = row_pitch;
- data->slice_pitch = slice_pitch;
-
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_WRITE_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) {
- err = cl_enqueue_handle(event ? *event : NULL, data);
- if(event) cl_event_set_status(*event, CL_COMPLETE);
- }
-
-error:
- return err;
-}
-
-cl_int
-clEnqueueCopyImage(cl_command_queue command_queue,
- cl_mem src_mem,
- cl_mem dst_mem,
- const size_t * psrc_origin,
- const size_t * pdst_origin,
- const size_t * pregion,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
-{
- cl_int err = CL_SUCCESS;
- enqueue_data *data, no_wait_data = { 0 };
- cl_bool overlap = CL_TRUE;
- cl_int i = 0;
-
- CHECK_QUEUE(command_queue);
- CHECK_IMAGE(src_mem, src_image);
- CHECK_IMAGE(dst_mem, dst_image);
- FIXUP_IMAGE_REGION(src_image, pregion, region);
- FIXUP_IMAGE_ORIGIN(src_image, psrc_origin, src_origin);
- FIXUP_IMAGE_ORIGIN(dst_image, pdst_origin, dst_origin);
- if (command_queue->ctx != src_mem->ctx ||
- command_queue->ctx != dst_mem->ctx) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
-
- if (src_image->fmt.image_channel_order != dst_image->fmt.image_channel_order ||
- src_image->fmt.image_channel_data_type != dst_image->fmt.image_channel_data_type) {
- err = CL_IMAGE_FORMAT_MISMATCH;
- goto error;
- }
-
- if (!src_origin || !region || src_origin[0] + region[0] > src_image->w ||
- src_origin[1] + region[1] > src_image->h || src_origin[2] + region[2] > src_image->depth) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (!dst_origin || !region || dst_origin[0] + region[0] > dst_image->w ||
- dst_origin[1] + region[1] > dst_image->h || dst_origin[2] + region[2] > dst_image->depth) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if ((src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (src_origin[2] != 0 || region[2] != 1)) ||
- (dst_image->image_type == CL_MEM_OBJECT_IMAGE2D && (dst_origin[2] != 0 || region[2] != 1))) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (src_image == dst_image) {
- for(i = 0; i < 3; i++)
- overlap = overlap && (src_origin[i] < dst_origin[i] + region[i])
- && (dst_origin[i] < src_origin[i] + region[i]);
- if(overlap == CL_TRUE) {
- err = CL_MEM_COPY_OVERLAP;
- goto error;
- }
- }
-
- cl_mem_kernel_copy_image(command_queue, src_image, dst_image, src_origin, dst_origin, region);
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_mem->ctx);
-
- data = &no_wait_data;
- data->type = EnqueueCopyImage;
- data->queue = command_queue;
-
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_COPY_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) {
- if (event && (*event)->type != CL_COMMAND_USER
- && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
- cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
- }
-
- err = cl_command_queue_flush(command_queue);
- }
-
- if(b_output_kernel_perf)
- time_end(command_queue->ctx, "beignet internal kernel : cl_mem_kernel_copy_image", "", command_queue);
-
-error:
- return err;
-}
-
-cl_int
-clEnqueueCopyImageToBuffer(cl_command_queue command_queue,
- cl_mem src_mem,
- cl_mem dst_buffer,
- const size_t * psrc_origin,
- const size_t * pregion,
- size_t dst_offset,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
-{
- cl_int err = CL_SUCCESS;
- enqueue_data *data, no_wait_data = { 0 };
-
- CHECK_QUEUE(command_queue);
- CHECK_IMAGE(src_mem, src_image);
- CHECK_MEM(dst_buffer);
- FIXUP_IMAGE_REGION(src_image, pregion, region);
- FIXUP_IMAGE_ORIGIN(src_image, psrc_origin, src_origin);
- if (command_queue->ctx != src_mem->ctx ||
- command_queue->ctx != dst_buffer->ctx) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
-
- if (dst_offset + region[0]*region[1]*region[2]*src_image->bpp > dst_buffer->size) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (!src_origin || !region || src_origin[0] + region[0] > src_image->w ||
- src_origin[1] + region[1] > src_image->h || src_origin[2] + region[2] > src_image->depth) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (src_origin[2] != 0 || region[2] != 1)) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- cl_mem_copy_image_to_buffer(command_queue, src_image, dst_buffer, src_origin, dst_offset, region);
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_mem->ctx);
-
- data = &no_wait_data;
- data->type = EnqueueCopyImageToBuffer;
- data->queue = command_queue;
-
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_COPY_IMAGE_TO_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
- if (event && (*event)->type != CL_COMMAND_USER
- && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
- cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
- }
-
- err = cl_command_queue_flush(command_queue);
- }
-
- if(b_output_kernel_perf)
- time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy_image_to_buffer", "", command_queue);
-
-error:
- return err;
-}
-
-cl_int
-clEnqueueCopyBufferToImage(cl_command_queue command_queue,
- cl_mem src_buffer,
- cl_mem dst_mem,
- size_t src_offset,
- const size_t * pdst_origin,
- const size_t * pregion,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
-{
- cl_int err = CL_SUCCESS;
- enqueue_data *data, no_wait_data = { 0 };
-
- CHECK_QUEUE(command_queue);
- CHECK_MEM(src_buffer);
- CHECK_IMAGE(dst_mem, dst_image);
- FIXUP_IMAGE_REGION(dst_image, pregion, region);
- FIXUP_IMAGE_ORIGIN(dst_image, pdst_origin, dst_origin);
- if (command_queue->ctx != src_buffer->ctx ||
- command_queue->ctx != dst_mem->ctx) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
-
- if (src_offset + region[0]*region[1]*region[2]*dst_image->bpp > src_buffer->size) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (!dst_origin || !region || dst_origin[0] + region[0] > dst_image->w ||
- dst_origin[1] + region[1] > dst_image->h || dst_origin[2] + region[2] > dst_image->depth) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (dst_image->image_type == CL_MEM_OBJECT_IMAGE2D && (dst_origin[2] != 0 || region[2] != 1)) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- cl_mem_copy_buffer_to_image(command_queue, src_buffer, dst_image, src_offset, dst_origin, region);
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, dst_mem->ctx);
-
- data = &no_wait_data;
- data->type = EnqueueCopyBufferToImage;
- data->queue = command_queue;
-
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_COPY_BUFFER_TO_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) {
- if (event && (*event)->type != CL_COMMAND_USER
- && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
- cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
- }
-
- err = cl_command_queue_flush(command_queue);
- }
-
- if(b_output_kernel_perf)
- time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy_buffer_to_image", "", command_queue);
-
-error:
- return err;
-}
-
-static cl_int _cl_map_mem(cl_mem mem, void *ptr, void **mem_ptr,
- size_t offset, size_t size,
- const size_t *origin, const size_t *region)
-{
- cl_int slot = -1;
- int err = CL_SUCCESS;
- size_t sub_offset = 0;
-
- if(mem->type == CL_MEM_SUBBUFFER_TYPE) {
- struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
- sub_offset = buffer->sub_offset;
- }
-
- ptr = (char*)ptr + offset + sub_offset;
- if(mem->flags & CL_MEM_USE_HOST_PTR) {
- assert(mem->host_ptr);
- //only calc ptr here, will do memcpy in enqueue
- *mem_ptr = (char *)mem->host_ptr + offset + sub_offset;
- } else {
- *mem_ptr = ptr;
- }
- /* Record the mapped address. */
- if (!mem->mapped_ptr_sz) {
- mem->mapped_ptr_sz = 16;
- mem->mapped_ptr = (cl_mapped_ptr *)malloc(
- sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz);
- if (!mem->mapped_ptr) {
- cl_mem_unmap_auto(mem);
- err = CL_OUT_OF_HOST_MEMORY;
- goto error;
- }
- memset(mem->mapped_ptr, 0, mem->mapped_ptr_sz * sizeof(cl_mapped_ptr));
- slot = 0;
- } else {
- int i = 0;
- for (; i < mem->mapped_ptr_sz; i++) {
- if (mem->mapped_ptr[i].ptr == NULL) {
- slot = i;
- break;
- }
- }
- if (i == mem->mapped_ptr_sz) {
- cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
- sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz * 2);
- if (!new_ptr) {
- cl_mem_unmap_auto(mem);
- err = CL_OUT_OF_HOST_MEMORY;
- goto error;
- }
- memset(new_ptr, 0, 2 * mem->mapped_ptr_sz * sizeof(cl_mapped_ptr));
- memcpy(new_ptr, mem->mapped_ptr,
- mem->mapped_ptr_sz * sizeof(cl_mapped_ptr));
- slot = mem->mapped_ptr_sz;
- mem->mapped_ptr_sz *= 2;
- free(mem->mapped_ptr);
- mem->mapped_ptr = new_ptr;
- }
- }
- assert(slot != -1);
- mem->mapped_ptr[slot].ptr = *mem_ptr;
- mem->mapped_ptr[slot].v_ptr = ptr;
- mem->mapped_ptr[slot].size = size;
- if(origin) {
- assert(region);
- mem->mapped_ptr[slot].origin[0] = origin[0];
- mem->mapped_ptr[slot].origin[1] = origin[1];
- mem->mapped_ptr[slot].origin[2] = origin[2];
- mem->mapped_ptr[slot].region[0] = region[0];
- mem->mapped_ptr[slot].region[1] = region[1];
- mem->mapped_ptr[slot].region[2] = region[2];
- }
- mem->map_ref++;
-error:
- if (err != CL_SUCCESS)
- *mem_ptr = NULL;
- return err;
-}
-
-void *
-clEnqueueMapBuffer(cl_command_queue command_queue,
- cl_mem buffer,
- cl_bool blocking_map,
- cl_map_flags map_flags,
- size_t offset,
- size_t size,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event,
- cl_int * errcode_ret)
-{
- cl_int err = CL_SUCCESS;
- void *ptr = NULL;
- void *mem_ptr = NULL;
- enqueue_data *data, no_wait_data = { 0 };
-
- CHECK_QUEUE(command_queue);
- CHECK_MEM(buffer);
- if (command_queue->ctx != buffer->ctx) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
-
- if (!size || offset + size > buffer->size) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if ((map_flags & CL_MAP_READ &&
- buffer->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) ||
- (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION) &&
- buffer->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)))
- {
- err = CL_INVALID_OPERATION;
- goto error;
- }
-
-#ifdef HAS_CMRT
- if (command_queue->cmrt_event != NULL)
- cmrt_wait_for_task_finished(command_queue);
-#endif
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);
-
- data = &no_wait_data;
- data->type = EnqueueMapBuffer;
- data->mem_obj = buffer;
- data->offset = offset;
- data->size = size;
- data->ptr = ptr;
- data->unsync_map = 1;
- if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
- data->write_map = 1;
-
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_MAP_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
- data->unsync_map = 0;
- err = cl_enqueue_handle(event ? *event : NULL, data);
- if (err != CL_SUCCESS)
- goto error;
- ptr = data->ptr;
- if(event) cl_event_set_status(*event, CL_COMPLETE);
- } else {
- if (buffer->is_userptr)
- ptr = buffer->host_ptr;
- else {
- if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) {
- err = CL_MAP_FAILURE;
- goto error;
- }
- }
- }
- err = _cl_map_mem(buffer, ptr, &mem_ptr, offset, size, NULL, NULL);
- if (err != CL_SUCCESS)
- goto error;
-
-error:
- if (errcode_ret)
- *errcode_ret = err;
- return mem_ptr;
-}
-
-void *
-clEnqueueMapImage(cl_command_queue command_queue,
- cl_mem mem,
- cl_bool blocking_map,
- cl_map_flags map_flags,
- const size_t * porigin,
- const size_t * pregion,
- size_t * image_row_pitch,
- size_t * image_slice_pitch,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event,
- cl_int * errcode_ret)
-{
- cl_int err = CL_SUCCESS;
- void *ptr = NULL;
- void *mem_ptr = NULL;
- size_t offset = 0;
- enqueue_data *data, no_wait_data = { 0 };
-
- CHECK_QUEUE(command_queue);
- CHECK_IMAGE(mem, image);
- FIXUP_IMAGE_REGION(image, pregion, region);
- FIXUP_IMAGE_ORIGIN(image, porigin, origin);
- if (command_queue->ctx != mem->ctx) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
-
- if (!origin || !region || origin[0] + region[0] > image->w || origin[1] + region[1] > image->h || origin[2] + region[2] > image->depth) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (!image_row_pitch || (image->slice_pitch && !image_slice_pitch)) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if ((map_flags & CL_MAP_READ &&
- mem->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) ||
- (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION) &&
- mem->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)))
- {
- err = CL_INVALID_OPERATION;
- goto error;
- }
-
-#ifdef HAS_CMRT
- if (command_queue->cmrt_event != NULL)
- cmrt_wait_for_task_finished(command_queue);
-#endif
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, mem->ctx);
-
- data = &no_wait_data;
- data->type = EnqueueMapImage;
- data->mem_obj = mem;
- data->origin[0] = origin[0]; data->origin[1] = origin[1]; data->origin[2] = origin[2];
- data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2];
- data->ptr = ptr;
- data->unsync_map = 1;
- if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
- data->write_map = 1;
-
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_MAP_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) {
- data->unsync_map = 0;
- err = cl_enqueue_handle(event ? *event : NULL, data);
- if (err != CL_SUCCESS)
- goto error;
- ptr = data->ptr;
- if(event) cl_event_set_status(*event, CL_COMPLETE);
- } else {
- if ((ptr = cl_mem_map_gtt_unsync(mem)) == NULL) {
- err = CL_MAP_FAILURE;
- goto error;
- }
- }
-
- if(mem->flags & CL_MEM_USE_HOST_PTR) {
- if (image_slice_pitch)
- *image_slice_pitch = image->host_slice_pitch;
- *image_row_pitch = image->host_row_pitch;
-
- offset = image->bpp*origin[0] + image->host_row_pitch*origin[1] + image->host_slice_pitch*origin[2];
- } else {
- if (image_slice_pitch)
- *image_slice_pitch = image->slice_pitch;
- if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
- *image_row_pitch = image->slice_pitch;
- else
- *image_row_pitch = image->row_pitch;
-
- offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2];
- }
- err = _cl_map_mem(mem, ptr, &mem_ptr, offset, 0, origin, region);
-
-error:
- if (errcode_ret)
- *errcode_ret = err;
- return mem_ptr; //TODO: map and unmap first
-}
-
-cl_int
-clEnqueueUnmapMemObject(cl_command_queue command_queue,
- cl_mem memobj,
- void * mapped_ptr,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
-{
- cl_int err = CL_SUCCESS;
- enqueue_data *data, no_wait_data = { 0 };
-
- CHECK_QUEUE(command_queue);
- CHECK_MEM(memobj);
- if (command_queue->ctx != memobj->ctx) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, memobj->ctx);
-
- data = &no_wait_data;
- data->type = EnqueueUnmapMemObject;
- data->mem_obj = memobj;
- data->ptr = mapped_ptr;
-
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_UNMAP_MEM_OBJECT) == CL_ENQUEUE_EXECUTE_IMM) {
- err = cl_enqueue_handle(event ? *event : NULL, data);
- if(event) cl_event_set_status(*event, CL_COMPLETE);
- }
-
-error:
- return err;
-}
-
-cl_int
-clEnqueueMigrateMemObjects(cl_command_queue command_queue,
- cl_uint num_mem_objects,
- const cl_mem * mem_objects,
- cl_mem_migration_flags flags,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
-{
- /* So far, we just support 1 device and no subdevice. So all the command queues
- belong to the small context. There is no need to migrate the mem objects by now. */
- cl_int err = CL_SUCCESS;
- cl_uint i = 0;
- enqueue_data *data, defer_enqueue_data = { 0 };
-
- if (!flags & CL_MIGRATE_MEM_OBJECT_HOST)
- CHECK_QUEUE(command_queue);
-
- if (num_mem_objects == 0 || mem_objects == NULL) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- if (flags && flags & ~(CL_MIGRATE_MEM_OBJECT_HOST |
- CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- for (i = 0; i < num_mem_objects; i++) {
- CHECK_MEM(mem_objects[i]);
- if (mem_objects[i]->ctx != command_queue->ctx) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
- }
-
- /* really nothing to do, fill the event. */
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx);
- data = &defer_enqueue_data;
- data->type = EnqueueMigrateMemObj;
-
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_READ_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
- err = cl_enqueue_handle(event ? *event : NULL, data);
- if(event) cl_event_set_status(*event, CL_COMPLETE);
- }
-
-error:
- return err;
-}
-
-cl_int
-clEnqueueNDRangeKernel(cl_command_queue command_queue,
- cl_kernel kernel,
- cl_uint work_dim,
- const size_t * global_work_offset,
- const size_t * global_work_size,
- const size_t * local_work_size,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
-{
- size_t fixed_global_off[] = {0,0,0};
- size_t fixed_global_sz[] = {1,1,1};
- size_t fixed_local_sz[] = {1,1,1};
- cl_int err = CL_SUCCESS;
- cl_uint i;
- enqueue_data *data, no_wait_data = { 0 };
-
- CHECK_QUEUE(command_queue);
- CHECK_KERNEL(kernel);
-
- /* Check number of dimensions we have */
- if (UNLIKELY(work_dim == 0 || work_dim > 3)) {
- err = CL_INVALID_WORK_DIMENSION;
- goto error;
- }
-
- /* We need a work size per dimension */
- if (UNLIKELY(global_work_size == NULL)) {
- err = CL_INVALID_GLOBAL_WORK_SIZE;
- goto error;
- }
-
- if (kernel->vme) {
- if (work_dim != 2) {
- err = CL_INVALID_WORK_DIMENSION;
- goto error;
- }
- if (local_work_size != NULL) {
- err = CL_INVALID_WORK_GROUP_SIZE;
- goto error;
- }
- }
-
- if (global_work_offset != NULL)
- for (i = 0; i < work_dim; ++i) {
- if (UNLIKELY(global_work_offset[i] + global_work_size[i] > (size_t)-1)) {
- err = CL_INVALID_GLOBAL_OFFSET;
- goto error;
- }
- }
-
- /* Local sizes must be non-null and divide global sizes */
- if (local_work_size != NULL)
- for (i = 0; i < work_dim; ++i)
- if (UNLIKELY(local_work_size[i] == 0 || global_work_size[i] % local_work_size[i])) {
- err = CL_INVALID_WORK_GROUP_SIZE;
- goto error;
- }
-
- /* Queue and kernel must share the same context */
- assert(kernel->program);
- if (command_queue->ctx != kernel->program->ctx) {
- err = CL_INVALID_CONTEXT;
- goto error;
- }
-
-#ifdef HAS_CMRT
- if (kernel->cmrt_kernel != NULL) {
- err = cmrt_enqueue(command_queue, kernel, global_work_size, local_work_size);
- goto error;
- }
-#endif
-
- /* XXX No event right now */
- //FATAL_IF(num_events_in_wait_list > 0, "Events are not supported");
- //FATAL_IF(event_wait_list != NULL, "Events are not supported");
- //FATAL_IF(event != NULL, "Events are not supported");
-
- if (local_work_size != NULL) {
- for (i = 0; i < work_dim; ++i)
- fixed_local_sz[i] = local_work_size[i];
- } else {
- if (kernel->vme) {
- fixed_local_sz[0] = 16;
- fixed_local_sz[1] = 1;
- } else {
- uint j, maxDimSize = 64 /* from 64? */, maxGroupSize = 256; //MAX_WORK_GROUP_SIZE may too large
- size_t realGroupSize = 1;
- for (i = 0; i< work_dim; i++) {
- for (j = maxDimSize; j > 1; j--) {
- if (global_work_size[i] % j == 0 && j <= maxGroupSize) {
- fixed_local_sz[i] = j;
- maxGroupSize = maxGroupSize /j;
- maxDimSize = maxGroupSize > maxDimSize ? maxDimSize : maxGroupSize;
- break; //choose next work_dim
- }
- }
- realGroupSize *= fixed_local_sz[i];
- }
-
- //in a loop of conformance test (such as test_api repeated_setup_cleanup), in each loop:
- //create a new context, a new command queue, and uses 'globalsize[0]=1000, localsize=NULL' to enqueu kernel
- //it triggers the following message for many times.
- //to avoid too many messages, only print it for the first time of the process.
- //just use static variable since it doesn't matter to print a few times at multi-thread case.
- static int warn_no_good_localsize = 1;
- if (realGroupSize % 8 != 0 && warn_no_good_localsize) {
- warn_no_good_localsize = 0;
- DEBUGP(DL_WARNING, "unable to find good values for local_work_size[i], please provide local_work_size[] explicitly, you can find good values with trial-and-error method.");
- }
- }
- }
-
- if (kernel->vme) {
- fixed_global_sz[0] = (global_work_size[0]+15) / 16 * 16;
- fixed_global_sz[1] = (global_work_size[1]+15) / 16;
- } else {
- for (i = 0; i < work_dim; ++i)
- fixed_global_sz[i] = global_work_size[i];
- }
- if (global_work_offset != NULL)
- for (i = 0; i < work_dim; ++i)
- fixed_global_off[i] = global_work_offset[i];
-
- if (kernel->compile_wg_sz[0] || kernel->compile_wg_sz[1] || kernel->compile_wg_sz[2]) {
- if (fixed_local_sz[0] != kernel->compile_wg_sz[0]
- || fixed_local_sz[1] != kernel->compile_wg_sz[1]
- || fixed_local_sz[2] != kernel->compile_wg_sz[2])
- {
- err = CL_INVALID_WORK_GROUP_SIZE;
- goto error;
- }
- }
-
- /* Do device specific checks are enqueue the kernel */
- err = cl_command_queue_ND_range(command_queue,
- kernel,
- work_dim,
- fixed_global_off,
- fixed_global_sz,
- fixed_local_sz);
- if(err != CL_SUCCESS)
- goto error;
-
- data = &no_wait_data;
- data->type = EnqueueNDRangeKernel;
- data->queue = command_queue;
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx);
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_NDRANGE_KERNEL) == CL_ENQUEUE_EXECUTE_IMM) {
- if (event && (*event)->type != CL_COMMAND_USER
- && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
- cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
- }
-
- err = cl_command_queue_flush(command_queue);
- }
-
-error:
- if(b_output_kernel_perf)
- {
- if(kernel->program->build_opts != NULL)
- time_end(command_queue->ctx, cl_kernel_get_name(kernel), kernel->program->build_opts, command_queue);
- else
- time_end(command_queue->ctx, cl_kernel_get_name(kernel), "", command_queue);
- }
-
- return err;
-}
-
-cl_int
-clEnqueueTask(cl_command_queue command_queue,
- cl_kernel kernel,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
-{
- const size_t global_size[3] = {1, 0, 0};
- const size_t local_size[3] = {1, 0, 0};
-
- return clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_size, local_size,
- num_events_in_wait_list, event_wait_list, event);
-}
-
-cl_int
-clEnqueueNativeKernel(cl_command_queue command_queue,
- void (*user_func)(void *),
- void * args,
- size_t cb_args,
- cl_uint num_mem_objects,
- const cl_mem * mem_list,
- const void ** args_mem_loc,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
-{
- cl_int err = CL_SUCCESS;
- void *new_args = NULL;
- enqueue_data *data, no_wait_data = { 0 };
- cl_int i;
-
- if(user_func == NULL ||
- (args == NULL && cb_args > 0) ||
- (args == NULL && num_mem_objects ==0) ||
- (args != NULL && cb_args == 0) ||
- (num_mem_objects > 0 && (mem_list == NULL || args_mem_loc == NULL)) ||
- (num_mem_objects == 0 && (mem_list != NULL || args_mem_loc != NULL))) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- //Per spec, need copy args
- if (cb_args)
- {
- new_args = malloc(cb_args);
- if (!new_args)
- {
- err = CL_OUT_OF_HOST_MEMORY;
- goto error;
- }
- memcpy(new_args, args, cb_args);
-
- for (i=0; i<num_mem_objects; ++i)
- {
- CHECK_MEM(mem_list[i]);
- args_mem_loc[i] = new_args + (args_mem_loc[i] - args); //change to new args
- }
- }
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx);
-
- data = &no_wait_data;
- data->type = EnqueueNativeKernel;
- data->mem_list = mem_list;
- data->ptr = new_args;
- data->size = cb_args;
- data->offset = (size_t)num_mem_objects;
- data->const_ptr = args_mem_loc;
- data->user_func = user_func;
-
- if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
- event, data, CL_COMMAND_NATIVE_KERNEL) == CL_ENQUEUE_EXECUTE_IMM) {
- err = cl_enqueue_handle(event ? *event : NULL, data);
- if(event) cl_event_set_status(*event, CL_COMPLETE);
- }
-
-error:
- return err;
-}
-
-cl_int
-clEnqueueMarker(cl_command_queue command_queue,
- cl_event *event)
-{
- cl_int err = CL_SUCCESS;
- CHECK_QUEUE(command_queue);
- if(event == NULL) {
- err = CL_INVALID_VALUE;
- goto error;
- }
-
- cl_event_marker_with_wait_list(command_queue, 0, NULL, event);
-error:
- return err;
-}
-
-cl_int
-clEnqueueMarkerWithWaitList(cl_command_queue command_queue,
- cl_uint num_events_in_wait_list,
- const cl_event *event_wait_list,
- cl_event *event)
-{
- cl_int err = CL_SUCCESS;
- CHECK_QUEUE(command_queue);
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx);
-
- cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event);
-error:
- return err;
-}
-
-cl_int
-clEnqueueWaitForEvents(cl_command_queue command_queue,
- cl_uint num_events,
- const cl_event * event_list)
-{
- cl_int err = CL_SUCCESS;
- CHECK_QUEUE(command_queue);
- err = clWaitForEvents(num_events, event_list);
-
-error:
- return err;
-}
-
-cl_int
-clEnqueueBarrier(cl_command_queue command_queue)
-{
- cl_int err = CL_SUCCESS;
- CHECK_QUEUE(command_queue);
-
- cl_event_barrier_with_wait_list(command_queue, 0, NULL, NULL);
-
-error:
- return err;
-}
-
-cl_int
-clEnqueueBarrierWithWaitList(cl_command_queue command_queue,
- cl_uint num_events_in_wait_list,
- const cl_event *event_wait_list,
- cl_event *event)
-{
- cl_int err = CL_SUCCESS;
- CHECK_QUEUE(command_queue);
-
- TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx);
-
- cl_event_barrier_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event);
-error:
- return err;
-}
-
#define EXTFUNC(x) \
if (strcmp(#x, func_name) == 0) \
return (void *)x;
diff --git a/src/cl_api_kernel.c b/src/cl_api_kernel.c
index a1075d7..ef494e6 100644
--- a/src/cl_api_kernel.c
+++ b/src/cl_api_kernel.c
@@ -130,10 +130,19 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
}
realGroupSize *= fixed_local_sz[i];
}
- if (realGroupSize % 8 != 0)
+
+ //in a loop of conformance test (such as test_api repeated_setup_cleanup), in each loop:
+ //create a new context, a new command queue, and uses 'globalsize[0]=1000, localsize=NULL' to enqueu kernel
+ //it triggers the following message for many times.
+ //to avoid too many messages, only print it for the first time of the process.
+ //just use static variable since it doesn't matter to print a few times at multi-thread case.
+ static int warn_no_good_localsize = 1;
+ if (realGroupSize % 8 != 0 && warn_no_good_localsize) {
+ warn_no_good_localsize = 0;
DEBUGP(DL_WARNING, "unable to find good values for local_work_size[i], please provide\n"
- " local_work_size[] explicitly, you can find good values with\n"
- " trial-and-error method.");
+ " local_work_size[] explicitly, you can find good values with\n"
+ " trial-and-error method.");
+ }
}
}
@@ -253,10 +262,10 @@ clEnqueueNativeKernel(cl_command_queue command_queue,
//Per spec, need copy args
if (cb_args) {
- new_args = CL_MALLOC(cb_args);
+ new_args = cl_malloc(cb_args);
if (num_mem_objects) {
- new_args_mem_loc = CL_MALLOC(sizeof(void *) * num_mem_objects);
- new_mem_list = CL_MALLOC(sizeof(cl_mem) * num_mem_objects);
+ new_args_mem_loc = cl_malloc(sizeof(void *) * num_mem_objects);
+ new_mem_list = cl_malloc(sizeof(cl_mem) * num_mem_objects);
memcpy(new_mem_list, mem_list, sizeof(cl_mem) * num_mem_objects);
}
@@ -320,11 +329,11 @@ clEnqueueNativeKernel(cl_command_queue command_queue,
if (err != CL_SUCCESS) {
if (new_args)
- CL_FREE(new_args);
+ cl_free(new_args);
if (new_mem_list)
- CL_FREE(new_mem_list);
+ cl_free(new_mem_list);
if (new_args_mem_loc)
- CL_FREE(new_args_mem_loc);
+ cl_free(new_args_mem_loc);
}
if (err == CL_SUCCESS && event) {
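
A note on the warn-once change above: the guard is just a process-wide static flag. A minimal
standalone sketch of the same pattern (the function name and the fprintf call are illustrative,
not part of the patch):

#include <stdio.h>

/* Illustrative sketch: warn the first time an awkward work-group size shows up,
 * then stay silent for the rest of the process. A plain static flag is used on
 * purpose; a duplicated message from racing threads is harmless, so no atomics
 * or locks are needed. */
static void
warn_bad_local_size_once(size_t real_group_size)
{
  static int warned = 0;
  if (real_group_size % 8 != 0 && !warned) {
    warned = 1;
    fprintf(stderr, "unable to find good values for local_work_size[], "
                    "please provide local_work_size[] explicitly\n");
  }
}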
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 8d3c6b0..54a487c 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -45,14 +45,16 @@ cl_command_queue_new(cl_context ctx)
assert(ctx);
TRY_ALLOC_NO_ERR (queue, CALLOC(struct _cl_command_queue));
CL_OBJECT_INIT_BASE(queue, CL_OBJECT_COMMAND_QUEUE_MAGIC);
+ cl_command_queue_init_enqueue(queue);
- queue->cmrt_event = NULL;
if ((queue->thread_data = cl_thread_data_create()) == NULL) {
goto error;
}
/* Append the command queue in the list */
cl_context_add_queue(ctx, queue);
+ queue->ctx = ctx;
+ queue->cmrt_event = NULL;
exit:
return queue;
@@ -69,6 +71,8 @@ cl_command_queue_delete(cl_command_queue queue)
if (CL_OBJECT_DEC_REF(queue) > 1)
return;
+ cl_command_queue_destroy_enqueue(queue);
+
#ifdef HAS_CMRT
if (queue->cmrt_event != NULL)
cmrt_destroy_event(queue);
@@ -76,7 +80,7 @@ cl_command_queue_delete(cl_command_queue queue)
// If there is a list of valid events, we need to give them
// a chance to call the call-back function.
- cl_event_update_last_events(queue,1);
+ //cl_event_update_last_events(queue,1);
cl_thread_data_destroy(queue);
queue->thread_data = NULL;
@@ -112,10 +116,9 @@ set_image_info(char *curbe,
}
LOCAL cl_int
-cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k)
+cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k, cl_gpgpu gpgpu)
{
uint32_t i;
- GET_QUEUE_THREAD_GPGPU(queue);
for (i = 0; i < k->image_sz; i++) {
int id = k->images[i].arg_idx;
@@ -149,9 +152,9 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k)
}
LOCAL cl_int
-cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
+cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k, cl_gpgpu gpgpu)
{
- GET_QUEUE_THREAD_GPGPU(queue);
+ //GET_QUEUE_THREAD_GPGPU(queue);
/* Bind all user buffers (given by clSetKernelArg) */
uint32_t i;
@@ -175,7 +178,8 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
return CL_SUCCESS;
}
-extern cl_int cl_command_queue_ND_range_gen7(cl_command_queue, cl_kernel, uint32_t, const size_t *, const size_t *, const size_t *);
+extern cl_int cl_command_queue_ND_range_gen7(cl_command_queue, cl_kernel, cl_event,
+ uint32_t, const size_t *, const size_t *, const size_t *);
static cl_int
cl_kernel_check_args(cl_kernel k)
@@ -190,6 +194,7 @@ cl_kernel_check_args(cl_kernel k)
LOCAL cl_int
cl_command_queue_ND_range(cl_command_queue queue,
cl_kernel k,
+ cl_event event,
const uint32_t work_dim,
const size_t *global_wk_off,
const size_t *global_wk_sz,
@@ -203,8 +208,10 @@ cl_command_queue_ND_range(cl_command_queue queue,
/* Check that the user did not forget any argument */
TRY (cl_kernel_check_args, k);
+
if (ver == 7 || ver == 75 || ver == 8 || ver == 9)
- TRY (cl_command_queue_ND_range_gen7, queue, k, work_dim, global_wk_off, global_wk_sz, local_wk_sz);
+ TRY (cl_command_queue_ND_range_gen7, queue, k, event,
+ work_dim, global_wk_off, global_wk_sz, local_wk_sz);
else
FATAL ("Unknown Gen Device");
@@ -213,7 +220,7 @@ error:
}
LOCAL int
-cl_command_queue_flush_gpgpu(cl_command_queue queue, cl_gpgpu gpgpu)
+cl_command_queue_flush_gpgpu(cl_gpgpu gpgpu)
{
void* printf_info = cl_gpgpu_get_printf_info(gpgpu);
void* profiling_info;
@@ -246,15 +253,15 @@ cl_command_queue_flush(cl_command_queue queue)
{
int err;
GET_QUEUE_THREAD_GPGPU(queue);
- err = cl_command_queue_flush_gpgpu(queue, gpgpu);
+ err = cl_command_queue_flush_gpgpu(gpgpu);
// We now keep a list of uncompleted events and check if they compelte
// every flush. This can make sure all events created have chance to be
// update status, so the callback functions or reference can be handled.
- cl_event_update_last_events(queue,0);
+ //cl_event_update_last_events(queue,0);
cl_event current_event = get_current_event(queue);
if (current_event && err == CL_SUCCESS) {
- err = cl_event_flush(current_event);
+ //err = cl_event_flush(current_event);
set_current_event(queue, NULL);
}
cl_invalid_thread_gpgpu(queue);
@@ -265,7 +272,7 @@ LOCAL cl_int
cl_command_queue_finish(cl_command_queue queue)
{
cl_gpgpu_sync(cl_get_thread_batch_buf(queue));
- cl_event_update_last_events(queue,1);
+ //cl_event_update_last_events(queue,1);
return CL_SUCCESS;
}
@@ -337,72 +344,69 @@ cl_command_queue_remove_event(cl_command_queue queue, cl_event event)
queue->wait_events_num -= 1;
}
-#define DEFAULT_WAIT_EVENTS_SIZE 16
LOCAL void
cl_command_queue_insert_barrier_event(cl_command_queue queue, cl_event event)
{
- cl_int i=0;
- cl_event *new_list;
+ cl_int i = 0;
+
+ cl_event_add_ref(event);
assert(queue != NULL);
- if(queue->barrier_events == NULL) {
- queue->barrier_events_size = DEFAULT_WAIT_EVENTS_SIZE;
- TRY_ALLOC_NO_ERR (queue->barrier_events, CALLOC_ARRAY(cl_event, queue->barrier_events_size));
+ CL_OBJECT_LOCK(queue);
+
+ if (queue->barrier_events == NULL) {
+ queue->barrier_events_size = 4;
+ queue->barrier_events = cl_calloc(queue->barrier_events_size, sizeof(cl_event));
+ assert(queue->barrier_events);
}
- for(i=0; i<queue->barrier_events_num; i++) {
- if(queue->barrier_events[i] == event)
- return; //is in the barrier_events, need to insert
+ for (i = 0; i<queue->barrier_events_num; i++) {
+ assert(queue->barrier_events[i] != event);
}
if(queue->barrier_events_num < queue->barrier_events_size) {
queue->barrier_events[queue->barrier_events_num++] = event;
+ CL_OBJECT_UNLOCK(queue);
return;
}
- //barrier_events_num == barrier_events_size, array is full
+ /* Array is full, double its size. */
queue->barrier_events_size *= 2;
- TRY_ALLOC_NO_ERR (new_list, CALLOC_ARRAY(cl_event, queue->barrier_events_size));
- memcpy(new_list, queue->barrier_events, sizeof(cl_event)*queue->barrier_events_num);
- cl_free(queue->barrier_events);
- queue->barrier_events = new_list;
- queue->barrier_events[queue->barrier_events_num++] = event;
- return;
+ queue->barrier_events = cl_realloc(queue->barrier_events,
+ queue->barrier_events_size * sizeof(cl_event));
+ assert(queue->barrier_events);
-exit:
+ queue->barrier_events[queue->barrier_events_num++] = event;
+ CL_OBJECT_UNLOCK(queue);
return;
-error:
- if(queue->barrier_events)
- cl_free(queue->barrier_events);
- queue->barrier_events = NULL;
- queue->barrier_events_size = 0;
- queue->barrier_events_num = 0;
- goto exit;
-
}
LOCAL void
cl_command_queue_remove_barrier_event(cl_command_queue queue, cl_event event)
{
- cl_int i=0;
+ cl_int i = 0;
+ assert(queue != NULL);
- if(queue->barrier_events_num == 0)
- return;
+ CL_OBJECT_LOCK(queue);
- for(i=0; i<queue->barrier_events_num; i++) {
+ assert(queue->barrier_events_num > 0);
+ assert(queue->barrier_events);
+
+ for(i = 0; i < queue->barrier_events_num; i++) {
if(queue->barrier_events[i] == event)
break;
}
+ assert(i < queue->barrier_events_num); // Must find it.
- if(i == queue->barrier_events_num)
- return;
-
- if(i == queue->barrier_events_num - 1) {
+ if(i == queue->barrier_events_num - 1) { // The last one.
queue->barrier_events[i] = NULL;
} else {
- for(; i<queue->barrier_events_num-1; i++) {
+ for(; i < queue->barrier_events_num - 1; i++) { // Move forward.
queue->barrier_events[i] = queue->barrier_events[i+1];
}
}
queue->barrier_events_num -= 1;
+ CL_OBJECT_UNLOCK(queue);
+
+ cl_event_delete(event);
}
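
The barrier-event bookkeeping above reduces to appending into a growable array under the queue
lock, doubling the capacity when it fills up. A simplified sketch of that storage logic follows
(the lock and the event reference counting are stripped out, and plain calloc/realloc stand in
for cl_calloc/cl_realloc):

#include <assert.h>
#include <stdlib.h>

/* Simplified sketch of the insert path in cl_command_queue_insert_barrier_event:
 * a growable pointer array that doubles its capacity when full. */
typedef struct {
  void **items;
  int num;
  int size;
} ptr_array;

static void
ptr_array_append(ptr_array *a, void *item)
{
  if (a->items == NULL) {        /* first use: start with a small capacity */
    a->size = 4;
    a->items = calloc(a->size, sizeof(void *));
    assert(a->items);
  }
  if (a->num == a->size) {       /* full: double the capacity */
    a->size *= 2;
    a->items = realloc(a->items, a->size * sizeof(void *));
    assert(a->items);
  }
  a->items[a->num++] = item;
}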
diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
index 34886f8..470cafb 100644
--- a/src/cl_command_queue.h
+++ b/src/cl_command_queue.h
@@ -81,6 +81,7 @@ extern void cl_command_queue_add_ref(cl_command_queue);
/* Map ND range kernel from OCL API */
extern cl_int cl_command_queue_ND_range(cl_command_queue queue,
cl_kernel ker,
+ cl_event event,
const uint32_t work_dim,
const size_t *global_work_offset,
const size_t *global_work_size,
@@ -93,16 +94,16 @@ extern cl_int cl_command_queue_set_report_buffer(cl_command_queue, cl_mem);
extern cl_int cl_command_queue_flush(cl_command_queue);
/* Flush for the specified gpgpu */
-extern int cl_command_queue_flush_gpgpu(cl_command_queue, cl_gpgpu);
+extern int cl_command_queue_flush_gpgpu(cl_gpgpu);
/* Wait for the completion of the command queue */
extern cl_int cl_command_queue_finish(cl_command_queue);
/* Bind all the surfaces in the GPGPU state */
-extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel);
+extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel, cl_gpgpu);
/* Bind all the image surfaces in the GPGPU state */
-extern cl_int cl_command_queue_bind_image(cl_command_queue, cl_kernel);
+extern cl_int cl_command_queue_bind_image(cl_command_queue, cl_kernel, cl_gpgpu);
/* Insert a user event to command's wait_events */
extern void cl_command_queue_insert_event(cl_command_queue, cl_event);
diff --git a/src/cl_command_queue_enqueue.c b/src/cl_command_queue_enqueue.c
index 1848d50..7bc6dd3 100644
--- a/src/cl_command_queue_enqueue.c
+++ b/src/cl_command_queue_enqueue.c
@@ -18,7 +18,7 @@
*/
#include "cl_command_queue.h"
-#include "cl_event_new.h"
+#include "cl_event.h"
#include "cl_alloc.h"
#include <stdio.h>
@@ -203,7 +203,7 @@ cl_command_queue_record_in_queue_events(cl_command_queue queue, cl_uint *list_nu
}
assert(event_num > 0);
- enqueued_list = CL_CALLOC(event_num, sizeof(cl_event));
+ enqueued_list = cl_calloc(event_num, sizeof(cl_event));
assert(enqueued_list);
i = 0;
@@ -265,7 +265,7 @@ cl_command_queue_wait_flush(cl_command_queue queue)
cl_event_delete(enqueued_list[i]);
}
if (enqueued_list)
- CL_FREE(enqueued_list);
+ cl_free(enqueued_list);
return CL_SUCCESS;
}
@@ -315,7 +315,7 @@ cl_command_queue_wait_finish(cl_command_queue queue)
cl_event_delete(enqueued_list[i]);
}
if (enqueued_list)
- CL_FREE(enqueued_list);
+ cl_free(enqueued_list);
return CL_SUCCESS;
}
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index b6a5920..5ad3b8b 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -23,6 +23,7 @@
#include "cl_kernel.h"
#include "cl_device_id.h"
#include "cl_mem.h"
+#include "cl_event.h"
#include "cl_utils.h"
#include "cl_alloc.h"
@@ -123,12 +124,12 @@ error:
}
static int
-cl_upload_constant_buffer(cl_command_queue queue, cl_kernel ker)
+cl_upload_constant_buffer(cl_command_queue queue, cl_kernel ker, cl_gpgpu gpgpu)
{
/* calculate constant buffer size
* we need raw_size & aligned_size
*/
- GET_QUEUE_THREAD_GPGPU(queue);
+ //GET_QUEUE_THREAD_GPGPU(queue);
int32_t arg;
size_t offset = 0;
uint32_t raw_size = 0, aligned_size =0;
@@ -331,12 +332,14 @@ cl_alloc_printf(cl_gpgpu gpgpu, cl_kernel ker, void* printf_info, int printf_num
LOCAL cl_int
cl_command_queue_ND_range_gen7(cl_command_queue queue,
cl_kernel ker,
+ cl_event event,
const uint32_t work_dim,
const size_t *global_wk_off,
const size_t *global_wk_sz,
const size_t *local_wk_sz)
{
- GET_QUEUE_THREAD_GPGPU(queue);
+ //GET_QUEUE_THREAD_GPGPU(queue);
+ cl_gpgpu gpgpu = cl_gpgpu_new(queue->ctx->drv);
cl_context ctx = queue->ctx;
char *final_curbe = NULL; /* Includes them and one sub-buffer per group */
cl_gpgpu_kernel kernel;
@@ -403,9 +406,9 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
}
/* Bind user buffers */
- cl_command_queue_bind_surface(queue, ker);
+ cl_command_queue_bind_surface(queue, ker, gpgpu);
/* Bind user images */
- if(UNLIKELY(err = cl_command_queue_bind_image(queue, ker) != CL_SUCCESS))
+ if(UNLIKELY(err = cl_command_queue_bind_image(queue, ker, gpgpu) != CL_SUCCESS))
return err;
/* Bind all samplers */
if (ker->vme)
@@ -419,7 +422,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
/* Bind a stack if needed */
cl_bind_stack(gpgpu, ker);
- if (cl_upload_constant_buffer(queue, ker) != 0)
+ if (cl_upload_constant_buffer(queue, ker, gpgpu) != 0)
goto error;
cl_gpgpu_states_setup(gpgpu, &kernel);
@@ -440,7 +443,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
batch_sz = cl_kernel_compute_batch_sz(ker);
if (cl_gpgpu_batch_reset(gpgpu, batch_sz) != 0)
goto error;
- cl_set_thread_batch_buf(queue, cl_gpgpu_ref_batch_buf(gpgpu));
+ //cl_set_thread_batch_buf(queue, cl_gpgpu_ref_batch_buf(gpgpu));
cl_gpgpu_batch_start(gpgpu);
/* Issue the GPGPU_WALKER command */
@@ -448,6 +451,10 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
/* Close the batch buffer and submit it */
cl_gpgpu_batch_end(gpgpu, 0);
+
+ event->exec_data.gpgpu = gpgpu;
+ event->exec_data.type = EnqueueNDRangeKernel;
+
return CL_SUCCESS;
error:
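
To summarize the gpgpu handoff introduced here: cl_command_queue_ND_range_gen7 now creates its
own cl_gpgpu, records the batch, and stores it in event->exec_data instead of flushing through
the per-thread state. The condensed outline below (not a literal excerpt of the patch, it simply
mirrors cl_enqueue_ndrange further down) shows how that gpgpu is later driven by the event's
status transitions:

/* Condensed outline of the new gpgpu lifecycle for an ND-range command:
 * flush when the event is submitted, sync and release when it completes. */
static cl_int
drive_ndrange_gpgpu(enqueue_data *data, cl_int status)
{
  if (status == CL_SUBMITTED)
    return cl_command_queue_flush_gpgpu(data->gpgpu); /* submit the batch buffer */

  if (status == CL_COMPLETE) {
    void *batch_buf = cl_gpgpu_ref_batch_buf(data->gpgpu);
    cl_gpgpu_sync(batch_buf);            /* wait for the hardware to finish */
    cl_gpgpu_unref_batch_buf(batch_buf);
    cl_gpgpu_delete(data->gpgpu);        /* the gpgpu is no longer needed */
    data->gpgpu = NULL;
  }
  return CL_SUCCESS;
}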
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index 54c0ffa..1ac6bc0 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -16,93 +16,101 @@
*
* Author: Rong Yang <rong.r.yang at intel.com>
*/
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <pthread.h>
+//#include "cl_image.h"
#include "cl_enqueue.h"
-#include "cl_image.h"
#include "cl_driver.h"
#include "cl_event.h"
#include "cl_command_queue.h"
#include "cl_utils.h"
+#include "cl_alloc.h"
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <pthread.h>
-
-cl_int cl_enqueue_read_buffer(enqueue_data* data)
+static cl_int
+cl_enqueue_read_buffer(enqueue_data *data, cl_int status)
{
cl_int err = CL_SUCCESS;
cl_mem mem = data->mem_obj;
+
+ if (status != CL_COMPLETE)
+ return err;
+
assert(mem->type == CL_MEM_BUFFER_TYPE ||
mem->type == CL_MEM_SUBBUFFER_TYPE);
- struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
+ struct _cl_mem_buffer *buffer = (struct _cl_mem_buffer *)mem;
//cl_buffer_get_subdata sometime is very very very slow in linux kernel, in skl and chv,
//and it is randomly. So temporary disable it, use map/copy/unmap to read.
//Should re-enable it after find root cause.
if (0 && !mem->is_userptr) {
if (cl_buffer_get_subdata(mem->bo, data->offset + buffer->sub_offset,
- data->size, data->ptr) != 0)
+ data->size, data->ptr) != 0)
err = CL_MAP_FAILURE;
} else {
- void* src_ptr = cl_mem_map_auto(mem, 0);
+ void *src_ptr = cl_mem_map_auto(mem, 0);
if (src_ptr == NULL)
err = CL_MAP_FAILURE;
else {
//sometimes, application invokes read buffer, instead of map buffer, even if userptr is enabled
//memcpy is not necessary for this case
- if (data->ptr != (char*)src_ptr + data->offset + buffer->sub_offset)
- memcpy(data->ptr, (char*)src_ptr + data->offset + buffer->sub_offset, data->size);
+ if (data->ptr != (char *)src_ptr + data->offset + buffer->sub_offset)
+ memcpy(data->ptr, (char *)src_ptr + data->offset + buffer->sub_offset, data->size);
cl_mem_unmap_auto(mem);
}
}
return err;
}
-cl_int cl_enqueue_read_buffer_rect(enqueue_data* data)
+static cl_int
+cl_enqueue_read_buffer_rect(enqueue_data *data, cl_int status)
{
cl_int err = CL_SUCCESS;
- void* src_ptr;
- void* dst_ptr;
+ void *src_ptr;
+ void *dst_ptr;
- const size_t* origin = data->origin;
- const size_t* host_origin = data->host_origin;
- const size_t* region = data->region;
+ const size_t *origin = data->origin;
+ const size_t *host_origin = data->host_origin;
+ const size_t *region = data->region;
cl_mem mem = data->mem_obj;
+
+ if (status != CL_COMPLETE)
+ return err;
+
assert(mem->type == CL_MEM_BUFFER_TYPE ||
mem->type == CL_MEM_SUBBUFFER_TYPE);
- struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
+ struct _cl_mem_buffer *buffer = (struct _cl_mem_buffer *)mem;
if (!(src_ptr = cl_mem_map_auto(mem, 0))) {
err = CL_MAP_FAILURE;
goto error;
}
- size_t offset = origin[0] + data->row_pitch*origin[1] + data->slice_pitch*origin[2];
- src_ptr = (char*)src_ptr + offset + buffer->sub_offset;
-
- offset = host_origin[0] + data->host_row_pitch*host_origin[1] + data->host_slice_pitch*host_origin[2];
- dst_ptr = (char *)data->ptr + offset;
-
- if (data->row_pitch == region[0] && data->row_pitch == data->host_row_pitch &&
- (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && data->slice_pitch == data->host_slice_pitch)))
- {
- memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]);
- }
- else {
- cl_uint y, z;
- for (z = 0; z < region[2]; z++) {
- const char* src = src_ptr;
- char* dst = dst_ptr;
- for (y = 0; y < region[1]; y++) {
- memcpy(dst, src, region[0]);
- src += data->row_pitch;
- dst += data->host_row_pitch;
- }
- src_ptr = (char*)src_ptr + data->slice_pitch;
- dst_ptr = (char*)dst_ptr + data->host_slice_pitch;
- }
- }
+ size_t offset = origin[0] + data->row_pitch * origin[1] + data->slice_pitch * origin[2];
+ src_ptr = (char *)src_ptr + offset + buffer->sub_offset;
+
+ offset = host_origin[0] + data->host_row_pitch * host_origin[1] + data->host_slice_pitch * host_origin[2];
+ dst_ptr = (char *)data->ptr + offset;
+
+ if (data->row_pitch == region[0] && data->row_pitch == data->host_row_pitch &&
+ (region[2] == 1 || (data->slice_pitch == region[0] * region[1] && data->slice_pitch == data->host_slice_pitch))) {
+ memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch * region[1] : data->slice_pitch * region[2]);
+ } else {
+ cl_uint y, z;
+ for (z = 0; z < region[2]; z++) {
+ const char *src = src_ptr;
+ char *dst = dst_ptr;
+ for (y = 0; y < region[1]; y++) {
+ memcpy(dst, src, region[0]);
+ src += data->row_pitch;
+ dst += data->host_row_pitch;
+ }
+ src_ptr = (char *)src_ptr + data->slice_pitch;
+ dst_ptr = (char *)dst_ptr + data->host_slice_pitch;
+ }
+ }
err = cl_mem_unmap_auto(mem);
@@ -110,75 +118,80 @@ error:
return err;
}
-cl_int cl_enqueue_write_buffer(enqueue_data *data)
+static cl_int
+cl_enqueue_write_buffer(enqueue_data *data, cl_int status)
{
cl_int err = CL_SUCCESS;
cl_mem mem = data->mem_obj;
assert(mem->type == CL_MEM_BUFFER_TYPE ||
mem->type == CL_MEM_SUBBUFFER_TYPE);
- struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
+ struct _cl_mem_buffer *buffer = (struct _cl_mem_buffer *)mem;
+
+ if (status != CL_COMPLETE)
+ return err;
if (mem->is_userptr) {
- void* dst_ptr = cl_mem_map_auto(mem, 1);
+ void *dst_ptr = cl_mem_map_auto(mem, 1);
if (dst_ptr == NULL)
err = CL_MAP_FAILURE;
else {
- memcpy((char*)dst_ptr + data->offset + buffer->sub_offset, data->const_ptr, data->size);
+ memcpy((char *)dst_ptr + data->offset + buffer->sub_offset, data->const_ptr, data->size);
cl_mem_unmap_auto(mem);
}
- }
- else {
+ } else {
if (cl_buffer_subdata(mem->bo, data->offset + buffer->sub_offset,
- data->size, data->const_ptr) != 0)
+ data->size, data->const_ptr) != 0)
err = CL_MAP_FAILURE;
}
return err;
}
-cl_int cl_enqueue_write_buffer_rect(enqueue_data *data)
+static cl_int
+cl_enqueue_write_buffer_rect(enqueue_data *data, cl_int status)
{
cl_int err = CL_SUCCESS;
- void* src_ptr;
- void* dst_ptr;
+ void *src_ptr;
+ void *dst_ptr;
- const size_t* origin = data->origin;
- const size_t* host_origin = data->host_origin;
- const size_t* region = data->region;
+ const size_t *origin = data->origin;
+ const size_t *host_origin = data->host_origin;
+ const size_t *region = data->region;
cl_mem mem = data->mem_obj;
assert(mem->type == CL_MEM_BUFFER_TYPE ||
mem->type == CL_MEM_SUBBUFFER_TYPE);
- struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
+ struct _cl_mem_buffer *buffer = (struct _cl_mem_buffer *)mem;
+
+ if (status != CL_COMPLETE)
+ return err;
if (!(dst_ptr = cl_mem_map_auto(mem, 1))) {
err = CL_MAP_FAILURE;
goto error;
}
- size_t offset = origin[0] + data->row_pitch*origin[1] + data->slice_pitch*origin[2];
+ size_t offset = origin[0] + data->row_pitch * origin[1] + data->slice_pitch * origin[2];
dst_ptr = (char *)dst_ptr + offset + buffer->sub_offset;
- offset = host_origin[0] + data->host_row_pitch*host_origin[1] + data->host_slice_pitch*host_origin[2];
- src_ptr = (char*)data->const_ptr + offset;
+ offset = host_origin[0] + data->host_row_pitch * host_origin[1] + data->host_slice_pitch * host_origin[2];
+ src_ptr = (char *)data->const_ptr + offset;
if (data->row_pitch == region[0] && data->row_pitch == data->host_row_pitch &&
- (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && data->slice_pitch == data->host_slice_pitch)))
- {
- memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]);
- }
- else {
+ (region[2] == 1 || (data->slice_pitch == region[0] * region[1] && data->slice_pitch == data->host_slice_pitch))) {
+ memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch * region[1] : data->slice_pitch * region[2]);
+ } else {
cl_uint y, z;
for (z = 0; z < region[2]; z++) {
- const char* src = src_ptr;
- char* dst = dst_ptr;
+ const char *src = src_ptr;
+ char *dst = dst_ptr;
for (y = 0; y < region[1]; y++) {
memcpy(dst, src, region[0]);
src += data->host_row_pitch;
dst += data->row_pitch;
}
- src_ptr = (char*)src_ptr + data->host_slice_pitch;
- dst_ptr = (char*)dst_ptr + data->slice_pitch;
+ src_ptr = (char *)src_ptr + data->host_slice_pitch;
+ dst_ptr = (char *)dst_ptr + data->slice_pitch;
}
}
@@ -188,16 +201,19 @@ error:
return err;
}
-
-cl_int cl_enqueue_read_image(enqueue_data *data)
+static cl_int
+cl_enqueue_read_image(enqueue_data *data, cl_int status)
{
cl_int err = CL_SUCCESS;
- void* src_ptr;
+ void *src_ptr;
cl_mem mem = data->mem_obj;
CHECK_IMAGE(mem, image);
- const size_t* origin = data->origin;
- const size_t* region = data->region;
+ const size_t *origin = data->origin;
+ const size_t *region = data->region;
+
+ if (status != CL_COMPLETE)
+ return err;
if (!(src_ptr = cl_mem_map_auto(mem, 0))) {
err = CL_MAP_FAILURE;
@@ -208,40 +224,42 @@ cl_int cl_enqueue_read_image(enqueue_data *data)
src_ptr = (char*)src_ptr + offset;
if (!origin[0] && region[0] == image->w && data->row_pitch == image->row_pitch &&
- (region[2] == 1 || (!origin[1] && region[1] == image->h && data->slice_pitch == image->slice_pitch)))
- {
- memcpy(data->ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]);
- }
- else {
+ (region[2] == 1 || (!origin[1] && region[1] == image->h && data->slice_pitch == image->slice_pitch))) {
+ memcpy(data->ptr, src_ptr, region[2] == 1 ? data->row_pitch * region[1] : data->slice_pitch * region[2]);
+ } else {
cl_uint y, z;
for (z = 0; z < region[2]; z++) {
- const char* src = src_ptr;
- char* dst = data->ptr;
+ const char *src = src_ptr;
+ char *dst = data->ptr;
for (y = 0; y < region[1]; y++) {
- memcpy(dst, src, image->bpp*region[0]);
+ memcpy(dst, src, image->bpp * region[0]);
src += image->row_pitch;
dst += data->row_pitch;
}
- src_ptr = (char*)src_ptr + image->slice_pitch;
- data->ptr = (char*)data->ptr + data->slice_pitch;
+ src_ptr = (char *)src_ptr + image->slice_pitch;
+ data->ptr = (char *)data->ptr + data->slice_pitch;
}
}
- err = cl_mem_unmap_auto(mem);
+ err = cl_mem_unmap_auto(mem);
error:
return err;
-
}
-cl_int cl_enqueue_write_image(enqueue_data *data)
+static cl_int
+cl_enqueue_write_image(enqueue_data *data, cl_int status)
{
cl_int err = CL_SUCCESS;
- void* dst_ptr;
+ void *dst_ptr;
cl_mem mem = data->mem_obj;
+
CHECK_IMAGE(mem, image);
+ if (status != CL_COMPLETE)
+ return err;
+
if (!(dst_ptr = cl_mem_map_auto(mem, 1))) {
err = CL_MAP_FAILURE;
goto error;
@@ -255,45 +273,57 @@ cl_int cl_enqueue_write_image(enqueue_data *data)
error:
return err;
-
}
-cl_int cl_enqueue_map_buffer(enqueue_data *data)
+static cl_int
+cl_enqueue_map_buffer(enqueue_data *data, cl_int status)
{
void *ptr = NULL;
cl_int err = CL_SUCCESS;
cl_mem mem = data->mem_obj;
assert(mem->type == CL_MEM_BUFFER_TYPE ||
mem->type == CL_MEM_SUBBUFFER_TYPE);
- struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
+ struct _cl_mem_buffer *buffer = (struct _cl_mem_buffer *)mem;
- if (mem->is_userptr)
- ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0);
- else {
- if(data->unsync_map == 1)
- //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
- ptr = cl_mem_map_gtt(mem);
- else
+ if (status == CL_SUBMITTED) {
+ if (buffer->base.is_userptr) {
+ ptr = buffer->base.host_ptr;
+ } else {
+ if ((ptr = cl_mem_map_gtt_unsync(&buffer->base)) == NULL) {
+ err = CL_MAP_FAILURE;
+ return err;
+ }
+ }
+ data->ptr = ptr;
+ } else if (status == CL_COMPLETE) {
+ if (mem->is_userptr)
ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0);
- }
+ else {
+ if (data->unsync_map == 1)
+ //because clEnqueueMapBuffer used an unsync map, force map_gtt here
+ ptr = cl_mem_map_gtt(mem);
+ else
+ ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0);
+ }
- if (ptr == NULL) {
- err = CL_MAP_FAILURE;
- goto error;
- }
- data->ptr = ptr;
+ if (ptr == NULL) {
+ err = CL_MAP_FAILURE;
+ return err;
+ }
+ data->ptr = ptr;
- if((mem->flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr) {
- assert(mem->host_ptr);
- ptr = (char*)ptr + data->offset + buffer->sub_offset;
- memcpy(mem->host_ptr + data->offset + buffer->sub_offset, ptr, data->size);
+ if ((mem->flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr) {
+ assert(mem->host_ptr);
+ ptr = (char *)ptr + data->offset + buffer->sub_offset;
+ memcpy(mem->host_ptr + data->offset + buffer->sub_offset, ptr, data->size);
+ }
}
-error:
return err;
}
-cl_int cl_enqueue_map_image(enqueue_data *data)
+static cl_int
+cl_enqueue_map_image(enqueue_data *data, cl_int status)
{
cl_int err = CL_SUCCESS;
cl_mem mem = data->mem_obj;
@@ -301,46 +331,59 @@ cl_int cl_enqueue_map_image(enqueue_data *data)
size_t row_pitch = 0;
CHECK_IMAGE(mem, image);
- if(data->unsync_map == 1)
- //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
- ptr = cl_mem_map_gtt(mem);
- else
- ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0);
+ if (status == CL_SUBMITTED) {
+ if ((ptr = cl_mem_map_gtt_unsync(mem)) == NULL) {
+ err = CL_MAP_FAILURE;
+ goto error;
+ }
+ data->ptr = ptr;
+ } else if (status == CL_COMPLETE) {
+ if (data->unsync_map == 1)
+ //because clEnqueueMapBuffer used an unsync map, force map_gtt here
+ ptr = cl_mem_map_gtt(mem);
+ else
+ ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0);
+
+ if (ptr == NULL) {
+ err = CL_MAP_FAILURE;
+ goto error;
+ }
- if (ptr == NULL) {
- err = CL_MAP_FAILURE;
- goto error;
- }
- data->ptr = (char*)ptr + image->offset;
- if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
- row_pitch = image->slice_pitch;
- else
- row_pitch = image->row_pitch;
-
- if(mem->flags & CL_MEM_USE_HOST_PTR) {
- assert(mem->host_ptr);
- if (!mem->is_userptr)
- //src and dst need add offset in function cl_mem_copy_image_region
- cl_mem_copy_image_region(data->origin, data->region,
- mem->host_ptr, image->host_row_pitch, image->host_slice_pitch,
- data->ptr, row_pitch, image->slice_pitch, image, CL_TRUE, CL_TRUE);
+ data->ptr = (char*)ptr + image->offset;
+ if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ row_pitch = image->slice_pitch;
+ else
+ row_pitch = image->row_pitch;
+
+ if(mem->flags & CL_MEM_USE_HOST_PTR) {
+ assert(mem->host_ptr);
+ if (!mem->is_userptr)
+ //src and dst need add offset in function cl_mem_copy_image_region
+ cl_mem_copy_image_region(data->origin, data->region,
+ mem->host_ptr, image->host_row_pitch, image->host_slice_pitch,
+ data->ptr, row_pitch, image->slice_pitch, image, CL_TRUE, CL_TRUE);
+ }
}
error:
return err;
}
-cl_int cl_enqueue_unmap_mem_object(enqueue_data *data)
+static cl_int
+cl_enqueue_unmap_mem_object(enqueue_data *data, cl_int status)
{
cl_int err = CL_SUCCESS;
int i, j;
size_t mapped_size = 0;
size_t origin[3], region[3];
- void * v_ptr = NULL;
- void * mapped_ptr = data->ptr;
+ void *v_ptr = NULL;
+ void *mapped_ptr = data->ptr;
cl_mem memobj = data->mem_obj;
size_t row_pitch = 0;
+ if (status != CL_COMPLETE)
+ return err;
+
assert(memobj->mapped_ptr_sz >= memobj->map_ref);
INVALID_VALUE_IF(!mapped_ptr);
for (i = 0; i < memobj->mapped_ptr_sz; i++) {
@@ -348,7 +391,7 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data)
memobj->mapped_ptr[i].ptr = NULL;
mapped_size = memobj->mapped_ptr[i].size;
v_ptr = memobj->mapped_ptr[i].v_ptr;
- for(j=0; j<3; j++) {
+ for (j = 0; j < 3; j++) {
region[j] = memobj->mapped_ptr[i].region[j];
origin[j] = memobj->mapped_ptr[i].origin[j];
memobj->mapped_ptr[i].region[j] = 0;
@@ -364,10 +407,10 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data)
INVALID_VALUE_IF(i == memobj->mapped_ptr_sz);
if (memobj->flags & CL_MEM_USE_HOST_PTR) {
- if(memobj->type == CL_MEM_BUFFER_TYPE ||
- memobj->type == CL_MEM_SUBBUFFER_TYPE) {
+ if (memobj->type == CL_MEM_BUFFER_TYPE ||
+ memobj->type == CL_MEM_SUBBUFFER_TYPE) {
assert(mapped_ptr >= memobj->host_ptr &&
- mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size);
+ mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size);
/* Sync the data. */
if (!memobj->is_userptr)
memcpy(v_ptr, mapped_ptr, mapped_size);
@@ -381,8 +424,8 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data)
if (!memobj->is_userptr)
//v_ptr have added offset, host_ptr have not added offset.
cl_mem_copy_image_region(origin, region, v_ptr, row_pitch, image->slice_pitch,
- memobj->host_ptr, image->host_row_pitch, image->host_slice_pitch,
- image, CL_FALSE, CL_TRUE);
+ memobj->host_ptr, image->host_row_pitch, image->host_slice_pitch,
+ image, CL_FALSE, CL_TRUE);
}
} else {
assert(v_ptr == mapped_ptr);
@@ -391,24 +434,24 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data)
cl_mem_unmap_auto(memobj);
/* shrink the mapped slot. */
- if (memobj->mapped_ptr_sz/2 > memobj->map_ref) {
+ if (memobj->mapped_ptr_sz / 2 > memobj->map_ref) {
int j = 0;
cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
- sizeof(cl_mapped_ptr) * (memobj->mapped_ptr_sz/2));
+ sizeof(cl_mapped_ptr) * (memobj->mapped_ptr_sz / 2));
if (!new_ptr) {
/* Just do nothing. */
goto error;
}
- memset(new_ptr, 0, (memobj->mapped_ptr_sz/2) * sizeof(cl_mapped_ptr));
+ memset(new_ptr, 0, (memobj->mapped_ptr_sz / 2) * sizeof(cl_mapped_ptr));
for (i = 0; i < memobj->mapped_ptr_sz; i++) {
if (memobj->mapped_ptr[i].ptr) {
new_ptr[j] = memobj->mapped_ptr[i];
j++;
- assert(j < memobj->mapped_ptr_sz/2);
+ assert(j < memobj->mapped_ptr_sz / 2);
}
}
- memobj->mapped_ptr_sz = memobj->mapped_ptr_sz/2;
+ memobj->mapped_ptr_sz = memobj->mapped_ptr_sz / 2;
free(memobj->mapped_ptr);
memobj->mapped_ptr = new_ptr;
}
@@ -417,7 +460,8 @@ error:
return err;
}
-cl_int cl_enqueue_native_kernel(enqueue_data *data)
+static cl_int
+cl_enqueue_native_kernel(enqueue_data *data, cl_int status)
{
cl_int err = CL_SUCCESS;
cl_uint num_mem_objects = (cl_uint)data->offset;
@@ -425,18 +469,19 @@ cl_int cl_enqueue_native_kernel(enqueue_data *data)
const void **args_mem_loc = (const void **)data->const_ptr;
cl_uint i;
- for (i=0; i<num_mem_objects; ++i)
- {
- const cl_mem buffer = mem_list[i];
- CHECK_MEM(buffer);
+ if (status != CL_COMPLETE)
+ return err;
+
+ for (i = 0; i < num_mem_objects; ++i) {
+ const cl_mem buffer = mem_list[i];
+ CHECK_MEM(buffer);
- *((void **)args_mem_loc[i]) = cl_mem_map_auto(buffer, 0);
+ *((void **)args_mem_loc[i]) = cl_mem_map_auto(buffer, 0);
}
data->user_func(data->ptr);
- for (i=0; i<num_mem_objects; ++i)
- {
- cl_mem_unmap_auto(mem_list[i]);
+ for (i = 0; i < num_mem_objects; ++i) {
+ cl_mem_unmap_auto(mem_list[i]);
}
free(data->ptr);
@@ -444,46 +489,113 @@ error:
return err;
}
-cl_int cl_enqueue_handle(cl_event event, enqueue_data* data)
+static cl_int
+cl_enqueue_ndrange(enqueue_data *data, cl_int status)
{
- /* if need profiling, add the submit timestamp here. */
- if (event && event->type != CL_COMMAND_USER
- && event->queue->props & CL_QUEUE_PROFILING_ENABLE) {
- cl_event_get_timestamp(event, CL_PROFILING_COMMAND_SUBMIT);
+ cl_int err = CL_SUCCESS;
+
+ if (status == CL_SUBMITTED) {
+ err = cl_command_queue_flush_gpgpu(data->gpgpu);
+ } else if (status == CL_COMPLETE) {
+ void *batch_buf = cl_gpgpu_ref_batch_buf(data->gpgpu);
+ cl_gpgpu_sync(batch_buf);
+ cl_gpgpu_unref_batch_buf(batch_buf);
+ /* Finished, we can release the gpgpu now. */
+ cl_gpgpu_delete(data->gpgpu);
+ data->gpgpu = NULL;
}
- switch(data->type) {
- case EnqueueReadBuffer:
- return cl_enqueue_read_buffer(data);
- case EnqueueReadBufferRect:
- return cl_enqueue_read_buffer_rect(data);
- case EnqueueWriteBuffer:
- return cl_enqueue_write_buffer(data);
- case EnqueueWriteBufferRect:
- return cl_enqueue_write_buffer_rect(data);
- case EnqueueReadImage:
- return cl_enqueue_read_image(data);
- case EnqueueWriteImage:
- return cl_enqueue_write_image(data);
- case EnqueueMapBuffer:
- return cl_enqueue_map_buffer(data);
- case EnqueueMapImage:
- return cl_enqueue_map_image(data);
- case EnqueueUnmapMemObject:
- return cl_enqueue_unmap_mem_object(data);
- case EnqueueCopyBufferRect:
- case EnqueueCopyBuffer:
- case EnqueueCopyImage:
- case EnqueueCopyBufferToImage:
- case EnqueueCopyImageToBuffer:
- case EnqueueNDRangeKernel:
- case EnqueueFillBuffer:
- case EnqueueFillImage:
- return cl_event_flush(event);
- case EnqueueNativeKernel:
- return cl_enqueue_native_kernel(data);
- case EnqueueMigrateMemObj:
- default:
- return CL_SUCCESS;
+ return err;
+}
+
+static cl_int
+cl_enqueue_marker_or_barrier(enqueue_data *data, cl_int status)
+{
+ return CL_COMPLETE;
+}
+
+LOCAL void
+cl_enqueue_delete(enqueue_data *data)
+{
+ if (data == NULL)
+ return;
+
+ if (data->type == EnqueueCopyBufferRect ||
+ data->type == EnqueueCopyBuffer ||
+ data->type == EnqueueCopyImage ||
+ data->type == EnqueueCopyBufferToImage ||
+ data->type == EnqueueCopyImageToBuffer ||
+ data->type == EnqueueNDRangeKernel ||
+ data->type == EnqueueFillBuffer ||
+ data->type == EnqueueFillImage) {
+ if (data->gpgpu) {
+ cl_gpgpu_delete(data->gpgpu);
+ data->gpgpu = NULL;
+ }
+ return;
+ }
+
+ if (data->type == EnqueueNativeKernel) {
+ if (data->mem_list) {
+ cl_free((void*)data->mem_list);
+ data->mem_list = NULL;
+ }
+ if (data->ptr) {
+ cl_free((void*)data->ptr);
+ data->ptr = NULL;
+ }
+ if (data->const_ptr) {
+ cl_free((void*)data->const_ptr);
+ data->const_ptr = NULL;
+ }
+ }
+}
+
+LOCAL cl_int
+cl_enqueue_handle(enqueue_data *data, cl_int status)
+{
+ /* if need profiling, add the submit timestamp here. */
+ // if (event && event->event_type != CL_COMMAND_USER &&
+ // event->queue->props & CL_QUEUE_PROFILING_ENABLE) {
+ // cl_event_get_timestamp(event, CL_PROFILING_COMMAND_SUBMIT);
+ // }
+
+ switch (data->type) {
+ case EnqueueReadBuffer:
+ return cl_enqueue_read_buffer(data, status);
+ case EnqueueReadBufferRect:
+ return cl_enqueue_read_buffer_rect(data, status);
+ case EnqueueWriteBuffer:
+ return cl_enqueue_write_buffer(data, status);
+ case EnqueueWriteBufferRect:
+ return cl_enqueue_write_buffer_rect(data, status);
+ case EnqueueReadImage:
+ return cl_enqueue_read_image(data, status);
+ case EnqueueWriteImage:
+ return cl_enqueue_write_image(data, status);
+ case EnqueueMapBuffer:
+ return cl_enqueue_map_buffer(data, status);
+ case EnqueueMapImage:
+ return cl_enqueue_map_image(data, status);
+ case EnqueueUnmapMemObject:
+ return cl_enqueue_unmap_mem_object(data, status);
+ case EnqueueMarker:
+ case EnqueueBarrier:
+ return cl_enqueue_marker_or_barrier(data, status);
+ case EnqueueCopyBufferRect:
+ case EnqueueCopyBuffer:
+ case EnqueueCopyImage:
+ case EnqueueCopyBufferToImage:
+ case EnqueueCopyImageToBuffer:
+ case EnqueueNDRangeKernel:
+ case EnqueueFillBuffer:
+ case EnqueueFillImage:
+ //return cl_event_flush(event);
+ return cl_enqueue_ndrange(data, status);
+ case EnqueueNativeKernel:
+ return cl_enqueue_native_kernel(data, status);
+ case EnqueueMigrateMemObj:
+ default:
+ return CL_SUCCESS;
}
}
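
The common thread in the rewritten helpers is that each one is now called with the event status
and decides per phase what to do. The sketch below shows the resulting two-phase calling
convention, assuming exec_data was already filled in by the API layer; in the patch itself the
transitions are driven from cl_event_set_status and the cleanup from cl_event_delete:

/* Sketch of the two-phase calling convention for the reworked helpers. */
static cl_int
run_command_sketch(cl_event event)
{
  cl_int err;

  /* CL_SUBMITTED phase: unsynchronized maps, batch-buffer flush, and other
   * work that can start as soon as the command is handed to the hardware. */
  err = cl_enqueue_handle(&event->exec_data, CL_SUBMITTED);
  if (err != CL_SUCCESS)
    return err;

  /* CL_COMPLETE phase: blocking maps, host memcpy, gpgpu sync. */
  err = cl_enqueue_handle(&event->exec_data, CL_COMPLETE);

  /* Per-command resources (gpgpu, native-kernel argument copies, ...) are
   * released by cl_enqueue_delete(), invoked when the event is destroyed. */
  return err;
}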
diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h
index 09305af..a6cdd3d 100644
--- a/src/cl_enqueue.h
+++ b/src/cl_enqueue.h
@@ -49,26 +49,29 @@ typedef enum {
} enqueue_type;
typedef struct _enqueue_data {
- enqueue_type type; /* Command type */
- cl_mem mem_obj; /* Enqueue's cl_mem */
- cl_command_queue queue; /* Command queue */
- size_t offset; /* Mem object's offset */
- size_t size; /* Size */
- size_t origin[3]; /* Origin */
- size_t host_origin[3]; /* Origin */
- size_t region[3]; /* Region */
- size_t row_pitch; /* Row pitch */
- size_t slice_pitch; /* Slice pitch */
- size_t host_row_pitch; /* Host row pitch, used in read/write buffer rect */
- size_t host_slice_pitch; /* Host slice pitch, used in read/write buffer rect */
- const void * const_ptr; /* Const ptr for memory read */
- void * ptr; /* Ptr for write and return value */
- const cl_mem* mem_list; /* mem_list of clEnqueueNativeKernel */
- uint8_t unsync_map; /* Indicate the clEnqueueMapBuffer/Image is unsync map */
- uint8_t write_map; /* Indicate if the clEnqueueMapBuffer is write enable */
- void (*user_func)(void *); /* pointer to a host-callable user function */
+ enqueue_type type; /* Command type */
+ cl_mem mem_obj; /* Enqueue's cl_mem */
+ cl_command_queue queue; /* Command queue */
+ size_t offset; /* Mem object's offset */
+ size_t size; /* Size */
+ size_t origin[3]; /* Origin */
+ size_t host_origin[3]; /* Origin */
+ size_t region[3]; /* Region */
+ size_t row_pitch; /* Row pitch */
+ size_t slice_pitch; /* Slice pitch */
+ size_t host_row_pitch; /* Host row pitch, used in read/write buffer rect */
+ size_t host_slice_pitch; /* Host slice pitch, used in read/write buffer rect */
+ const void *const_ptr; /* Const ptr for memory read */
+ void *ptr; /* Ptr for write and return value */
+ const cl_mem *mem_list; /* mem_list of clEnqueueNativeKernel */
+ uint8_t unsync_map; /* Indicate the clEnqueueMapBuffer/Image is unsync map */
+ uint8_t write_map; /* Indicate if the clEnqueueMapBuffer is write enable */
+ void (*user_func)(void *); /* pointer to a host-callable user function */
+ cl_gpgpu gpgpu;
} enqueue_data;
/* Do real enqueue commands */
-cl_int cl_enqueue_handle(cl_event event, enqueue_data* data);
+extern cl_int cl_enqueue_handle(enqueue_data *data, cl_int status);
+extern void cl_enqueue_delete(enqueue_data *data);
+
#endif /* __CL_ENQUEUE_H__ */
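
For illustration, this is roughly how an API entry point is expected to fill the reworked
enqueue_data before attaching it to an event. The helper below is hypothetical; the field names
come from the struct above, and validation plus event creation are omitted:

#include <string.h>

/* Hypothetical producer-side helper: describe a buffer read for the deferred
 * execution path. cl_enqueue_handle() later consumes this descriptor. */
static void
fill_read_buffer_data(enqueue_data *data, cl_command_queue queue,
                      cl_mem buffer, size_t offset, size_t size, void *host_ptr)
{
  memset(data, 0, sizeof(*data));
  data->type = EnqueueReadBuffer;  /* selects cl_enqueue_read_buffer() */
  data->queue = queue;
  data->mem_obj = buffer;
  data->offset = offset;           /* offset inside the cl_mem */
  data->size = size;               /* bytes to copy */
  data->ptr = host_ptr;            /* destination in host memory */
}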
diff --git a/src/cl_event.c b/src/cl_event.c
index 6c7c2e0..4acd619 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -14,750 +14,615 @@
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
- * Author: Rong Yang <rong.r.yang at intel.com>
*/
#include "cl_event.h"
#include "cl_context.h"
-#include "cl_utils.h"
-#include "cl_alloc.h"
-#include "cl_khr_icd.h"
-#include "cl_kernel.h"
#include "cl_command_queue.h"
-
-#include <assert.h>
+#include "cl_alloc.h"
+#include <string.h>
#include <stdio.h>
-void cl_event_update_last_events(cl_command_queue queue, int wait)
+LOCAL cl_int
+cl_event_get_timestamp(cl_event event, cl_profiling_info param_name)
{
- cl_event last_event = get_last_event(queue);
- if(!last_event) return;
- cl_event next, now;
- now = last_event;
- while(now){
- next = now->last_next;//get next first in case set status maintain it
- cl_event_update_status(now,wait);//update event status
- now = next;
- }
+ // TODO:
+ return CL_INVALID_VALUE;
}
-void cl_event_insert_last_events(cl_command_queue queue,cl_event event)
+LOCAL cl_ulong
+cl_event_get_timestamp_delta(cl_ulong start_timestamp, cl_ulong end_timestamp)
{
- if(!event) return;
- cl_event last_event = get_last_event(queue);
- if(last_event){
- cl_event now = last_event;
- while(now->last_next)
- now = now->last_next;
- now->last_next = event;
- event->last_prev = now;
+ cl_ulong ret_val;
+
+ if (end_timestamp > start_timestamp) {
+ ret_val = end_timestamp - start_timestamp;
+ } else {
+ /* If the start timestamp is greater than the end timestamp, set the return value to max. */
+ ret_val = ((cl_ulong)1 << 32);
}
- else set_last_event(queue,event);
+
+ return ret_val;
}
-static inline cl_bool
-cl_event_is_gpu_command_type(cl_command_type type)
+LOCAL cl_ulong
+cl_event_get_start_timestamp(cl_event event)
{
- switch(type) {
- case CL_COMMAND_COPY_BUFFER:
- case CL_COMMAND_FILL_BUFFER:
- case CL_COMMAND_COPY_IMAGE:
- case CL_COMMAND_COPY_IMAGE_TO_BUFFER:
- case CL_COMMAND_COPY_BUFFER_TO_IMAGE:
- case CL_COMMAND_COPY_BUFFER_RECT:
- case CL_COMMAND_TASK:
- case CL_COMMAND_NDRANGE_KERNEL:
- return CL_TRUE;
- default:
- return CL_FALSE;
- }
+ cl_ulong ret_val;
+
+ ret_val = cl_event_get_timestamp_delta(event->timestamp[0], event->timestamp[2]);
+
+ return ret_val;
}
-int cl_event_flush(cl_event event)
+LOCAL cl_ulong
+cl_event_get_end_timestamp(cl_event event)
{
- int err = CL_SUCCESS;
- if(!event) {
- err = CL_INVALID_VALUE;
- return err;
- }
+ cl_ulong ret_val;
- assert(event->gpgpu_event != NULL);
- if (event->gpgpu) {
- err = cl_command_queue_flush_gpgpu(event->queue, event->gpgpu);
- cl_gpgpu_delete(event->gpgpu);
- event->gpgpu = NULL;
- }
- cl_gpgpu_event_flush(event->gpgpu_event);
- cl_event_insert_last_events(event->queue,event);
- return err;
+ ret_val = cl_event_get_timestamp_delta(event->timestamp[0], event->timestamp[3]);
+
+ return ret_val;
}
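
The two helpers above reduce the timestamp[] slots to the deltas reported for profiling: end
minus start when the values are ordered, otherwise a 1 << 32 sentinel. A tiny standalone sketch
with illustrative values (not taken from the patch):

/* Illustrative restatement of the delta rule used by the profiling helpers. */
static cl_ulong
timestamp_delta(cl_ulong start, cl_ulong end)
{
  if (end > start)
    return end - start;
  return (cl_ulong)1 << 32; /* start > end: report the sentinel value */
}

/* e.g. with timestamp[0] = 100, timestamp[2] = 250 and timestamp[3] = 900,
 * the start delta is 150 and the end delta is 800. */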
-cl_event cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type, cl_bool emplict)
+LOCAL void
+cl_event_add_ref(cl_event event)
{
- cl_event event = NULL;
- GET_QUEUE_THREAD_GPGPU(queue);
+ assert(event);
+ CL_OBJECT_INC_REF(event);
+}
- /* Allocate and inialize the structure itself */
- TRY_ALLOC_NO_ERR (event, CALLOC(struct _cl_event));
- CL_OBJECT_INIT_BASE(event, CL_OBJECT_EVENT_MAGIC);
+LOCAL cl_int
+cl_event_get_status(cl_event event)
+{
+ cl_int ret;
+
+ assert(event);
+ CL_OBJECT_LOCK(event);
+ ret = event->status;
+ CL_OBJECT_UNLOCK(event);
+ return ret;
+}
+
+static cl_event
+cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type,
+ cl_uint num_events, cl_event *event_list)
+{
+ cl_event e = cl_calloc(1, sizeof(_cl_event));
+ if (e == NULL)
+ return NULL;
+
+ CL_OBJECT_INIT_BASE(e, CL_OBJECT_EVENT_MAGIC);
/* Append the event in the context event list */
- cl_context_add_event(ctx, event);
-
- /* Initialize all members and create GPGPU event object */
- event->queue = queue;
- event->type = type;
- event->gpgpu_event = NULL;
- if(type == CL_COMMAND_USER) {
- event->status = CL_SUBMITTED;
+ cl_context_add_event(ctx, e);
+ e->queue = queue;
+
+ list_init(&e->callbacks);
+ list_init(&e->enqueue_node);
+
+ assert(type >= CL_COMMAND_NDRANGE_KERNEL && type <= CL_COMMAND_FILL_IMAGE);
+ e->event_type = type;
+ if (type == CL_COMMAND_USER) {
+ e->status = CL_SUBMITTED;
+ } else {
+ e->status = CL_QUEUED;
}
- else {
- event->status = CL_QUEUED;
- if(cl_event_is_gpu_command_type(event->type))
- event->gpgpu_event = cl_gpgpu_event_new(gpgpu);
+
+ if (type == CL_COMMAND_USER) {
+ assert(queue == NULL);
}
- cl_event_add_ref(event); //dec when complete
- event->user_cb = NULL;
- event->enqueue_cb = NULL;
- event->waits_head = NULL;
- event->emplict = emplict;
-
-exit:
- return event;
-error:
- cl_event_delete(event);
- event = NULL;
- goto exit;
+
+ e->depend_events = event_list;
+ e->depend_event_num = num_events;
+ return e;
}
-void cl_event_delete(cl_event event)
+LOCAL void
+cl_event_delete(cl_event event)
{
+ int i;
+ cl_event_user_callback cb;
+
if (UNLIKELY(event == NULL))
return;
- cl_event_update_status(event, 0);
-
if (CL_OBJECT_DEC_REF(event) > 1)
return;
- /* Call all user's callback if haven't execute */
- cl_event_call_callback(event, CL_COMPLETE, CL_TRUE); // CL_COMPLETE status will force all callbacks that are not executed to run
+ cl_enqueue_delete(&event->exec_data);
- /* delete gpgpu event object */
- if(event->gpgpu_event)
- cl_gpgpu_event_delete(event->gpgpu_event);
+ assert(list_empty(&event->enqueue_node));
- /* Remove it from the list */
- cl_context_remove_event(event->ctx, event);
+ if (event->depend_events) {
+ assert(event->depend_event_num);
+ for (i = 0; i < event->depend_event_num; i++) {
+ cl_event_delete(event->depend_events[i]);
+ }
+ cl_free(event->depend_events);
+ }
- if (event->gpgpu) {
- fprintf(stderr, "Warning: a event is deleted with a pending enqueued task.\n");
- cl_gpgpu_delete(event->gpgpu);
- event->gpgpu = NULL;
+ /* Free all the callbacks. Last ref, no need to lock. */
+ while (!list_empty(&event->callbacks)) {
+ cb = list_entry(event->callbacks.next, _cl_event_user_callback, node);
+ list_del(&cb->node);
+ cl_free(cb);
}
+ /* Remove it from the list */
+ assert(event->ctx);
+ cl_context_remove_event(event->ctx, event);
+
CL_OBJECT_DESTROY_BASE(event);
cl_free(event);
}
-void cl_event_add_ref(cl_event event)
+LOCAL cl_event
+cl_event_create(cl_context ctx, cl_command_queue queue, cl_uint num_events,
+ const cl_event *event_list, cl_command_type type, cl_int *errcode_ret)
{
- assert(event);
- CL_OBJECT_INC_REF(event);
-}
+ cl_event e = NULL;
+ cl_event *depend_events = NULL;
+ cl_int err = CL_SUCCESS;
+ cl_uint total_events = 0;
+ int i;
-cl_int cl_event_set_callback(cl_event event ,
- cl_int command_exec_callback_type,
- EVENT_NOTIFY pfn_notify,
- void* user_data)
-{
- assert(event);
- assert(pfn_notify);
+ assert(ctx);
- cl_int err = CL_SUCCESS;
- user_callback *cb;
- TRY_ALLOC(cb, CALLOC(user_callback));
-
- cb->pfn_notify = pfn_notify;
- cb->user_data = user_data;
- cb->status = command_exec_callback_type;
- cb->executed = CL_FALSE;
-
-
- // It is possible that the event enqueued is already completed.
- // clEnqueueReadBuffer can be synchronous and when the callback
- // is registered after, it still needs to get executed.
- CL_OBJECT_LOCK(event); // Thread safety required: operations on the event->status can be made from many different threads
- if(event->status <= command_exec_callback_type) {
- /* Call user callback */
- CL_OBJECT_UNLOCK(event); // pfn_notify can call clFunctions that use the event_lock and from here it's not required
- cb->pfn_notify(event, event->status, cb->user_data);
- cl_free(cb);
- } else {
- // Enqueue to callback list
- cb->next = event->user_cb;
- event->user_cb = cb;
- CL_OBJECT_UNLOCK(event);
- }
+ do {
+ if (event_list)
+ assert(num_events);
-exit:
- return err;
-error:
- err = CL_OUT_OF_HOST_MEMORY;
- cl_free(cb);
- goto exit;
-};
-
-cl_int cl_event_check_waitlist(cl_uint num_events_in_wait_list,
- const cl_event *event_wait_list,
- cl_event *event,cl_context ctx)
-{
- cl_int err = CL_SUCCESS;
- cl_int i;
- /* check the event_wait_list and num_events_in_wait_list */
- if((event_wait_list == NULL) &&
- (num_events_in_wait_list > 0))
- goto error;
-
- if ((event_wait_list != NULL) &&
- (num_events_in_wait_list == 0)){
- goto error;
- }
+ if (queue == NULL) {
+ assert(type == CL_COMMAND_USER);
+ assert(event_list == NULL);
+ assert(num_events == 0);
- /* check the event and context */
- for(i=0; i<num_events_in_wait_list; i++) {
- CHECK_EVENT(event_wait_list[i]);
- if(event_wait_list[i]->status < CL_COMPLETE) {
- err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
- goto exit;
- }
- if(event && event == &event_wait_list[i])
- goto error;
- if(event_wait_list[i]->ctx != ctx) {
- err = CL_INVALID_CONTEXT;
- goto exit;
- }
- }
+ e = cl_event_new(ctx, queue, type, 0, NULL);
+ if (e == NULL) {
+ err = CL_OUT_OF_HOST_MEMORY;
+ break;
+ }
+ } else {
+ CL_OBJECT_LOCK(queue);
+ total_events = queue->barrier_events_num + num_events;
+
+ if (total_events) {
+ depend_events = cl_calloc(total_events, sizeof(cl_event));
+ if (depend_events == NULL) {
+ CL_OBJECT_UNLOCK(queue);
+ err = CL_OUT_OF_HOST_MEMORY;
+ break;
+ }
+ }
-exit:
- return err;
-error:
- err = CL_INVALID_EVENT_WAIT_LIST; //reset error
- goto exit;
-}
+ /* Add all the barrier events as depend events. */
+ for (i = 0; i < queue->barrier_events_num; i++) {
+ assert(CL_EVENT_IS_BARRIER(queue->barrier_events[i]));
+ cl_event_add_ref(queue->barrier_events[i]);
+ depend_events[num_events + i] = queue->barrier_events[i];
+ }
-cl_int cl_event_wait_events(cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
- cl_command_queue queue)
-{
- cl_int i;
+ CL_OBJECT_UNLOCK(queue);
- /* Check whether wait user events */
- for(i=0; i<num_events_in_wait_list; i++) {
- if(event_wait_list[i]->status <= CL_COMPLETE)
- continue;
+ for (i = 0; i < num_events; i++) {
+ assert(event_list[i]);
+ assert(event_list[i]->ctx == ctx);
+ assert(CL_OBJECT_IS_EVENT(event_list[i]));
+ cl_event_add_ref(event_list[i]);
+ depend_events[i] = event_list[i];
+ }
- /* Need wait on user event, return and do enqueue defer */
- if((event_wait_list[i]->type == CL_COMMAND_USER) ||
- (event_wait_list[i]->enqueue_cb &&
- (event_wait_list[i]->enqueue_cb->wait_user_events != NULL))){
- return CL_ENQUEUE_EXECUTE_DEFER;
- }
- }
+ if (depend_events)
+ assert(total_events);
- if(queue && queue->barrier_events_num )
- return CL_ENQUEUE_EXECUTE_DEFER;
+ e = cl_event_new(ctx, queue, type, total_events, depend_events);
+ if (e == NULL) {
+ err = CL_OUT_OF_HOST_MEMORY;
+ break;
+ }
+ depend_events = NULL;
+ }
+ } while (0);
- /* Non user events or all user event finished, wait all enqueue events finish */
- for(i=0; i<num_events_in_wait_list; i++) {
- if(event_wait_list[i]->status <= CL_COMPLETE)
- continue;
+ if (err != CL_SUCCESS) {
+ if (depend_events) {
+ for (i = 0; i < total_events; i++) {
+ cl_event_delete(depend_events[i]);
+ }
+ cl_free(depend_events);
+ }
- //enqueue callback haven't finish, in another thread, wait
- if(event_wait_list[i]->enqueue_cb != NULL)
- return CL_ENQUEUE_EXECUTE_DEFER;
- if(event_wait_list[i]->gpgpu_event)
- cl_gpgpu_event_update_status(event_wait_list[i]->gpgpu_event, 1);
- cl_event_set_status(event_wait_list[i], CL_COMPLETE); //Execute user's callback
+ // If depend_events had been handed to the event, creation must have succeeded, so it is NULL here.
+ assert(e->depend_events == NULL);
+ cl_event_delete(e);
}
- return CL_ENQUEUE_EXECUTE_IMM;
+
+ if (errcode_ret)
+ *errcode_ret = err;
+
+ return e;
}
-void cl_event_new_enqueue_callback(cl_event event,
- enqueue_data *data,
- cl_uint num_events_in_wait_list,
- const cl_event *event_wait_list)
+LOCAL cl_int
+cl_event_set_callback(cl_event event, cl_int exec_type, cl_event_notify_cb pfn_notify, void *user_data)
{
- enqueue_callback *cb, *node;
- user_event *user_events, *u_ev;
- cl_command_queue queue = event ? event->queue : NULL;
- cl_int i;
cl_int err = CL_SUCCESS;
+ cl_event_user_callback cb;
+ cl_bool exec_imm = CL_FALSE;
- /* Allocate and initialize the structure itself */
- TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback));
- cb->num_events = 0;
- TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, num_events_in_wait_list));
- for(i=0; i<num_events_in_wait_list; i++) {
- //user event will insert to cb->wait_user_events, need not in wait list, avoid ref twice
- if(event_wait_list[i]->type != CL_COMMAND_USER) {
- cb->wait_list[cb->num_events++] = event_wait_list[i];
- cl_event_add_ref(event_wait_list[i]); //add defer enqueue's wait event reference
- }
- }
- cb->event = event;
- cb->next = NULL;
- cb->wait_user_events = NULL;
-
- if(queue && queue->barrier_events_num > 0) {
- for(i=0; i<queue->barrier_events_num; i++) {
- /* Insert the enqueue_callback to user event list */
- node = queue->wait_events[i]->waits_head;
- if(node == NULL)
- queue->wait_events[i]->waits_head = cb;
- else{
- while((node != cb) && node->next)
- node = node->next;
- if(node == cb) //wait on dup user event
- continue;
- node->next = cb;
- }
+ assert(event);
+ assert(pfn_notify);
- /* Insert the user event to enqueue_callback's wait_user_events */
- TRY(cl_event_insert_user_event, &cb->wait_user_events, queue->wait_events[i]);
- cl_event_add_ref(queue->wait_events[i]);
+ do {
+ cb = cl_calloc(1, sizeof(_cl_event_user_callback));
+ if (cb == NULL) {
+ err = CL_OUT_OF_HOST_MEMORY;
+ break;
}
- }
- /* Find out all user events that in event_wait_list wait */
- for(i=0; i<num_events_in_wait_list; i++) {
- if(event_wait_list[i]->status <= CL_COMPLETE)
- continue;
-
- if(event_wait_list[i]->type == CL_COMMAND_USER) {
- /* Insert the enqueue_callback to user event list */
- node = event_wait_list[i]->waits_head;
- if(node == NULL)
- event_wait_list[i]->waits_head = cb;
- else {
- while((node != cb) && node->next)
- node = node->next;
- if(node == cb) //wait on dup user event
- continue;
- node->next = cb;
- }
- /* Insert the user event to enqueue_callback's wait_user_events */
- TRY(cl_event_insert_user_event, &cb->wait_user_events, event_wait_list[i]);
- cl_event_add_ref(event_wait_list[i]);
- if(queue)
- cl_command_queue_insert_event(queue, event_wait_list[i]);
- if(queue && data->type == EnqueueBarrier){
- cl_command_queue_insert_barrier_event(queue, event_wait_list[i]);
- }
- } else if(event_wait_list[i]->enqueue_cb != NULL) {
- user_events = event_wait_list[i]->enqueue_cb->wait_user_events;
- while(user_events != NULL) {
- /* Insert the enqueue_callback to user event's waits_tail */
- node = user_events->event->waits_head;
- if(node == NULL)
- event_wait_list[i]->waits_head = cb;
- else{
- while((node != cb) && node->next)
- node = node->next;
- if(node == cb) { //wait on dup user event
- user_events = user_events->next;
- continue;
- }
- node->next = cb;
- }
-
- /* Insert the user event to enqueue_callback's wait_user_events */
- TRY(cl_event_insert_user_event, &cb->wait_user_events, user_events->event);
- cl_event_add_ref(user_events->event);
- if(queue)
- cl_command_queue_insert_event(event->queue, user_events->event);
- if(queue && data->type == EnqueueBarrier){
- cl_command_queue_insert_barrier_event(event->queue, user_events->event);
- }
- user_events = user_events->next;
- }
- }
- }
- if(event != NULL && event->queue != NULL && event->gpgpu_event != NULL) {
- event->gpgpu = cl_thread_gpgpu_take(event->queue);
- data->ptr = (void *)event->gpgpu_event;
- }
- cb->data = *data;
- if(event)
- event->enqueue_cb = cb;
-
-exit:
- return;
-error:
- if(cb) {
- while(cb->wait_user_events) {
- u_ev = cb->wait_user_events;
- cb->wait_user_events = cb->wait_user_events->next;
- cl_event_delete(u_ev->event);
- cl_free(u_ev);
+ list_init(&cb->node);
+ cb->pfn_notify = pfn_notify;
+ cb->user_data = user_data;
+ cb->status = exec_type;
+ cb->executed = CL_FALSE;
+
+ CL_OBJECT_LOCK(event);
+ if (event->status > exec_type) {
+ list_add_tail(&cb->node, &event->callbacks);
+ cb = NULL;
+ } else {
+ /* The status has already reached this state, call it immediately. */
+ exec_imm = CL_TRUE;
}
- for(i=0; i<cb->num_events; i++) {
- if(cb->wait_list[i]) {
- cl_event_delete(cb->wait_list[i]);
- }
- }
- cl_free(cb);
- }
- goto exit;
-}
+ CL_OBJECT_UNLOCK(event);
-void cl_event_call_callback(cl_event event, cl_int status, cl_bool free_cb) {
- user_callback *user_cb = NULL;
- user_callback *queue_cb = NULL; // For thread safety, we create a queue that holds user_callback's pfn_notify contents
- user_callback *temp_cb = NULL;
- user_cb = event->user_cb;
- CL_OBJECT_LOCK(event);
- while(user_cb) {
- if(user_cb->status >= status
- && user_cb->executed == CL_FALSE) { // Added check to not execute a callback when it was already handled
- user_cb->executed = CL_TRUE;
- temp_cb = cl_malloc(sizeof(user_callback));
- if(!temp_cb) {
- break; // Out of memory
- }
- temp_cb->pfn_notify = user_cb->pfn_notify; // Minor struct copy to call ppfn_notify out of the pthread_mutex
- temp_cb->user_data = user_cb->user_data;
- if(free_cb) {
- cl_free(user_cb);
- }
- if(!queue_cb) {
- queue_cb = temp_cb;
- queue_cb->next = NULL;
- } else { // Enqueue First
- temp_cb->next = queue_cb;
- queue_cb = temp_cb;
- }
+ if (exec_imm) {
+ cb->pfn_notify(event, event->status, cb->user_data);
}
- user_cb = user_cb->next;
- }
- CL_OBJECT_UNLOCK(event);
- // Calling the callbacks outside of the event_lock is required because the callback can call cl_api functions and get deadlocked
- while(queue_cb) { // For each callback queued, actually execute the callback
- queue_cb->pfn_notify(event, event->status, queue_cb->user_data);
- temp_cb = queue_cb;
- queue_cb = queue_cb->next;
- cl_free(temp_cb);
- }
+ } while (0);
+
+ if (cb)
+ cl_free(cb);
+
+ return err;
}
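
One behavioral detail worth calling out in the callback path above: if the event has already
reached the requested execution status, the callback runs immediately on the calling thread
instead of being queued. A small host-side sketch of how that is typically observed (standard
OpenCL API usage, not code from this patch):

#include <CL/cl.h>
#include <stdio.h>

/* Host-side illustration: register a CL_COMPLETE callback. If the event has
 * already completed, the implementation above invokes it right away;
 * otherwise it fires later from cl_event_set_status(). */
static void CL_CALLBACK
on_complete(cl_event ev, cl_int status, void *user_data)
{
  (void)ev;
  printf("event finished with status %d (%s)\n", status, (const char *)user_data);
}

static void
watch_event(cl_event ev)
{
  clSetEventCallback(ev, CL_COMPLETE, on_complete, "my-command");
}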
-void cl_event_set_status(cl_event event, cl_int status)
+LOCAL cl_int
+cl_event_set_status(cl_event event, cl_int status)
{
- cl_int ret, i;
- cl_event evt;
+ list_head tmp_callbacks;
+ list_head *n;
+ list_head *pos;
+ cl_bool notify_queue = CL_FALSE;
+ cl_event_user_callback cb;
+
+ assert(event);
CL_OBJECT_LOCK(event);
- if(status >= event->status) {
+ if (event->status <= CL_COMPLETE) { // Already set to error or completed
CL_OBJECT_UNLOCK(event);
- return;
+ return CL_INVALID_OPERATION;
}
- if(event->status <= CL_COMPLETE) {
- event->status = status; //have done enqueue before or doing in another thread
- CL_OBJECT_UNLOCK(event);
- return;
+
+ if (CL_EVENT_IS_USER(event)) {
+ assert(event->status != CL_RUNNING && event->status != CL_QUEUED);
+ } else {
+ assert(event->queue); // Must belong to some queue.
}
- if(status <= CL_COMPLETE) {
- if(event->enqueue_cb) {
- if(status == CL_COMPLETE) {
- cl_enqueue_handle(event, &event->enqueue_cb->data);
- if(event->gpgpu_event)
- cl_gpgpu_event_update_status(event->gpgpu_event, 1); //now set complet, need refine
- } else {
- if(event->gpgpu_event) {
- // Error then cancel the enqueued event.
- cl_gpgpu_delete(event->gpgpu);
- event->gpgpu = NULL;
- }
- }
+ if (status >= event->status) { // Should never go back.
+ CL_OBJECT_UNLOCK(event);
+ return CL_INVALID_OPERATION;
+ }
- event->status = status; //Change the event status after enqueue and befor unlock
+ event->status = status;
+ /* Call all the callbacks. */
+ if (!list_empty(&event->callbacks)) {
+ do {
+ status = event->status;
+ list_init(&tmp_callbacks);
+ list_replace(&event->callbacks, &tmp_callbacks);
+ list_init(&event->callbacks);
+ /* Call all the callbacks without lock. */
CL_OBJECT_UNLOCK(event);
- for(i=0; i<event->enqueue_cb->num_events; i++)
- cl_event_delete(event->enqueue_cb->wait_list[i]);
+
+ list_for_each_safe(pos, n, &tmp_callbacks)
+ {
+ cb = list_entry(pos, _cl_event_user_callback, node);
+
+ assert(cb->executed == CL_FALSE);
+
+ if (cb->status < status)
+ continue;
+
+ list_del(&cb->node);
+ cb->executed = CL_TRUE;
+ cb->pfn_notify(event, status, cb->user_data);
+ cl_free(cb);
+ }
+
CL_OBJECT_LOCK(event);
- if(event->enqueue_cb->wait_list)
- cl_free(event->enqueue_cb->wait_list);
- cl_free(event->enqueue_cb);
- event->enqueue_cb = NULL;
- }
+      // Put back the callbacks that were not called.
+ list_splice_tail(&tmp_callbacks, &event->callbacks);
+
+      /* Status may have changed while we were unlocked, need to check again. */
+ } while (status != event->status);
}
- if(event->status >= status) //maybe changed in other threads
- event->status = status;
+
+  /* Wake up all the waiters for the status change. */
+ CL_OBJECT_NOTIFY_COND(event);
+
+ if (event->status <= CL_COMPLETE) {
+ notify_queue = CL_TRUE;
+ }
+
CL_OBJECT_UNLOCK(event);
- /* Call user callback */
- cl_event_call_callback(event, status, CL_FALSE);
+  /* Need to notify all the command queues within the same context. */
+ if (notify_queue) {
+ cl_command_queue *q_list = NULL;
+ cl_uint queue_num = 0;
+ int i = 0;
+ int cookie = 0;
+
+    /* First, we need to remove it from the queue's barrier list. */
+ if (CL_EVENT_IS_BARRIER(event)) {
+ assert(event->queue);
+ cl_command_queue_remove_barrier_event(event->queue, event);
+ }
- if(event->type == CL_COMMAND_USER) {
- /* Check all defer enqueue */
- enqueue_callback *cb, *enqueue_cb = event->waits_head;
- while(enqueue_cb) {
- /* Remove this user event in enqueue_cb, update the header if needed. */
- cl_event_remove_user_event(&enqueue_cb->wait_user_events, event);
- cl_event_delete(event);
+ /* Then, notify all the queues within the same context. */
+ CL_OBJECT_LOCK(event->ctx);
+ do {
+ queue_num = event->ctx->queue_num;
+ cookie = event->ctx->queue_cookie;
+
+ if (queue_num > 0) {
+ q_list = cl_calloc(queue_num, sizeof(cl_command_queue));
+ assert(q_list);
+ i = 0;
+ list_for_each(pos, &event->ctx->queues)
+ {
+ q_list[i] = (cl_command_queue)(list_entry(pos, _cl_base_object, node));
+ assert(i < queue_num);
+ i++;
+ }
- /* Still wait on other user events */
- if(enqueue_cb->wait_user_events != NULL) {
- enqueue_cb = enqueue_cb->next;
- continue;
- }
+        CL_OBJECT_UNLOCK(event->ctx); // Notify the queues without holding the context lock.
- //remove user event frome enqueue_cb's ctx
- cl_command_queue_remove_event(enqueue_cb->event->queue, event);
- cl_command_queue_remove_barrier_event(enqueue_cb->event->queue, event);
-
- /* All user events complete, now wait enqueue events */
- ret = cl_event_wait_events(enqueue_cb->num_events, enqueue_cb->wait_list,
- enqueue_cb->event->queue);
- assert(ret != CL_ENQUEUE_EXECUTE_DEFER);
- ret = ~ret;
- cb = enqueue_cb;
- enqueue_cb = enqueue_cb->next;
-
- /* Call the pending operation */
- evt = cb->event;
- /* TODO: if this event wait on several events, one event's
- status is error, the others is complete, what's the status
- of this event? Can't find the description in OpenCL spec.
- Simply update to latest finish wait event.*/
- cl_event_set_status(cb->event, status);
- if(evt->emplict == CL_FALSE) {
- cl_event_delete(evt);
+ for (i = 0; i < queue_num; i++) {
+ cl_command_queue_notify(q_list[i]);
+ }
+
+ CL_OBJECT_LOCK(event->ctx); // Lock again.
+ } else {
+ /* No queue? Just do nothing. */
}
- }
- event->waits_head = NULL;
- }
- if(event->status <= CL_COMPLETE){
- /* Maintain the last_list when event completed*/
- if (event->last_prev)
- event->last_prev->last_next = event->last_next;
- if (event->last_next)
- event->last_next->last_prev = event->last_prev;
- if(event->queue && get_last_event(event->queue) == event)
- set_last_event(event->queue, event->last_next);
- event->last_prev = NULL;
- event->last_next = NULL;
- cl_event_delete(event);
+    } while (cookie != event->ctx->queue_cookie); // Some queue may have been added while we were unlocked.
+ CL_OBJECT_UNLOCK(event->ctx);
+
+ if (q_list)
+ cl_free(q_list);
}
+
+ return CL_SUCCESS;
}
-void cl_event_update_status(cl_event event, int wait)
+LOCAL cl_int
+cl_event_wait_for_event_ready(const cl_event event)
{
- if(event->status <= CL_COMPLETE)
- return;
- if((event->gpgpu_event) &&
- (cl_gpgpu_event_update_status(event->gpgpu_event, wait) == command_complete))
- cl_event_set_status(event, CL_COMPLETE);
+ assert(CL_OBJECT_IS_EVENT(event));
+ return cl_event_wait_for_events_list(event->depend_event_num, event->depend_events);
}
-cl_int cl_event_marker_with_wait_list(cl_command_queue queue,
- cl_uint num_events_in_wait_list,
- const cl_event *event_wait_list,
- cl_event* event)
+LOCAL cl_int
+cl_event_wait_for_events_list(cl_uint num_events, const cl_event *event_list)
{
- enqueue_data data = { 0 };
+ int i;
cl_event e;
+ cl_int ret = CL_SUCCESS;
- e = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE);
- if(e == NULL)
- return CL_OUT_OF_HOST_MEMORY;
-
- if(event != NULL ){
- *event = e;
- }
+ for (i = 0; i < num_events; i++) {
+ e = event_list[i];
+ assert(e);
+ assert(CL_OBJECT_IS_EVENT(e));
-//enqueues a marker command which waits for either a list of events to complete, or if the list is
-//empty it waits for all commands previously enqueued in command_queue to complete before it completes.
- if(num_events_in_wait_list > 0){
- if(cl_event_wait_events(num_events_in_wait_list, event_wait_list, queue) == CL_ENQUEUE_EXECUTE_DEFER) {
- data.type = EnqueueMarker;
- cl_event_new_enqueue_callback(event?*event:NULL, &data, num_events_in_wait_list, event_wait_list);
- return CL_SUCCESS;
+ CL_OBJECT_LOCK(e);
+ while (e->status > CL_COMPLETE) {
+ CL_OBJECT_WAIT_ON_COND(e);
}
- } else if(queue->wait_events_num > 0) {
- data.type = EnqueueMarker;
- cl_event_new_enqueue_callback(event?*event:NULL, &data, queue->wait_events_num, queue->wait_events);
- return CL_SUCCESS;
+    /* If some error happened, return the error. */
+ if (e->status < CL_COMPLETE) {
+ ret = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
+ }
+ CL_OBJECT_UNLOCK(e);
}
- cl_event_update_last_events(queue,1);
-
- cl_event_set_status(e, CL_COMPLETE);
- return CL_SUCCESS;
+ return ret;
}
-cl_int cl_event_barrier_with_wait_list(cl_command_queue queue,
- cl_uint num_events_in_wait_list,
- const cl_event *event_wait_list,
- cl_event* event)
+LOCAL cl_int
+cl_event_check_waitlist(cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
+ cl_event *event, cl_context ctx)
{
- enqueue_data data = { 0 };
- cl_event e;
-
- e = cl_event_new(queue->ctx, queue, CL_COMMAND_BARRIER, CL_TRUE);
- if(e == NULL)
- return CL_OUT_OF_HOST_MEMORY;
+ cl_int err = CL_SUCCESS;
+ cl_int i;
- if(event != NULL ){
- *event = e;
- }
-//enqueues a barrier command which waits for either a list of events to complete, or if the list is
-//empty it waits for all commands previously enqueued in command_queue to complete before it completes.
- if(num_events_in_wait_list > 0){
- if(cl_event_wait_events(num_events_in_wait_list, event_wait_list, queue) == CL_ENQUEUE_EXECUTE_DEFER) {
- data.type = EnqueueBarrier;
- cl_event_new_enqueue_callback(e, &data, num_events_in_wait_list, event_wait_list);
- return CL_SUCCESS;
+ do {
+ /* check the event_wait_list and num_events_in_wait_list */
+ if ((event_wait_list == NULL) && (num_events_in_wait_list > 0)) {
+ err = CL_INVALID_EVENT_WAIT_LIST;
+ break;
}
- } else if(queue->wait_events_num > 0) {
- data.type = EnqueueBarrier;
- cl_event_new_enqueue_callback(e, &data, queue->wait_events_num, queue->wait_events);
- return CL_SUCCESS;
- }
- cl_event_update_last_events(queue,1);
+ if ((event_wait_list != NULL) && (num_events_in_wait_list == 0)) {
+ err = CL_INVALID_EVENT_WAIT_LIST;
+ break;
+ }
- cl_event_set_status(e, CL_COMPLETE);
- return CL_SUCCESS;
-}
+ /* check the event and context */
+ for (i = 0; i < num_events_in_wait_list; i++) {
+ if (event_wait_list[i] == NULL || !CL_OBJECT_IS_EVENT(event_wait_list[i])) {
+ err = CL_INVALID_EVENT;
+ break;
+ }
-cl_ulong cl_event_get_cpu_timestamp(cl_ulong *cpu_time)
-{
- struct timespec ts;
+ if (cl_event_get_status(event_wait_list[i]) < CL_COMPLETE) {
+ err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
+ break;
+ }
- if(clock_gettime(CLOCK_MONOTONIC_RAW,&ts) != 0){
- printf("CPU Timmer error\n");
- return CL_FALSE;
- }
- *cpu_time = (1000000000.0) * (cl_ulong) ts.tv_sec + (cl_ulong) ts.tv_nsec;
+      if (event == event_wait_list + i) { /* The returned event must not point into the wait list */
+ err = CL_INVALID_EVENT_WAIT_LIST;
+ break;
+ }
- return CL_SUCCESS;
-}
+ /* check all belong to same context. */
+ if (ctx == NULL) {
+ ctx = event_wait_list[i]->ctx;
+ }
+ if (event_wait_list[i]->ctx != ctx) {
+ err = CL_INVALID_CONTEXT;
+ break;
+ }
+ }
-cl_int cl_event_get_queued_cpu_timestamp(cl_event event)
-{
- cl_int ret_val;
+ if (err != CL_SUCCESS)
+ break;
- ret_val = cl_event_get_cpu_timestamp(&event->queued_timestamp);
+ } while (0);
- return ret_val;
+ return err;
}
-cl_ulong cl_event_get_timestamp_delta(cl_ulong start_timestamp,cl_ulong end_timestamp)
+LOCAL void
+cl_event_exec(cl_event event, cl_int exec_status)
{
- cl_ulong ret_val;
+  /* We are MT safe here, no one should call this
+     at the same time, so there is no need to lock. */
+ cl_int ret = CL_SUCCESS;
+ cl_int status = cl_event_get_status(event);
+ cl_int depend_status;
- if(end_timestamp > start_timestamp){
- ret_val = end_timestamp - start_timestamp;
- }
- else {
- /*if start time stamp is greater than end timstamp then set ret value to max*/
- ret_val = ((cl_ulong) 1 << 32);
+ if (status < CL_COMPLETE || status <= exec_status) {
+ return;
}
- return ret_val;
-}
-
-cl_ulong cl_event_get_start_timestamp(cl_event event)
-{
- cl_ulong ret_val;
+ depend_status = cl_event_is_ready(event);
+ assert(depend_status <= CL_COMPLETE);
+  if (depend_status < CL_COMPLETE) { // Error happened, cancel exec.
+ ret = cl_event_set_status(event, depend_status);
+ return;
+ }
- ret_val = cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[2]);
+  /* Do the corresponding work based on the event type. */
+ ret = cl_enqueue_handle(&event->exec_data, exec_status);
- return ret_val;
+ if (ret != CL_SUCCESS) {
+ assert(ret < 0);
+    DEBUGP(DL_WARNING, "Exec event %p error, type is %d, error status is %d",
+ event, event->event_type, ret);
+ ret = cl_event_set_status(event, ret);
+ assert(ret == CL_SUCCESS);
+ } else {
+ ret = cl_event_set_status(event, exec_status);
+ assert(ret == CL_SUCCESS);
+ }
}
-cl_ulong cl_event_get_end_timestamp(cl_event event)
+/* 0 means ready, >0 means not ready, <0 means error. */
+LOCAL cl_int
+cl_event_is_ready(cl_event event)
{
- cl_ulong ret_val;
-
- ret_val = cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[3]);
+ int i;
+ int status;
- return ret_val;
-}
+ for (i = 0; i < event->depend_event_num; i++) {
+ status = cl_event_get_status(event->depend_events[i]);
-cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name)
-{
- cl_ulong ret_val = 0;
- GET_QUEUE_THREAD_GPGPU(event->queue);
-
- if (!event->gpgpu_event) {
- cl_gpgpu_event_get_gpu_cur_timestamp(gpgpu, &ret_val);
- event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val;
- return CL_SUCCESS;
+ if (status != CL_COMPLETE) {
+ return status;
+ }
}
- if(param_name == CL_PROFILING_COMMAND_SUBMIT ||
- param_name == CL_PROFILING_COMMAND_QUEUED) {
- cl_gpgpu_event_get_gpu_cur_timestamp(gpgpu, &ret_val);
- event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val;
- return CL_SUCCESS;
- } else if(param_name == CL_PROFILING_COMMAND_START) {
- cl_gpgpu_event_get_exec_timestamp(gpgpu, event->gpgpu_event, 0, &ret_val);
- event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val;
- return CL_SUCCESS;
- } else if (param_name == CL_PROFILING_COMMAND_END) {
- cl_gpgpu_event_get_exec_timestamp(gpgpu, event->gpgpu_event, 1, &ret_val);
- event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val;
- return CL_SUCCESS;
- }
- return CL_INVALID_VALUE;
+ return CL_COMPLETE;
}
-cl_int cl_event_insert_user_event(user_event** p_u_ev, cl_event event)
+LOCAL cl_event
+cl_event_create_marker_or_barrier(cl_command_queue queue, cl_uint num_events_in_wait_list,
+ const cl_event *event_wait_list, cl_bool is_barrier, cl_int *error)
{
- user_event * u_iter = *p_u_ev;
- user_event * u_ev;
-
- while(u_iter)
- {
- if(u_iter->event == event)
- return CL_SUCCESS;
- u_iter = u_iter->next;
- }
+ cl_event e = NULL;
+ cl_int err = CL_SUCCESS;
+ cl_command_type type = CL_COMMAND_MARKER;
+ enqueue_type eq_type = EnqueueMarker;
- TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
- u_ev->event = event;
- u_ev->next = *p_u_ev;
- *p_u_ev = u_ev;
+ if (is_barrier) {
+ type = CL_COMMAND_BARRIER;
+ eq_type = EnqueueBarrier;
+ }
+ if (event_wait_list) {
+ assert(num_events_in_wait_list > 0);
- return CL_SUCCESS;
-error:
- return CL_FALSE;
-}
+ e = cl_event_create(queue->ctx, queue, num_events_in_wait_list,
+ event_wait_list, type, &err);
+ if (err != CL_SUCCESS) {
+ *error = err;
+ return NULL;
+ }
+ } else { /* The marker depends on all events in the queue now. */
+ cl_command_queue_enqueue_worker worker = &queue->worker;
+ cl_uint i;
+ cl_uint event_num;
+ cl_event *depend_events;
+
+ CL_OBJECT_LOCK(queue);
+
+    /* First, wait for the command queue to retire all the events it is executing. */
+ while (1) {
+      if (worker->quit) { // is the queue already being destroyed?
+ CL_OBJECT_UNLOCK(queue);
+ *error = CL_INVALID_COMMAND_QUEUE;
+ return NULL;
+ }
-cl_int cl_event_remove_user_event(user_event** p_u_ev, cl_event event)
-{
- user_event * u_iter = *p_u_ev;
- user_event * u_prev = *p_u_ev;
-
- while(u_iter){
- if(u_iter->event == event ){
- if(u_iter == *p_u_ev){
- *p_u_ev = u_iter->next;
- }else{
- u_prev->next = u_iter->next;
+ if (worker->in_exec_status != CL_COMPLETE) {
+ CL_OBJECT_WAIT_ON_COND(queue);
+ continue;
}
- cl_free(u_iter);
+
break;
}
- u_prev = u_iter;
- u_iter = u_iter->next;
+
+ event_num = 0;
+ depend_events = NULL;
+ if (!list_empty(&worker->enqueued_events)) {
+ depend_events = cl_command_queue_record_in_queue_events(queue, &event_num);
+ }
+
+ CL_OBJECT_UNLOCK(queue);
+
+ e = cl_event_create(queue->ctx, queue, event_num, depend_events, type, &err);
+
+    for (i = 0; i < event_num; i++) { // unref the temporary references
+ cl_event_delete(depend_events[i]);
+ }
+ if (depend_events)
+ cl_free(depend_events);
+
+ if (err != CL_SUCCESS) {
+ *error = err;
+ return NULL;
+ }
}
- return CL_SUCCESS;
+ e->exec_data.type = eq_type;
+ *error = CL_SUCCESS;
+ return e;
}
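
For reference, a minimal sketch of how the API layer is expected to drive the new
status path (the wrapper below is only illustrative and is not part of this patch;
it assumes it is compiled inside the driver where cl_event.h is available):

  /* Hypothetical wrapper: clSetUserEventStatus on top of the new helpers. */
  static cl_int
  set_user_event_status_sketch(cl_event event, cl_int execution_status)
  {
    if (!CL_OBJECT_IS_EVENT(event) || !CL_EVENT_IS_USER(event))
      return CL_INVALID_EVENT;
    if (execution_status > CL_COMPLETE)
      return CL_INVALID_VALUE;
    /* cl_event_set_status() fires the registered callbacks and, once the
       status drops to CL_COMPLETE or an error, notifies every command
       queue of the context. */
    return cl_event_set_status(event, execution_status);
  }
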
diff --git a/src/cl_event.h b/src/cl_event.h
index 85cd53e..f67299c 100644
--- a/src/cl_event.h
+++ b/src/cl_event.h
@@ -14,111 +14,75 @@
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
- * Author: Benjamin Segovia <benjamin.segovia at intel.com>
*/
-#ifndef __CL_EVENT_H__
-#define __CL_EVENT_H__
+#ifndef __CL_EVENT_H_
+#define __CL_EVENT_H_
#include <semaphore.h>
#include "cl_base_object.h"
-#include "cl_driver.h"
#include "cl_enqueue.h"
#include "CL/cl.h"
-#define CL_ENQUEUE_EXECUTE_IMM 0
-#define CL_ENQUEUE_EXECUTE_DEFER 1
+typedef void(CL_CALLBACK *cl_event_notify_cb)(cl_event event, cl_int event_command_exec_status, void *user_data);
-typedef struct _user_event {
- cl_event event; /* The user event */
- struct _user_event* next; /* Next user event in list */
-} user_event;
+typedef struct _cl_event_user_callback {
+ cl_int status; /* The execution status */
+  cl_bool executed;              /* Indicates whether the callback function has been called */
+ cl_event_notify_cb pfn_notify; /* Callback function */
+ void *user_data; /* Callback user data */
+ list_head node; /* Event callback list node */
+} _cl_event_user_callback;
-typedef struct _enqueue_callback {
- cl_event event; /* The event relative this enqueue callback */
- enqueue_data data; /* Hold all enqueue callback's infomation */
- cl_uint num_events; /* num events in wait list */
- cl_event* wait_list; /* All event wait list this callback wait on */
- user_event* wait_user_events; /* The head of user event list the callback wait on */
- struct _enqueue_callback* next; /* The next enqueue callback in wait list */
-} enqueue_callback;
+typedef _cl_event_user_callback *cl_event_user_callback;
-typedef void (CL_CALLBACK *EVENT_NOTIFY)(cl_event event, cl_int event_command_exec_status, void *user_data);
-
-typedef struct _user_callback {
- cl_int status; /* The execution status */
- cl_bool executed; /* Indicat the callback function been called or not */
- EVENT_NOTIFY pfn_notify; /* Callback function */
- void* user_data; /* Callback user data */
- struct _user_callback* next; /* Next event callback in list */
-} user_callback;
-
-struct _cl_event {
- _cl_base_object base;
- cl_context ctx; /* The context associated with event */
- cl_command_queue queue; /* The command queue associated with event */
- cl_command_type type; /* The command type associated with event */
- cl_int status; /* The execution status */
- cl_gpgpu gpgpu; /* Current gpgpu, owned by this structure. */
- cl_gpgpu_event gpgpu_event; /* The event object communicate with hardware */
- user_callback* user_cb; /* The event callback functions */
- enqueue_callback* enqueue_cb; /* This event's enqueue */
- enqueue_callback* waits_head; /* The head of enqueues list wait on this event */
- cl_bool emplict; /* Identify this event whether created by api emplict*/
- cl_ulong timestamp[4];/* The time stamps for profiling. */
- cl_ulong queued_timestamp;
- cl_event last_next, last_prev;/* We need a list to monitor untouchable api event*/
-};
+typedef struct _cl_event {
+ _cl_base_object base;
+ cl_context ctx; /* The context associated with event */
+ cl_command_queue queue; /* The command queue associated with event */
+ cl_command_type event_type; /* Event type. */
+ cl_bool is_barrier; /* Is this event a barrier */
+ cl_int status; /* The execution status */
+  cl_event *depend_events;    /* The events that must complete before this one. */
+  cl_uint depend_event_num;   /* The number of depend events. */
+  list_head callbacks;        /* The event callback functions. */
+ list_head enqueue_node; /* The node in the enqueue list. */
+ cl_ulong timestamp[4]; /* The time stamps for profiling. */
+ cl_ulong queued_timestamp;
+  enqueue_data exec_data;     /* Context for executing this event. */
+} _cl_event;
#define CL_OBJECT_EVENT_MAGIC 0x8324a9f810ebf90fLL
-#define CL_OBJECT_IS_EVENT(obj) (((cl_base_object)obj)->magic == CL_OBJECT_EVENT_MAGIC)
+#define CL_OBJECT_IS_EVENT(obj) ((obj && \
+ ((cl_base_object)obj)->magic == CL_OBJECT_EVENT_MAGIC && \
+ CL_OBJECT_GET_REF(obj) >= 1))
+
+#define CL_EVENT_IS_MARKER(E) (E->event_type == CL_COMMAND_MARKER)
+#define CL_EVENT_IS_BARRIER(E) (E->event_type == CL_COMMAND_BARRIER)
+#define CL_EVENT_IS_USER(E) (E->event_type == CL_COMMAND_USER)
/* Create a new event object */
-cl_event cl_event_new(cl_context, cl_command_queue, cl_command_type, cl_bool);
-/* Unref the object and delete it if no more reference on it */
-void cl_event_delete(cl_event);
-/* Add one more reference to this object */
-void cl_event_add_ref(cl_event);
-/* Register a user callback function for specific commond execution status */
-cl_int cl_event_set_callback(cl_event, cl_int, EVENT_NOTIFY, void *);
-/* Execute the event's callback if the event's status supersedes the callback's status. Free the callback if specified */
-void cl_event_call_callback(cl_event event, cl_int status, cl_bool free_cb);
-/* Check events wait list for enqueue commonds */
-cl_int cl_event_check_waitlist(cl_uint, const cl_event *, cl_event *, cl_context);
-/* Wait the all events in wait list complete */
-cl_int cl_event_wait_events(cl_uint, const cl_event *, cl_command_queue);
-/* New a enqueue suspend task */
-void cl_event_new_enqueue_callback(cl_event, enqueue_data *, cl_uint, const cl_event *);
-/* Set the event status and call all callbacks */
-void cl_event_set_status(cl_event, cl_int);
-/* Check and update event status */
-void cl_event_update_status(cl_event, cl_int);
-/* Create the marker event */
-cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const cl_event *, cl_event*);
-/* Create the barrier event */
-cl_int cl_event_barrier_with_wait_list(cl_command_queue, cl_uint, const cl_event *, cl_event*);
-/* Get the cpu time */
-cl_ulong cl_event_get_cpu_timestamp(cl_ulong *cpu_time);
-/*Get the cpu time for queued*/
-cl_int cl_event_get_queued_cpu_timestamp(cl_event event);
-/*get timestamp delate between end and start*/
-cl_ulong cl_event_get_timestamp_delta(cl_ulong start_timestamp,cl_ulong end_timestamp);
-/*Get start time stamp*/
-cl_ulong cl_event_get_start_timestamp(cl_event event);
-/*Get end time stamp*/
-cl_ulong cl_event_get_end_timestamp(cl_event event);
-/* Do the event profiling */
-cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name);
-/* insert the user event */
-cl_int cl_event_insert_user_event(user_event** p_u_ev, cl_event event);
-/* remove the user event */
-cl_int cl_event_remove_user_event(user_event** p_u_ev, cl_event event);
-/* flush the event's pending gpgpu batch buffer and notify driver this gpgpu event has been flushed. */
-cl_int cl_event_flush(cl_event event);
-/* monitor or block wait all events in the last_event list */
-void cl_event_update_last_events(cl_command_queue queuet, int wait);
-/* insert the event into the last_event list in queue */
-void cl_event_insert_last_events(cl_command_queue queue, cl_event event);
+extern cl_event cl_event_create(cl_context ctx, cl_command_queue queue, cl_uint num_events,
+ const cl_event *event_list, cl_command_type type, cl_int *errcode_ret);
+extern cl_int cl_event_check_waitlist(cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
+ cl_event* event, cl_context ctx);
+extern void cl_event_exec(cl_event event, cl_int exec_status);
+/* 0 means ready, >0 means not ready, <0 means error. */
+extern cl_int cl_event_is_ready(cl_event event);
+extern cl_int cl_event_get_status(cl_event event);
+extern void cl_event_add_ref(cl_event event);
+extern void cl_event_delete(cl_event event);
+extern cl_int cl_event_set_status(cl_event event, cl_int status);
+extern cl_int cl_event_set_callback(cl_event event, cl_int exec_type,
+ cl_event_notify_cb pfn_notify, void *user_data);
+extern cl_int cl_event_wait_for_events_list(cl_uint num_events, const cl_event *event_list);
+extern cl_int cl_event_wait_for_event_ready(cl_event event);
+extern cl_ulong cl_event_get_timestamp_delta(cl_ulong start_timestamp, cl_ulong end_timestamp);
+extern cl_ulong cl_event_get_start_timestamp(cl_event event);
+extern cl_ulong cl_event_get_end_timestamp(cl_event event);
+extern cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name);
+extern cl_event cl_event_create_marker_or_barrier(cl_command_queue queue, cl_uint num_events_in_wait_list,
+ const cl_event *event_wait_list, cl_bool is_barrier,
+ cl_int* error);
#endif /* __CL_EVENT_H__ */
-
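
A minimal usage sketch for the callback API declared above (the callback name and
the event variable e are illustrative and not part of this patch):

  /* Illustrative callback matching the new cl_event_notify_cb type. */
  static void CL_CALLBACK
  on_complete_sketch(cl_event event, cl_int exec_status, void *user_data)
  {
    /* exec_status is CL_COMPLETE or a negative error code. */
    (void)event; (void)exec_status; (void)user_data;
  }

  /* Register for CL_COMPLETE; if the event has already reached that state,
     cl_event_set_callback() invokes the function immediately. */
  cl_int err = cl_event_set_callback(e, CL_COMPLETE, on_complete_sketch, NULL);
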
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 06a4d5a..333ffc9 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -28,6 +28,7 @@
#include "cl_kernel.h"
#include "cl_command_queue.h"
#include "cl_cmrt.h"
+#include "cl_enqueue.h"
#include "CL/cl.h"
#include "CL/cl_intel.h"
@@ -1264,7 +1265,7 @@ cl_mem_add_ref(cl_mem mem)
#define LOCAL_SZ_2 4
LOCAL cl_int
-cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
+cl_mem_copy(cl_command_queue queue, cl_event event, cl_mem src_buf, cl_mem dst_buf,
size_t src_offset, size_t dst_offset, size_t cb)
{
cl_int ret = CL_SUCCESS;
@@ -1317,7 +1318,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
cl_kernel_set_arg(ker, 2, sizeof(cl_mem), &dst_buf);
cl_kernel_set_arg(ker, 3, sizeof(int), &dw_dst_offset);
cl_kernel_set_arg(ker, 4, sizeof(int), &cb);
- ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
+ ret = cl_command_queue_ND_range(queue, ker, event, 1, global_off, global_sz, local_sz);
cl_kernel_delete(ker);
return ret;
}
@@ -1358,7 +1359,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
cl_kernel_set_arg(ker, 4, sizeof(int), &dw_num);
cl_kernel_set_arg(ker, 5, sizeof(int), &first_mask);
cl_kernel_set_arg(ker, 6, sizeof(int), &last_mask);
- ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
+ ret = cl_command_queue_ND_range(queue, ker, event, 1, global_off, global_sz, local_sz);
cl_kernel_delete(ker);
return ret;
}
@@ -1388,7 +1389,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
cl_kernel_set_arg(ker, 6, sizeof(int), &last_mask);
cl_kernel_set_arg(ker, 7, sizeof(int), &shift);
cl_kernel_set_arg(ker, 8, sizeof(int), &dw_mask);
- ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
+ ret = cl_command_queue_ND_range(queue, ker, event, 1, global_off, global_sz, local_sz);
cl_kernel_delete(ker);
return ret;
}
@@ -1420,7 +1421,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
cl_kernel_set_arg(ker, 7, sizeof(int), &shift);
cl_kernel_set_arg(ker, 8, sizeof(int), &dw_mask);
cl_kernel_set_arg(ker, 9, sizeof(int), &src_less);
- ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
+ ret = cl_command_queue_ND_range(queue, ker, event, 1, global_off, global_sz, local_sz);
cl_kernel_delete(ker);
return ret;
}
@@ -1493,13 +1494,13 @@ cl_image_fill(cl_command_queue queue, const void * pattern, struct _cl_mem_image
cl_kernel_set_arg(ker, 6, sizeof(cl_int), &origin[1]);
cl_kernel_set_arg(ker, 7, sizeof(cl_int), &origin[2]);
- ret = cl_command_queue_ND_range(queue, ker, 3, global_off, global_sz, local_sz);
+ ret = cl_command_queue_ND_range(queue, ker, NULL, 3, global_off, global_sz, local_sz);
cl_kernel_delete(ker);
return ret;
}
LOCAL cl_int
-cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size,
+cl_mem_fill(cl_command_queue queue, cl_event e, const void * pattern, size_t pattern_size,
cl_mem buffer, size_t offset, size_t size)
{
cl_int ret = CL_SUCCESS;
@@ -1596,13 +1597,13 @@ cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size,
if (is_128)
cl_kernel_set_arg(ker, 4, pattern_size, pattern1);
- ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
+ ret = cl_command_queue_ND_range(queue, ker, e, 1, global_off, global_sz, local_sz);
cl_kernel_delete(ker);
return ret;
}
LOCAL cl_int
-cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
+cl_mem_copy_buffer_rect(cl_command_queue queue, cl_event event, cl_mem src_buf, cl_mem dst_buf,
const size_t *src_origin, const size_t *dst_origin, const size_t *region,
size_t src_row_pitch, size_t src_slice_pitch,
size_t dst_row_pitch, size_t dst_slice_pitch) {
@@ -1617,7 +1618,7 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
cl_int src_offset = src_origin[2]*src_slice_pitch + src_origin[1]*src_row_pitch + src_origin[0];
cl_int dst_offset = dst_origin[2]*dst_slice_pitch + dst_origin[1]*dst_row_pitch + dst_origin[0];
cl_int size = region[0]*region[1]*region[2];
- ret = cl_mem_copy(queue, src_buf, dst_buf,src_offset, dst_offset, size);
+ ret = cl_mem_copy(queue, NULL, src_buf, dst_buf,src_offset, dst_offset, size);
return ret;
}
@@ -1669,14 +1670,15 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
cl_kernel_set_arg(ker, 9, sizeof(cl_int), &dst_row_pitch);
cl_kernel_set_arg(ker, 10, sizeof(cl_int), &dst_slice_pitch);
- ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
+ ret = cl_command_queue_ND_range(queue, ker, event, 1, global_off, global_sz, local_sz);
cl_kernel_delete(ker);
return ret;
}
LOCAL cl_int
-cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image, struct _cl_mem_image* dst_image,
- const size_t *src_origin, const size_t *dst_origin, const size_t *region) {
+cl_mem_kernel_copy_image(cl_command_queue queue, cl_event event, struct _cl_mem_image* src_image,
+ struct _cl_mem_image* dst_image, const size_t *src_origin,
+ const size_t *dst_origin, const size_t *region) {
cl_int ret;
cl_kernel ker = NULL;
size_t global_off[] = {0,0,0};
@@ -1817,7 +1819,7 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image
cl_kernel_set_arg(ker, 9, sizeof(cl_int), &dst_origin[1]);
cl_kernel_set_arg(ker, 10, sizeof(cl_int), &dst_origin[2]);
- ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
+ ret = cl_command_queue_ND_range(queue, ker, event, 1, global_off, global_sz, local_sz);
fail:
@@ -1830,7 +1832,7 @@ fail:
}
LOCAL cl_int
-cl_mem_copy_image_to_buffer(cl_command_queue queue, struct _cl_mem_image* image, cl_mem buffer,
+cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_mem_image* image, cl_mem buffer,
const size_t *src_origin, const size_t dst_offset, const size_t *region) {
cl_int ret;
cl_kernel ker = NULL;
@@ -1919,7 +1921,7 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, struct _cl_mem_image* image,
cl_kernel_set_arg(ker, 7, sizeof(cl_int), &src_origin[2]);
cl_kernel_set_arg(ker, 8, sizeof(cl_int), &kn_dst_offset);
- ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
+ ret = cl_command_queue_ND_range(queue, ker, event, 1, global_off, global_sz, local_sz);
fail:
@@ -1933,7 +1935,7 @@ fail:
LOCAL cl_int
-cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_mem buffer, struct _cl_mem_image* image,
+cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event event, cl_mem buffer, struct _cl_mem_image* image,
const size_t src_offset, const size_t *dst_origin, const size_t *region) {
cl_int ret;
cl_kernel ker = NULL;
@@ -2019,7 +2021,7 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_mem buffer, struct _cl_me
cl_kernel_set_arg(ker, 7, sizeof(cl_int), &dst_origin[2]);
cl_kernel_set_arg(ker, 8, sizeof(cl_int), &kn_src_offset);
- ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
+ ret = cl_command_queue_ND_range(queue, ker, event, 1, global_off, global_sz, local_sz);
cl_kernel_delete(ker);
image->intel_fmt = intel_fmt;
@@ -2308,3 +2310,83 @@ error:
mem = NULL;
goto exit;
}
+
+LOCAL cl_int
+cl_mem_record_map_mem(cl_mem mem, void *ptr, void **mem_ptr, size_t offset,
+ size_t size, const size_t *origin, const size_t *region)
+{
+ // TODO: Need to add MT safe logic.
+
+ cl_int slot = -1;
+ int err = CL_SUCCESS;
+ size_t sub_offset = 0;
+
+ if(mem->type == CL_MEM_SUBBUFFER_TYPE) {
+ struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
+ sub_offset = buffer->sub_offset;
+ }
+
+ ptr = (char*)ptr + offset + sub_offset;
+ if(mem->flags & CL_MEM_USE_HOST_PTR) {
+ assert(mem->host_ptr);
+    // only calculate the pointer here, the memcpy will be done at enqueue time
+ *mem_ptr = (char *)mem->host_ptr + offset + sub_offset;
+ } else {
+ *mem_ptr = ptr;
+ }
+ /* Record the mapped address. */
+ if (!mem->mapped_ptr_sz) {
+ mem->mapped_ptr_sz = 16;
+ mem->mapped_ptr = (cl_mapped_ptr *)malloc(
+ sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz);
+ if (!mem->mapped_ptr) {
+ cl_mem_unmap_auto(mem);
+ err = CL_OUT_OF_HOST_MEMORY;
+ goto error;
+ }
+ memset(mem->mapped_ptr, 0, mem->mapped_ptr_sz * sizeof(cl_mapped_ptr));
+ slot = 0;
+ } else {
+ int i = 0;
+ for (; i < mem->mapped_ptr_sz; i++) {
+ if (mem->mapped_ptr[i].ptr == NULL) {
+ slot = i;
+ break;
+ }
+ }
+ if (i == mem->mapped_ptr_sz) {
+ cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
+ sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz * 2);
+ if (!new_ptr) {
+ cl_mem_unmap_auto(mem);
+ err = CL_OUT_OF_HOST_MEMORY;
+ goto error;
+ }
+ memset(new_ptr, 0, 2 * mem->mapped_ptr_sz * sizeof(cl_mapped_ptr));
+ memcpy(new_ptr, mem->mapped_ptr,
+ mem->mapped_ptr_sz * sizeof(cl_mapped_ptr));
+ slot = mem->mapped_ptr_sz;
+ mem->mapped_ptr_sz *= 2;
+ free(mem->mapped_ptr);
+ mem->mapped_ptr = new_ptr;
+ }
+ }
+ assert(slot != -1);
+ mem->mapped_ptr[slot].ptr = *mem_ptr;
+ mem->mapped_ptr[slot].v_ptr = ptr;
+ mem->mapped_ptr[slot].size = size;
+ if(origin) {
+ assert(region);
+ mem->mapped_ptr[slot].origin[0] = origin[0];
+ mem->mapped_ptr[slot].origin[1] = origin[1];
+ mem->mapped_ptr[slot].origin[2] = origin[2];
+ mem->mapped_ptr[slot].region[0] = region[0];
+ mem->mapped_ptr[slot].region[1] = region[1];
+ mem->mapped_ptr[slot].region[2] = region[2];
+ }
+ mem->map_ref++;
+error:
+ if (err != CL_SUCCESS)
+ *mem_ptr = NULL;
+ return err;
+}
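
A rough sketch of how the new helper is expected to be called from a buffer map
path (the surrounding enqueue code lives outside this hunk, so buffer, raw, offset,
size and err below are illustrative names only):

  /* raw is the host address returned by the driver map; a plain buffer has
     no origin/region, so NULL is passed for both. On failure *mem_ptr is
     reset to NULL. */
  void *mapped = NULL;
  err = cl_mem_record_map_mem(buffer, raw, &mapped, offset, size, NULL, NULL);
  if (err != CL_SUCCESS)
    return err;
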
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 9bb5c47..82f30f6 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -101,7 +101,17 @@ typedef struct _cl_mem {
} _cl_mem;
#define CL_OBJECT_MEM_MAGIC 0x381a27b9ee6504dfLL
-#define CL_OBJECT_IS_MEM(obj) (((cl_base_object)obj)->magic == CL_OBJECT_MEM_MAGIC)
+#define CL_OBJECT_IS_MEM(obj) ((obj && \
+ ((cl_base_object)obj)->magic == CL_OBJECT_MEM_MAGIC && \
+ CL_OBJECT_GET_REF(obj) >= 1))
+#define CL_OBJECT_IS_IMAGE(mem) ((mem && \
+ ((cl_base_object)mem)->magic == CL_OBJECT_MEM_MAGIC && \
+ CL_OBJECT_GET_REF(mem) >= 1 && \
+ mem->type >= CL_MEM_IMAGE_TYPE))
+#define CL_OBJECT_IS_BUFFER(mem) ((mem && \
+ ((cl_base_object)mem)->magic == CL_OBJECT_MEM_MAGIC && \
+ CL_OBJECT_GET_REF(mem) >= 1 && \
+ mem->type < CL_MEM_IMAGE_TYPE))
struct _cl_mem_image {
_cl_mem base;
@@ -221,30 +231,30 @@ extern void cl_mem_gl_delete(struct _cl_mem_gl_image *);
extern void cl_mem_add_ref(cl_mem);
/* api clEnqueueCopyBuffer help function */
-extern cl_int cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
+extern cl_int cl_mem_copy(cl_command_queue queue, cl_event event, cl_mem src_buf, cl_mem dst_buf,
size_t src_offset, size_t dst_offset, size_t cb);
-extern cl_int cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size,
+extern cl_int cl_mem_fill(cl_command_queue queue, cl_event e, const void * pattern, size_t pattern_size,
cl_mem buffer, size_t offset, size_t size);
extern cl_int cl_image_fill(cl_command_queue queue, const void * pattern, struct _cl_mem_image*,
const size_t *, const size_t *);
/* api clEnqueueCopyBufferRect help function */
-extern cl_int cl_mem_copy_buffer_rect(cl_command_queue, cl_mem, cl_mem,
+extern cl_int cl_mem_copy_buffer_rect(cl_command_queue, cl_event event, cl_mem, cl_mem,
const size_t *, const size_t *, const size_t *,
size_t, size_t, size_t, size_t);
/* api clEnqueueCopyImage help function */
-extern cl_int cl_mem_kernel_copy_image(cl_command_queue, struct _cl_mem_image*, struct _cl_mem_image*,
- const size_t *, const size_t *, const size_t *);
+extern cl_int cl_mem_kernel_copy_image(cl_command_queue, cl_event event, struct _cl_mem_image*,
+ struct _cl_mem_image*, const size_t *, const size_t *, const size_t *);
/* api clEnqueueCopyImageToBuffer help function */
-extern cl_int cl_mem_copy_image_to_buffer(cl_command_queue, struct _cl_mem_image*, cl_mem,
+extern cl_int cl_mem_copy_image_to_buffer(cl_command_queue, cl_event, struct _cl_mem_image*, cl_mem,
const size_t *, const size_t, const size_t *);
/* api clEnqueueCopyBufferToImage help function */
-extern cl_int cl_mem_copy_buffer_to_image(cl_command_queue, cl_mem, struct _cl_mem_image*,
+extern cl_int cl_mem_copy_buffer_to_image(cl_command_queue, cl_event, cl_mem, struct _cl_mem_image*,
const size_t, const size_t *, const size_t *);
/* Directly map a memory object */
@@ -318,5 +328,8 @@ extern cl_mem cl_mem_new_image_from_fd(cl_context ctx,
size_t row_pitch,
cl_int *errcode);
+extern cl_int cl_mem_record_map_mem(cl_mem mem, void *ptr, void **mem_ptr, size_t offset,
+ size_t size, const size_t *origin, const size_t *region);
+
#endif /* __CL_MEM_H__ */
--
2.7.4