[Beignet] [PATCH 4/4] Implement clEnqueueMarker and clEnqueueBarrier.
Yang Rong
rong.r.yang at intel.com
Tue Sep 17 01:10:01 PDT 2013
Add event-tracking state to cl_command_queue.
The first piece is the list of not-yet-complete user events, which is used to block marker events and barriers.
Once these user events become CL_COMPLETE, the events blocked by them also
become CL_COMPLETE, so the marker event is set to CL_COMPLETE as well. If there are no
pending user events, wait for the last event to complete and then set the marker event to complete.
Also add barrier_index for clEnqueueBarrier: it indexes into the user event list and indicates how many
user events the enqueue APIs that follow clEnqueueBarrier must wait on.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
src/cl_api.c | 25 ++++++++++++-----
src/cl_command_queue.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++
src/cl_command_queue.h | 15 ++++++++++
src/cl_enqueue.h | 4 +--
src/cl_event.c | 72 ++++++++++++++++++++++++++++++++++++++++++++----
src/cl_event.h | 6 ++--
6 files changed, 180 insertions(+), 16 deletions(-)
diff --git a/src/cl_api.c b/src/cl_api.c
index 9270b65..8f62990 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -66,7 +66,7 @@ inline cl_int
handle_events(cl_command_queue queue, cl_int num, const cl_event *wait_list,
cl_event* event, enqueue_data* data, cl_command_type type)
{
- cl_int status = cl_event_wait_events(num, wait_list);
+ cl_int status = cl_event_wait_events(num, wait_list, queue);
cl_event e;
if(event != NULL || status == CL_ENQUEUE_EXECUTE_DEFER) {
e = cl_event_new(queue->ctx, queue, type, event!=NULL);
@@ -1076,7 +1076,7 @@ clWaitForEvents(cl_uint num_events,
TRY(cl_event_check_waitlist, num_events, event_list, NULL, ctx);
- while(cl_event_wait_events(num_events, event_list) == CL_ENQUEUE_EXECUTE_DEFER) {
+ while(cl_event_wait_events(num_events, event_list, NULL) == CL_ENQUEUE_EXECUTE_DEFER) {
usleep(8000); //sleep 8ms to wait other thread
}
@@ -2401,8 +2401,16 @@ cl_int
clEnqueueMarker(cl_command_queue command_queue,
cl_event * event)
{
- NOT_IMPLEMENTED;
- return 0;
+  /* Enqueue a marker: *event receives an event created by cl_event_marker().
+   * Returns CL_INVALID_VALUE when event is NULL; otherwise returns whatever
+   * cl_event_marker() reports. */
+  cl_int err = CL_SUCCESS;
+  CHECK_QUEUE(command_queue);
+  if(event == NULL) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+
+  /* Propagate the result so that a failed marker creation is not reported
+   * to the application as CL_SUCCESS. */
+  err = cl_event_marker(command_queue, event);
+error:
+  return err;
}
cl_int
@@ -2421,9 +2429,12 @@ error:
cl_int
clEnqueueBarrier(cl_command_queue command_queue)
{
- NOT_IMPLEMENTED;
- return 0;
- //return clFinish(command_queue);
+ cl_int err = CL_SUCCESS;
+ CHECK_QUEUE(command_queue); /* NOTE(review): presumably sets err and jumps to the error label for an invalid queue - confirm against the macro */
+ cl_command_queue_set_barrier(command_queue); /* only records the barrier position (barrier_index = wait_events_num); nothing is waited on here */
+
+error:
+ return err;
}
#define EXTFUNC(x) \
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 2454db6..11be423 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -87,6 +87,7 @@ cl_command_queue_delete(cl_command_queue queue)
cl_mem_delete(queue->perf);
cl_context_delete(queue->ctx);
cl_gpgpu_delete(queue->gpgpu);
+ cl_free(queue->wait_events);
queue->magic = CL_MAGIC_DEAD_HEADER; /* For safety */
cl_free(queue);
}
@@ -412,3 +413,76 @@ cl_command_queue_finish(cl_command_queue queue)
return CL_SUCCESS;
}
+#define DEFAULT_WAIT_EVENTS_SIZE 16
+/*
+ * Append a not-yet-complete user event to queue->wait_events.
+ *
+ * The array is allocated lazily with DEFAULT_WAIT_EVENTS_SIZE slots and is
+ * doubled whenever it is full. An event that is already tracked is not added
+ * a second time. If an allocation fails, the existing list (and therefore
+ * everything barrier_index refers to) is left untouched and the new event is
+ * simply not recorded, because this function has no way to report an error.
+ */
+LOCAL void
+cl_command_queue_insert_event(cl_command_queue queue, cl_event event)
+{
+  cl_int i = 0;
+  cl_int new_size = 0;
+  cl_event *new_list = NULL;
+
+  assert(queue != NULL);
+
+  if(queue->wait_events != NULL) {
+    for(i = 0; i < queue->wait_events_num; i++) {
+      if(queue->wait_events[i] == event)
+        return; //already in wait_events, no need to insert it again
+    }
+
+    if(queue->wait_events_num < queue->wait_events_size) {
+      queue->wait_events[queue->wait_events_num++] = event;
+      return;
+    }
+  }
+
+  /* No array yet, or the array is full. The new size is committed only after
+   * the allocation succeeded, so a failure cannot leave wait_events_size out
+   * of sync with the real allocation. */
+  new_size = (queue->wait_events == NULL) ? DEFAULT_WAIT_EVENTS_SIZE
+                                          : queue->wait_events_size * 2;
+  TRY_ALLOC_NO_ERR (new_list, CALLOC_ARRAY(cl_event, new_size));
+  if(queue->wait_events != NULL) {
+    memcpy(new_list, queue->wait_events, sizeof(cl_event)*queue->wait_events_num);
+    cl_free(queue->wait_events);
+  }
+  queue->wait_events = new_list;
+  queue->wait_events_size = new_size;
+  queue->wait_events[queue->wait_events_num++] = event;
+  return;
+
+error:
+  /* Allocation failed: keep the list exactly as it was. Freeing it here would
+   * leave barrier_index pointing into a NULL array. */
+  return;
+}
+
+/*
+ * Remove a user event from queue->wait_events, if it is present.
+ *
+ * The remaining entries are shifted down to keep the array dense.
+ * barrier_index is the number of leading entries that act as barrier events,
+ * so it is reduced only when the removed entry is one of them.
+ * Does nothing when the queue tracks no events or the event is not found.
+ */
+LOCAL void
+cl_command_queue_remove_event(cl_command_queue queue, cl_event event)
+{
+  cl_int i=0;
+
+  assert(queue != NULL);
+  /* An empty or never-allocated list simply contains nothing to remove. */
+  if(queue->wait_events == NULL)
+    return;
+
+  for(i=0; i<queue->wait_events_num; i++) {
+    if(queue->wait_events[i] == event)
+      break;
+  }
+
+  if(i == queue->wait_events_num)
+    return;
+
+  /* Entries [0, barrier_index) are barrier events. Using '>=' here would also
+   * shrink the barrier when the entry just past it is removed, and could
+   * drive barrier_index below zero. */
+  if(queue->barrier_index > i)
+    queue->barrier_index -= 1;
+
+  for(; i<queue->wait_events_num-1; i++) {
+    queue->wait_events[i] = queue->wait_events[i+1];
+  }
+  queue->wait_events_num -= 1;
+}
+
+/*
+ * Mark a barrier on the queue: every user event currently tracked in
+ * wait_events becomes a barrier event (barrier_index = wait_events_num).
+ */
+LOCAL void
+cl_command_queue_set_barrier(cl_command_queue queue)
+{
+  const cl_int pending_user_events = queue->wait_events_num;
+
+  queue->barrier_index = pending_user_events;
+}
+}
+
diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
index 9fe1dd1..9396fd7 100644
--- a/src/cl_command_queue.h
+++ b/src/cl_command_queue.h
@@ -33,6 +33,11 @@ struct _cl_command_queue {
uint64_t magic; /* To identify it as a command queue */
volatile int ref_n; /* We reference count this object */
cl_context ctx; /* Its parent context */
+ cl_event* wait_events; /* Point to array of non-complete user events that block this command queue */
+ cl_int wait_events_num; /* Number of Non-complete user events */
+ cl_int wait_events_size; /* The size of array that wait_events point to */
+ cl_int barrier_index; /* Indicate event count in wait_events as barrier events */
+ cl_event last_event; /* The last event in the queue, for enqueue mark used */
cl_command_queue_properties props; /* Queue properties */
cl_command_queue prev, next; /* We chain the command queues together */
cl_gpgpu gpgpu; /* Setup all GEN commands */
@@ -76,5 +81,15 @@ extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel);
/* Bind all the image surfaces in the GPGPU state */
extern cl_int cl_command_queue_bind_image(cl_command_queue, cl_kernel);
+
+/* Insert a user event into the command queue's wait_events */
+extern void cl_command_queue_insert_event(cl_command_queue, cl_event);
+
+/* Remove a user event from the command queue's wait_events */
+extern void cl_command_queue_remove_event(cl_command_queue, cl_event);
+
+/* Set the barrier index */
+extern void cl_command_queue_set_barrier(cl_command_queue);
+
#endif /* __CL_COMMAND_QUEUE_H__ */
diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h
index f90f921..b412d58 100644
--- a/src/cl_enqueue.h
+++ b/src/cl_enqueue.h
@@ -19,9 +19,8 @@
#ifndef __CL_ENQUEUE_H__
#define __CL_ENQUEUE_H__
-#include "cl_mem.h"
-#include "cl_command_queue.h"
#include "cl_internals.h"
+#include "cl_driver.h"
#include "CL/cl.h"
typedef enum {
@@ -41,6 +40,7 @@ typedef enum {
EnqueueUnmapMemObject,
EnqueueNDRangeKernel,
EnqueueNativeKernel,
+ EnqueueMarker,
EnqueueInvalid
} enqueue_type;
diff --git a/src/cl_event.c b/src/cl_event.c
index 83e1f50..918e245 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -23,6 +23,7 @@
#include "cl_alloc.h"
#include "cl_khr_icd.h"
#include "cl_kernel.h"
+#include "cl_command_queue.h"
#include <assert.h>
#include <stdio.h>
@@ -81,6 +82,8 @@ cl_event cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type ty
event->enqueue_cb = NULL;
event->waits_head = NULL;
event->emplict = emplict;
+ if(queue && event->gpgpu_event)
+ queue->last_event = event;
exit:
return event;
@@ -100,6 +103,9 @@ void cl_event_delete(cl_event event)
if (atomic_dec(&event->ref_n) > 1)
return;
+ if(event->queue && event->queue->last_event == event)
+ event->queue->last_event = NULL;
+
/* Call all user's callback if haven't execute */
user_callback *cb = event->user_cb;
while(event->user_cb) {
@@ -200,10 +206,11 @@ error:
goto exit;
}
-cl_int cl_event_wait_events(cl_uint num_events_in_wait_list,
- const cl_event *event_wait_list)
+cl_int cl_event_wait_events(cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
+ cl_command_queue queue)
{
cl_int i, j;
+
/* Check whether wait user events */
for(i=0; i<num_events_in_wait_list; i++) {
if(event_wait_list[i]->status <= CL_COMPLETE)
@@ -219,6 +226,10 @@ cl_int cl_event_wait_events(cl_uint num_events_in_wait_list,
}
}
+ if(queue && queue->barrier_index > 0) {
+ return CL_ENQUEUE_EXECUTE_DEFER;
+ }
+
/* Non user events or all user event finished, wait all enqueue events finish */
for(i=0; i<num_events_in_wait_list; i++) {
if(event_wait_list[i]->status <= CL_COMPLETE)
@@ -227,7 +238,8 @@ cl_int cl_event_wait_events(cl_uint num_events_in_wait_list,
//enqueue callback haven't finish, in another thread, wait
if(event_wait_list[i]->enqueue_cb != NULL)
return CL_ENQUEUE_EXECUTE_DEFER;
- cl_gpgpu_event_update_status(event_wait_list[i]->gpgpu_event, 1);
+ if(event_wait_list[i]->gpgpu_event)
+ cl_gpgpu_event_update_status(event_wait_list[i]->gpgpu_event, 1);
cl_event_set_status(event_wait_list[i], CL_COMPLETE); //Execute user's callback
}
return CL_ENQUEUE_EXECUTE_IMM;
@@ -240,6 +252,7 @@ void cl_event_new_enqueue_callback(cl_event event,
{
enqueue_callback *cb, *node;
user_event *user_events, *u_ev;
+ cl_command_queue queue = event->queue;
cl_int i;
/* Allocate and inialize the structure itself */
@@ -252,6 +265,27 @@ void cl_event_new_enqueue_callback(cl_event event,
cb->next = NULL;
cb->wait_user_events = NULL;
+  /* A barrier is active on this queue: make the new callback wait on every
+   * barrier user event, i.e. wait_events[0 .. barrier_index-1]. */
+  if(queue && queue->barrier_index > 0) {
+    for(i=0; i<queue->barrier_index; i++) {
+      /* Insert the enqueue_callback to user event list */
+      node = queue->wait_events[i]->waits_head;
+      if(node == NULL)
+        queue->wait_events[i]->waits_head = cb;
+      else {
+        /* The braces matter: without them an empty list (node == NULL) falls
+         * through to node->next below and dereferences NULL. */
+        while((node != cb) && node->next)
+          node = node->next;
+        if(node == cb) //wait on dup user event
+          continue;
+        node->next = cb;
+      }
+
+      /* Insert the user event to enqueue_callback's wait_user_events */
+      TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
+      u_ev->event = queue->wait_events[i];
+      u_ev->next = cb->wait_user_events;
+      cb->wait_user_events = u_ev;
+    }
+  }
+
+
/* Find out all user events that events in event_wait_list wait */
for(i=0; i<num_events_in_wait_list; i++) {
if(event_wait_list[i]->status <= CL_COMPLETE)
@@ -274,6 +308,7 @@ void cl_event_new_enqueue_callback(cl_event event,
u_ev->event = event_wait_list[i];
u_ev->next = cb->wait_user_events;
cb->wait_user_events = u_ev;
+ cl_command_queue_insert_event(event->queue, event_wait_list[i]);
} else if(event_wait_list[i]->enqueue_cb != NULL) {
user_events = event_wait_list[i]->enqueue_cb->wait_user_events;
while(user_events != NULL) {
@@ -293,10 +328,10 @@ void cl_event_new_enqueue_callback(cl_event event,
u_ev->next = cb->wait_user_events;
cb->wait_user_events = u_ev;
user_events = user_events->next;
+ cl_command_queue_insert_event(event->queue, event_wait_list[i]);
}
}
}
-
if(data->queue != NULL && event->gpgpu_event != NULL) {
cl_gpgpu_event_pending(data->queue->gpgpu, event->gpgpu_event);
data->ptr = (void *)event->gpgpu_event;
@@ -403,8 +438,12 @@ void cl_event_set_status(cl_event event, cl_int status)
continue;
}
+ //remove the completed user event from the enqueue_cb's command queue
+ cl_command_queue_remove_event(enqueue_cb->event->queue, event);
+
/* All user events complete, now wait enqueue events */
- ret = cl_event_wait_events(enqueue_cb->num_events, enqueue_cb->wait_list);
+ ret = cl_event_wait_events(enqueue_cb->num_events, enqueue_cb->wait_list,
+ enqueue_cb->event->queue);
assert(ret != CL_ENQUEUE_EXECUTE_DEFER);
cb = enqueue_cb;
@@ -428,3 +467,26 @@ void cl_event_update_status(cl_event event)
(cl_gpgpu_event_update_status(event->gpgpu_event, 0) == command_complete))
cl_event_set_status(event, CL_COMPLETE);
}
+
+/*
+ * Create the event for clEnqueueMarker and store it in *event.
+ *
+ * If the queue still tracks non-complete user events, the marker is deferred
+ * behind all of them through an enqueue callback. Otherwise the last event of
+ * the queue (if it has a gpgpu event) is waited on and the marker is set to
+ * CL_COMPLETE right away.
+ *
+ * Returns CL_SUCCESS, or CL_OUT_OF_HOST_MEMORY when cl_event_new() yields
+ * NULL (in which case *event is NULL).
+ */
+cl_int cl_event_marker(cl_command_queue queue, cl_event* event)
+{
+  /* Zero-initialise: cl_event_new_enqueue_callback() reads data->queue, which
+   * would otherwise be indeterminate here. */
+  enqueue_data data = { 0 };
+
+  *event = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE);
+  /* Check the freshly created event, not the caller's out-pointer, which was
+   * already dereferenced on the line above. */
+  if(*event == NULL)
+    return CL_OUT_OF_HOST_MEMORY;
+
+  //if wait_events_num>0, the marker event need wait queue->wait_events
+  if(queue->wait_events_num > 0) {
+    data.type = EnqueueMarker;
+    cl_event_new_enqueue_callback(*event, &data, queue->wait_events_num, queue->wait_events);
+    return CL_SUCCESS;
+  }
+
+  /* No pending user events: wait for the last event of the queue, then
+   * complete the marker. */
+  if(queue->last_event && queue->last_event->gpgpu_event) {
+    cl_gpgpu_event_update_status(queue->last_event->gpgpu_event, 1);
+  }
+
+  cl_event_set_status(*event, CL_COMPLETE);
+  return CL_SUCCESS;
+}
diff --git a/src/cl_event.h b/src/cl_event.h
index 8523abe..7dde24b 100644
--- a/src/cl_event.h
+++ b/src/cl_event.h
@@ -22,9 +22,9 @@
#include <semaphore.h>
-#include "cl_enqueue.h"
#include "cl_internals.h"
#include "cl_driver.h"
+#include "cl_enqueue.h"
#include "CL/cl.h"
#define CL_ENQUEUE_EXECUTE_IMM 0
@@ -81,12 +81,14 @@ cl_int cl_event_set_callback(cl_event, cl_int, EVENT_NOTIFY, void *);
/* Check events wait list for enqueue commonds */
cl_int cl_event_check_waitlist(cl_uint, const cl_event *, cl_event *, cl_context);
/* Wait the all events in wait list complete */
-cl_int cl_event_wait_events(cl_uint, const cl_event *);
+cl_int cl_event_wait_events(cl_uint, const cl_event *, cl_command_queue);
/* New a enqueue suspend task */
void cl_event_new_enqueue_callback(cl_event, enqueue_data *, cl_uint, const cl_event *);
/* Set the event status and call all callbacks */
void cl_event_set_status(cl_event, cl_int);
/* Check and update event status */
void cl_event_update_status(cl_event);
+/* Create the marker event */
+cl_int cl_event_marker(cl_command_queue, cl_event*);
#endif /* __CL_EVENT_H__ */
--
1.8.1.2
More information about the Beignet
mailing list