[Beignet] [PATCH 4/4] Implement clEnqueueMarker and clEnqueueBarrier.

Zhigang Gong zhigang.gong at linux.intel.com
Tue Sep 17 22:15:49 PDT 2013


LGTM, pushed, thanks.

On Tue, Sep 17, 2013 at 04:10:01PM +0800, Yang Rong wrote:
> Add some event info to cl_command_queue.
> One piece is the list of non-complete user events, which is used to block the
> marker event and the barrier. After these events become CL_COMPLETE, the events
> blocked by them also become CL_COMPLETE, so the marker event will also be set to
> CL_COMPLETE. If there are no user events, we need to wait for the last event to
> complete and then set the marker event to complete.
> Add barrier_index, for clEnqueueBarrier. It points into the user events and
> indicates how many user events the enqueue APIs that follow clEnqueueBarrier
> should wait on.
> 
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  src/cl_api.c           | 25 ++++++++++++-----
>  src/cl_command_queue.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  src/cl_command_queue.h | 15 ++++++++++
>  src/cl_enqueue.h       |  4 +--
>  src/cl_event.c         | 72 ++++++++++++++++++++++++++++++++++++++++++++----
>  src/cl_event.h         |  6 ++--
>  6 files changed, 180 insertions(+), 16 deletions(-)
> 
> diff --git a/src/cl_api.c b/src/cl_api.c
> index 9270b65..8f62990 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -66,7 +66,7 @@ inline cl_int
>  handle_events(cl_command_queue queue, cl_int num, const cl_event *wait_list,
>                cl_event* event, enqueue_data* data, cl_command_type type)
>  {
> -  cl_int status = cl_event_wait_events(num, wait_list);
> +  cl_int status = cl_event_wait_events(num, wait_list, queue);
>    cl_event e;
>    if(event != NULL || status == CL_ENQUEUE_EXECUTE_DEFER) {
>      e = cl_event_new(queue->ctx, queue, type, event!=NULL);
> @@ -1076,7 +1076,7 @@ clWaitForEvents(cl_uint          num_events,
>  
>    TRY(cl_event_check_waitlist, num_events, event_list, NULL, ctx);
>  
> -  while(cl_event_wait_events(num_events, event_list) == CL_ENQUEUE_EXECUTE_DEFER) {
> +  while(cl_event_wait_events(num_events, event_list, NULL) == CL_ENQUEUE_EXECUTE_DEFER) {
>      usleep(8000);       //sleep 8ms to wait other thread
>    }
>  
> @@ -2401,8 +2401,16 @@ cl_int
>  clEnqueueMarker(cl_command_queue     command_queue,
>                  cl_event *           event)
>  {
> -  NOT_IMPLEMENTED;
> -  return 0;
> +  cl_int err = CL_SUCCESS;
> +  CHECK_QUEUE(command_queue);
> +  if(event == NULL) {
> +    err = CL_INVALID_VALUE;
> +    goto error;
> +  }
> +
> +  cl_event_marker(command_queue, event);
> +error:
> +  return err;
>  }
>  
>  cl_int
> @@ -2421,9 +2429,12 @@ error:
>  cl_int
>  clEnqueueBarrier(cl_command_queue  command_queue)
>  {
> -  NOT_IMPLEMENTED;
> -  return 0;
> -  //return clFinish(command_queue);
> +  cl_int err = CL_SUCCESS;
> +  CHECK_QUEUE(command_queue);
> +  cl_command_queue_set_barrier(command_queue);
> +
> +error:
> +  return err;
>  }
>  
>  #define EXTFUNC(x)                      \
> diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
> index 2454db6..11be423 100644
> --- a/src/cl_command_queue.c
> +++ b/src/cl_command_queue.c
> @@ -87,6 +87,7 @@ cl_command_queue_delete(cl_command_queue queue)
>    cl_mem_delete(queue->perf);
>    cl_context_delete(queue->ctx);
>    cl_gpgpu_delete(queue->gpgpu);
> +  cl_free(queue->wait_events);
>    queue->magic = CL_MAGIC_DEAD_HEADER; /* For safety */
>    cl_free(queue);
>  }
> @@ -412,3 +413,76 @@ cl_command_queue_finish(cl_command_queue queue)
>    return CL_SUCCESS;
>  }
>  
> +#define DEFAULT_WAIT_EVENTS_SIZE  16
> +LOCAL void
> +cl_command_queue_insert_event(cl_command_queue queue, cl_event event)
> +{
> +  cl_int i=0;
> +  cl_event *new_list;
> +
> +  assert(queue != NULL);
> +  if(queue->wait_events == NULL) {
> +    queue->wait_events_size = DEFAULT_WAIT_EVENTS_SIZE;
> +    TRY_ALLOC_NO_ERR (queue->wait_events, CALLOC_ARRAY(cl_event, queue->wait_events_size));
> +  }
> +
> +  for(i=0; i<queue->wait_events_num; i++) {
> +    if(queue->wait_events[i] == event)
> +      return;   //is in the wait_events, need to insert
> +  }
> +
> +  if(queue->wait_events_num < queue->wait_events_size) {
> +    queue->wait_events[queue->wait_events_num++] = event;
> +    return;
> +  }
> +
> +  //wait_events_num == wait_events_size, array is full
> +  queue->wait_events_size *= 2;
> +  TRY_ALLOC_NO_ERR (new_list, CALLOC_ARRAY(cl_event, queue->wait_events_size));
> +  memcpy(new_list, queue->wait_events, sizeof(cl_event)*queue->wait_events_num);
> +  cl_free(queue->wait_events);
> +  queue->wait_events = new_list;
> +  queue->wait_events[queue->wait_events_num++] = event;
> +  return;
> +
> +exit:
> +  return;
> +error:
> +  if(queue->wait_events)
> +    cl_free(queue->wait_events);
> +  queue->wait_events = NULL;
> +  queue->wait_events_size = 0;
> +  queue->wait_events_num = 0;
> +  goto exit;
> +
> +}
> +
> +LOCAL void
> +cl_command_queue_remove_event(cl_command_queue queue, cl_event event)
> +{
> +  cl_int i=0;
> +
> +  assert(queue->wait_events);
> +  for(i=0; i<queue->wait_events_num; i++) {
> +    if(queue->wait_events[i] == event)
> +      break;
> +  }
> +
> +  if(i == queue->wait_events_num)
> +    return;
> +
> +  if(queue->barrier_index >= i)
> +    queue->barrier_index -= 1;
> +
> +  for(; i<queue->wait_events_num-1; i++) {
> +    queue->wait_events[i] = queue->wait_events[i+1];
> +  }
> +  queue->wait_events_num -= 1;
> +}
> +
> +LOCAL void
> +cl_command_queue_set_barrier(cl_command_queue queue)
> +{
> +    queue->barrier_index = queue->wait_events_num;
> +}
> +
> diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
> index 9fe1dd1..9396fd7 100644
> --- a/src/cl_command_queue.h
> +++ b/src/cl_command_queue.h
> @@ -33,6 +33,11 @@ struct _cl_command_queue {
>    uint64_t magic;                      /* To identify it as a command queue */
>    volatile int ref_n;                  /* We reference count this object */
>    cl_context ctx;                      /* Its parent context */
> +  cl_event* wait_events;               /* Point to array of non-complete user events that block this command queue */
> +  cl_int    wait_events_num;           /* Number of Non-complete user events */
> +  cl_int    wait_events_size;          /* The size of array that wait_events point to */
> +  cl_int    barrier_index;             /* Indicate event count in wait_events as barrier events */
> +  cl_event  last_event;                /* The last event in the queue, for enqueue mark used */
>    cl_command_queue_properties  props;  /* Queue properties */
>    cl_command_queue prev, next;         /* We chain the command queues together */
>    cl_gpgpu gpgpu;                      /* Setup all GEN commands */
> @@ -76,5 +81,15 @@ extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel);
>  
>  /* Bind all the image surfaces in the GPGPU state */
>  extern cl_int cl_command_queue_bind_image(cl_command_queue, cl_kernel);
> +
> +/* Insert a user event to command's wait_events */
> +extern void cl_command_queue_insert_event(cl_command_queue, cl_event);
> +
> +/* Remove a user event from command's wait_events */
> +extern void cl_command_queue_remove_event(cl_command_queue, cl_event);
> +
> +/* Set the barrier index */
> +extern void cl_command_queue_set_barrier(cl_command_queue);
> +
>  #endif /* __CL_COMMAND_QUEUE_H__ */
>  
> diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h
> index f90f921..b412d58 100644
> --- a/src/cl_enqueue.h
> +++ b/src/cl_enqueue.h
> @@ -19,9 +19,8 @@
>  #ifndef __CL_ENQUEUE_H__
>  #define __CL_ENQUEUE_H__
>  
> -#include "cl_mem.h"
> -#include "cl_command_queue.h"
>  #include "cl_internals.h"
> +#include "cl_driver.h"
>  #include "CL/cl.h"
>  
>  typedef enum {
> @@ -41,6 +40,7 @@ typedef enum {
>    EnqueueUnmapMemObject,
>    EnqueueNDRangeKernel,
>    EnqueueNativeKernel,
> +  EnqueueMarker,
>    EnqueueInvalid
>  } enqueue_type;
>  
> diff --git a/src/cl_event.c b/src/cl_event.c
> index 83e1f50..918e245 100644
> --- a/src/cl_event.c
> +++ b/src/cl_event.c
> @@ -23,6 +23,7 @@
>  #include "cl_alloc.h"
>  #include "cl_khr_icd.h"
>  #include "cl_kernel.h"
> +#include "cl_command_queue.h"
>  
>  #include <assert.h>
>  #include <stdio.h>
> @@ -81,6 +82,8 @@ cl_event cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type ty
>    event->enqueue_cb = NULL;
>    event->waits_head = NULL;
>    event->emplict = emplict;
> +  if(queue && event->gpgpu_event)
> +    queue->last_event = event;
>  
>  exit:
>    return event;
> @@ -100,6 +103,9 @@ void cl_event_delete(cl_event event)
>    if (atomic_dec(&event->ref_n) > 1)
>      return;
>  
> +  if(event->queue && event->queue->last_event == event)
> +    event->queue->last_event = NULL;
> +
>    /* Call all user's callback if haven't execute */
>    user_callback *cb = event->user_cb;
>    while(event->user_cb) {
> @@ -200,10 +206,11 @@ error:
>    goto exit;
>  }
>  
> -cl_int cl_event_wait_events(cl_uint num_events_in_wait_list,
> -                          const cl_event *event_wait_list)
> +cl_int cl_event_wait_events(cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
> +                            cl_command_queue queue)
>  {
>    cl_int i, j;
> +
>    /* Check whether wait user events */
>    for(i=0; i<num_events_in_wait_list; i++) {
>      if(event_wait_list[i]->status <= CL_COMPLETE)
> @@ -219,6 +226,10 @@ cl_int cl_event_wait_events(cl_uint num_events_in_wait_list,
>      }
>    }
>  
> +  if(queue && queue->barrier_index > 0) {
> +    return CL_ENQUEUE_EXECUTE_DEFER;
> +  }
> +
>    /* Non user events or all user event finished, wait all enqueue events finish */
>    for(i=0; i<num_events_in_wait_list; i++) {
>      if(event_wait_list[i]->status <= CL_COMPLETE)
> @@ -227,7 +238,8 @@ cl_int cl_event_wait_events(cl_uint num_events_in_wait_list,
>      //enqueue callback haven't finish, in another thread, wait
>      if(event_wait_list[i]->enqueue_cb != NULL)
>        return CL_ENQUEUE_EXECUTE_DEFER;
> -    cl_gpgpu_event_update_status(event_wait_list[i]->gpgpu_event, 1);
> +    if(event_wait_list[i]->gpgpu_event)
> +      cl_gpgpu_event_update_status(event_wait_list[i]->gpgpu_event, 1);
>      cl_event_set_status(event_wait_list[i], CL_COMPLETE);  //Execute user's callback
>    }
>    return CL_ENQUEUE_EXECUTE_IMM;
> @@ -240,6 +252,7 @@ void cl_event_new_enqueue_callback(cl_event event,
>  {
>    enqueue_callback *cb, *node;
>    user_event *user_events, *u_ev;
> +  cl_command_queue queue = event->queue;
>    cl_int i;
>  
>    /* Allocate and inialize the structure itself */
> @@ -252,6 +265,27 @@ void cl_event_new_enqueue_callback(cl_event event,
>    cb->next = NULL;
>    cb->wait_user_events = NULL;
>  
> +  if(queue && queue->barrier_index > 0) {
> +    for(i=0; i<queue->barrier_index; i++) {
> +      /* Insert the enqueue_callback to user event list */
> +      node = queue->wait_events[i]->waits_head;
> +      if(node == NULL)
> +        queue->wait_events[i]->waits_head = cb;
> +      else
> +        while((node != cb) && node->next)
> +          node = node->next;
> +        if(node == cb)   //wait on dup user event
> +          continue;
> +        node->next = cb;
> +
> +      /* Insert the user event to enqueue_callback's wait_user_events */
> +      TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
> +      u_ev->event = queue->wait_events[i];
> +      u_ev->next = cb->wait_user_events;
> +      cb->wait_user_events = u_ev;
> +    }
> +  }
> +
>    /* Find out all user events that events in event_wait_list wait */
>    for(i=0; i<num_events_in_wait_list; i++) {
>      if(event_wait_list[i]->status <= CL_COMPLETE)
> @@ -274,6 +308,7 @@ void cl_event_new_enqueue_callback(cl_event event,
>        u_ev->event = event_wait_list[i];
>        u_ev->next = cb->wait_user_events;
>        cb->wait_user_events = u_ev;
> +      cl_command_queue_insert_event(event->queue, event_wait_list[i]);
>      } else if(event_wait_list[i]->enqueue_cb != NULL) {
>        user_events = event_wait_list[i]->enqueue_cb->wait_user_events;
>        while(user_events != NULL) {
> @@ -293,10 +328,10 @@ void cl_event_new_enqueue_callback(cl_event event,
>          u_ev->next = cb->wait_user_events;
>          cb->wait_user_events = u_ev;
>          user_events = user_events->next;
> +        cl_command_queue_insert_event(event->queue, event_wait_list[i]);
>        }
>      }
>    }
> -
>    if(data->queue != NULL && event->gpgpu_event != NULL) {
>      cl_gpgpu_event_pending(data->queue->gpgpu, event->gpgpu_event);
>      data->ptr = (void *)event->gpgpu_event;
> @@ -403,8 +438,12 @@ void cl_event_set_status(cl_event event, cl_int status)
>        continue;
>      }
>  
> +    //remove user event frome enqueue_cb's ctx
> +    cl_command_queue_remove_event(enqueue_cb->event->queue, event);
> +
>      /* All user events complete, now wait enqueue events */
> -    ret = cl_event_wait_events(enqueue_cb->num_events, enqueue_cb->wait_list);
> +    ret = cl_event_wait_events(enqueue_cb->num_events, enqueue_cb->wait_list,
> +                               enqueue_cb->event->queue);
>      assert(ret != CL_ENQUEUE_EXECUTE_DEFER);
>  
>      cb = enqueue_cb;
> @@ -428,3 +467,26 @@ void cl_event_update_status(cl_event event)
>       (cl_gpgpu_event_update_status(event->gpgpu_event, 0) == command_complete))
>      cl_event_set_status(event, CL_COMPLETE);
>  }
> +
> +cl_int cl_event_marker(cl_command_queue queue, cl_event* event)
> +{
> +  enqueue_data data;
> +
> +  *event = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE);
> +  if(event == NULL)
> +    return CL_OUT_OF_HOST_MEMORY;
> +
> +  //if wait_events_num>0, the marker event need wait queue->wait_events
> +  if(queue->wait_events_num > 0) {
> +    data.type = EnqueueMarker;
> +    cl_event_new_enqueue_callback(*event, &data, queue->wait_events_num, queue->wait_events);
> +    return CL_SUCCESS;
> +  }
> +
> +  if(queue->last_event && queue->last_event->gpgpu_event) {
> +    cl_gpgpu_event_update_status(queue->last_event->gpgpu_event, 1);
> +  }
> +
> +  cl_event_set_status(*event, CL_COMPLETE);
> +  return CL_SUCCESS;
> +}
> diff --git a/src/cl_event.h b/src/cl_event.h
> index 8523abe..7dde24b 100644
> --- a/src/cl_event.h
> +++ b/src/cl_event.h
> @@ -22,9 +22,9 @@
>  
>  #include <semaphore.h>
>  
> -#include "cl_enqueue.h"
>  #include "cl_internals.h"
>  #include "cl_driver.h"
> +#include "cl_enqueue.h"
>  #include "CL/cl.h"
>  
>  #define CL_ENQUEUE_EXECUTE_IMM   0
> @@ -81,12 +81,14 @@ cl_int cl_event_set_callback(cl_event, cl_int, EVENT_NOTIFY, void *);
>  /* Check events wait list for enqueue commonds */
>  cl_int cl_event_check_waitlist(cl_uint, const cl_event *, cl_event *, cl_context);
>  /* Wait the all events in wait list complete */
> -cl_int cl_event_wait_events(cl_uint, const cl_event *);
> +cl_int cl_event_wait_events(cl_uint, const cl_event *, cl_command_queue);
>  /* New a enqueue suspend task */
>  void cl_event_new_enqueue_callback(cl_event, enqueue_data *, cl_uint, const cl_event *);
>  /* Set the event status and call all callbacks */
>  void cl_event_set_status(cl_event, cl_int);
>  /* Check and update event status */
>  void cl_event_update_status(cl_event);
> +/* Create the marker event */
> +cl_int cl_event_marker(cl_command_queue, cl_event*);
>  #endif /* __CL_EVENT_H__ */
>  
> -- 
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list