[Beignet] [PATCH v2] runtime: fix a gpgpu event and thread local gpgpu handling bug.

He Junyan junyan.he at inbox.com
Thu Jul 3 00:36:58 PDT 2014


OK, that's LGTM

On 四, 2014-07-03 at 14:14 +0800, Zhigang Gong wrote:
> When pending a command queue, we need to record the whole gpgpu
> structure not just the batch buffer. For the following reason:
> 
> 1. We need to keep those private buffer, for example those printf buffers.
> 2. We need to make sure this gpgpu will not be reused by other enqueuement.
> 
> v2:
> Don't try to flush all user event attached to the queue.
> Just need to flush the current event when doing command queue flush.
> 
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
>  src/cl_api.c                  |  3 +-
>  src/cl_command_queue.c        | 14 +++++++--
>  src/cl_command_queue.h        |  4 +++
>  src/cl_driver.h               |  8 ++----
>  src/cl_driver_defs.c          |  4 +--
>  src/cl_enqueue.c              |  2 +-
>  src/cl_event.c                | 26 ++++++++++++++---
>  src/cl_event.h                |  7 +++--
>  src/cl_thread.c               | 20 +++++++++++++
>  src/cl_thread.h               |  3 ++
>  src/intel/intel_batchbuffer.c | 13 ---------
>  src/intel/intel_batchbuffer.h |  1 -
>  src/intel/intel_gpgpu.c       | 66 +++++++++++++++++--------------------------
>  13 files changed, 97 insertions(+), 74 deletions(-)
> 
> diff --git a/src/cl_api.c b/src/cl_api.c
> index d54ada6..8759027 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -69,7 +69,7 @@ handle_events(cl_command_queue queue, cl_int num, const cl_event *wait_list,
>                cl_event* event, enqueue_data* data, cl_command_type type)
>  {
>    cl_int status = cl_event_wait_events(num, wait_list, queue);
> -  cl_event e;
> +  cl_event e = NULL;
>    if(event != NULL || status == CL_ENQUEUE_EXECUTE_DEFER) {
>      e = cl_event_new(queue->ctx, queue, type, event!=NULL);
>  
> @@ -85,6 +85,7 @@ handle_events(cl_command_queue queue, cl_int num, const cl_event *wait_list,
>        cl_event_new_enqueue_callback(e, data, num, wait_list);
>      }
>    }
> +  queue->current_event = e;
>    return status;
>  }
>  
> diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
> index 8426c4e..cd268aa 100644
> --- a/src/cl_command_queue.c
> +++ b/src/cl_command_queue.c
> @@ -28,6 +28,7 @@
>  #include "cl_alloc.h"
>  #include "cl_driver.h"
>  #include "cl_khr_icd.h"
> +#include "cl_event.h"
>  #include "performance.h"
>  
>  #include <assert.h>
> @@ -421,10 +422,9 @@ error:
>    return err;
>  }
>  
> -LOCAL cl_int
> -cl_command_queue_flush(cl_command_queue queue)
> +LOCAL void
> +cl_command_queue_flush_gpgpu(cl_command_queue queue, cl_gpgpu gpgpu)
>  {
> -  GET_QUEUE_THREAD_GPGPU(queue);
>    size_t global_wk_sz[3];
>    void* printf_info = cl_gpgpu_get_printf_info(gpgpu, global_wk_sz);
>  
> @@ -447,7 +447,15 @@ cl_command_queue_flush(cl_command_queue queue)
>      global_wk_sz[0] = global_wk_sz[1] = global_wk_sz[2] = 0;
>      cl_gpgpu_set_printf_info(gpgpu, NULL, global_wk_sz);
>    }
> +}
>  
> +LOCAL cl_int
> +cl_command_queue_flush(cl_command_queue queue)
> +{
> +  GET_QUEUE_THREAD_GPGPU(queue);
> +  cl_command_queue_flush_gpgpu(queue, gpgpu);
> +  if (queue->current_event)
> +    cl_event_flush(queue->current_event);
>    cl_invalid_thread_gpgpu(queue);
>    return CL_SUCCESS;
>  }
> diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
> index b79d63a..bd70f25 100644
> --- a/src/cl_command_queue.h
> +++ b/src/cl_command_queue.h
> @@ -41,6 +41,7 @@ struct _cl_command_queue {
>    cl_int    wait_events_num;           /* Number of Non-complete user events */
>    cl_int    wait_events_size;          /* The size of array that wait_events point to */
>    cl_event  last_event;                /* The last event in the queue, for enqueue mark used */
> +  cl_event  current_event;             /* Current event. */
>    cl_command_queue_properties  props;  /* Queue properties */
>    cl_command_queue prev, next;         /* We chain the command queues together */
>    void *thread_data;                   /* Used to store thread context data */
> @@ -82,6 +83,9 @@ cl_int cl_command_queue_set_fulsim_buffer(cl_command_queue, cl_mem);
>  /* Flush for the command queue */
>  extern cl_int cl_command_queue_flush(cl_command_queue);
>  
> +/* Flush for the specified gpgpu */
> +extern void cl_command_queue_flush_gpgpu(cl_command_queue, cl_gpgpu);
> +
>  /* Wait for the completion of the command queue */
>  extern cl_int cl_command_queue_finish(cl_command_queue);
>  
> diff --git a/src/cl_driver.h b/src/cl_driver.h
> index 2999eb7..3d1d8d8 100644
> --- a/src/cl_driver.h
> +++ b/src/cl_driver.h
> @@ -197,13 +197,9 @@ extern cl_gpgpu_event_new_cb *cl_gpgpu_event_new;
>  typedef int (cl_gpgpu_event_update_status_cb)(cl_gpgpu_event, int);
>  extern cl_gpgpu_event_update_status_cb *cl_gpgpu_event_update_status;
>  
> -/* pending flush the batch buffer of this event */
> -typedef void (cl_gpgpu_event_pending_cb)(cl_gpgpu, cl_gpgpu_event);
> -extern cl_gpgpu_event_pending_cb *cl_gpgpu_event_pending;
> -
>  /* flush the batch buffer of this event */
> -typedef void (cl_gpgpu_event_resume_cb)(cl_gpgpu_event);
> -extern cl_gpgpu_event_resume_cb *cl_gpgpu_event_resume;
> +typedef void (cl_gpgpu_event_flush_cb)(cl_gpgpu_event);
> +extern cl_gpgpu_event_flush_cb *cl_gpgpu_event_flush;
>  
>  /* cancel exec batch buffer of this event */
>  typedef void (cl_gpgpu_event_cancel_cb)(cl_gpgpu_event);
> diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
> index c9385c4..72f25d9 100644
> --- a/src/cl_driver_defs.c
> +++ b/src/cl_driver_defs.c
> @@ -79,9 +79,7 @@ LOCAL cl_gpgpu_walker_cb *cl_gpgpu_walker = NULL;
>  LOCAL cl_gpgpu_bind_sampler_cb *cl_gpgpu_bind_sampler = NULL;
>  LOCAL cl_gpgpu_event_new_cb *cl_gpgpu_event_new = NULL;
>  LOCAL cl_gpgpu_event_update_status_cb *cl_gpgpu_event_update_status = NULL;
> -LOCAL cl_gpgpu_event_pending_cb *cl_gpgpu_event_pending = NULL;
> -LOCAL cl_gpgpu_event_resume_cb *cl_gpgpu_event_resume = NULL;
> -LOCAL cl_gpgpu_event_cancel_cb *cl_gpgpu_event_cancel = NULL;
> +LOCAL cl_gpgpu_event_flush_cb *cl_gpgpu_event_flush = NULL;
>  LOCAL cl_gpgpu_event_delete_cb *cl_gpgpu_event_delete = NULL;
>  LOCAL cl_gpgpu_event_get_exec_timestamp_cb *cl_gpgpu_event_get_exec_timestamp = NULL;
>  LOCAL cl_gpgpu_event_get_gpu_cur_timestamp_cb *cl_gpgpu_event_get_gpu_cur_timestamp = NULL;
> diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
> index 7fa44ff..af118ad 100644
> --- a/src/cl_enqueue.c
> +++ b/src/cl_enqueue.c
> @@ -461,7 +461,7 @@ cl_int cl_enqueue_handle(cl_event event, enqueue_data* data)
>      case EnqueueNDRangeKernel:
>      case EnqueueFillBuffer:
>      case EnqueueFillImage:
> -      cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr);
> +      cl_event_flush(event);
>        return CL_SUCCESS;
>      case EnqueueNativeKernel:
>        return cl_enqueue_native_kernel(data);
> diff --git a/src/cl_event.c b/src/cl_event.c
> index a3af59c..ec5cb68 100644
> --- a/src/cl_event.c
> +++ b/src/cl_event.c
> @@ -46,6 +46,17 @@ cl_event_is_gpu_command_type(cl_command_type type)
>    }
>  }
>  
> +void cl_event_flush(cl_event event)
> +{
> +  assert(event->gpgpu_event != NULL);
> +  if (event->gpgpu) {
> +    cl_command_queue_flush_gpgpu(event->queue, event->gpgpu);
> +    cl_gpgpu_delete(event->gpgpu);
> +    event->gpgpu = NULL;
> +  }
> +  cl_gpgpu_event_flush(event->gpgpu_event);
> +}
> +
>  cl_event cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type, cl_bool emplict)
>  {
>    cl_event event = NULL;
> @@ -138,6 +149,11 @@ void cl_event_delete(cl_event event)
>    pthread_mutex_unlock(&event->ctx->event_lock);
>    cl_context_delete(event->ctx);
>  
> +  if (event->gpgpu) {
> +    fprintf(stderr, "Warning: a event is deleted with a pending enqueued task.\n");
> +    cl_gpgpu_delete(event->gpgpu);
> +    event->gpgpu = NULL;
> +  }
>    cl_free(event);
>  }
>  
> @@ -257,7 +273,6 @@ void cl_event_new_enqueue_callback(cl_event event,
>    cl_command_queue queue = event->queue;
>    cl_int i;
>    cl_int err = CL_SUCCESS;
> -  GET_QUEUE_THREAD_GPGPU(data->queue);
>  
>    /* Allocate and initialize the structure itself */
>    TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback));
> @@ -344,7 +359,7 @@ void cl_event_new_enqueue_callback(cl_event event,
>      }
>    }
>    if(data->queue != NULL && event->gpgpu_event != NULL) {
> -    cl_gpgpu_event_pending(gpgpu, event->gpgpu_event);
> +    event->gpgpu = cl_thread_gpgpu_take(event->queue);
>      data->ptr = (void *)event->gpgpu_event;
>    }
>    cb->data = *data;
> @@ -394,8 +409,11 @@ void cl_event_set_status(cl_event event, cl_int status)
>          if(event->gpgpu_event)
>            cl_gpgpu_event_update_status(event->gpgpu_event, 1);  //now set complet, need refine
>        } else {
> -        if(event->gpgpu_event)
> -          cl_gpgpu_event_cancel(event->gpgpu_event);  //Error cancel the enqueue
> +        if(event->gpgpu_event) {
> +          // Error then cancel the enqueued event.
> +          cl_gpgpu_delete(event->gpgpu);
> +          event->gpgpu = NULL;
> +        }
>        }
>  
>        event->status = status;  //Change the event status after enqueue and befor unlock
> diff --git a/src/cl_event.h b/src/cl_event.h
> index bd8a5c7..3c23d74 100644
> --- a/src/cl_event.h
> +++ b/src/cl_event.h
> @@ -63,6 +63,7 @@ struct _cl_event {
>    cl_command_queue   queue;       /* The command queue associated with event */
>    cl_command_type    type;        /* The command type associated with event */
>    cl_int             status;      /* The execution status */
> +  cl_gpgpu           gpgpu;       /* Current gpgpu, owned by this structure. */
>    cl_gpgpu_event     gpgpu_event; /* The event object communicate with hardware */
>    user_callback*     user_cb;     /* The event callback functions */
>    enqueue_callback*  enqueue_cb;  /* This event's enqueue */
> @@ -95,9 +96,11 @@ cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const cl_event
>  cl_int cl_event_barrier_with_wait_list(cl_command_queue, cl_uint, const cl_event *,  cl_event*);
>  /* Do the event profiling */
>  cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name);
> -/*insert the user event*/
> +/* insert the user event */
>  cl_int cl_event_insert_user_event(user_event** p_u_ev, cl_event event);
> -/*remove the user event*/
> +/* remove the user event */
>  cl_int cl_event_remove_user_event(user_event** p_u_ev, cl_event event);
> +/* flush the event's pending gpgpu batch buffer and notify driver this gpgpu event has been flushed. */
> +void cl_event_flush(cl_event event);
>  #endif /* __CL_EVENT_H__ */
>  
> diff --git a/src/cl_thread.c b/src/cl_thread.c
> index 4692ed7..5713d70 100644
> --- a/src/cl_thread.c
> +++ b/src/cl_thread.c
> @@ -209,6 +209,26 @@ void cl_invalid_thread_gpgpu(cl_command_queue queue)
>    spec->valid = 0;
>  }
>  
> +cl_gpgpu cl_thread_gpgpu_take(cl_command_queue queue)
> +{
> +  queue_thread_private *thread_private = ((queue_thread_private *)(queue->thread_data));
> +  thread_spec_data* spec = NULL;
> +
> +  pthread_mutex_lock(&thread_private->thread_data_lock);
> +  spec = thread_private->threads_data[thread_id];
> +  assert(spec);
> +  pthread_mutex_unlock(&thread_private->thread_data_lock);
> +
> +  if (!spec->valid)
> +    return NULL;
> +
> +  assert(spec->gpgpu);
> +  cl_gpgpu gpgpu = spec->gpgpu;
> +  spec->gpgpu = NULL;
> +  spec->valid = 0;
> +  return gpgpu;
> +}
> +
>  /* The destructor for clean the thread specific data. */
>  void cl_thread_data_destroy(cl_command_queue queue)
>  {
> diff --git a/src/cl_thread.h b/src/cl_thread.h
> index bf855a2..ecc99ad 100644
> --- a/src/cl_thread.h
> +++ b/src/cl_thread.h
> @@ -41,4 +41,7 @@ void cl_set_thread_batch_buf(cl_command_queue queue, void* buf);
>  /* Used to get the batch buffer of each thread. */
>  void* cl_get_thread_batch_buf(cl_command_queue queue);
>  
> +/* take current gpgpu from the thread gpgpu pool. */
> +cl_gpgpu cl_thread_gpgpu_take(cl_command_queue queue);
> +
>  #endif /* __CL_THREAD_H__ */
> diff --git a/src/intel/intel_batchbuffer.c b/src/intel/intel_batchbuffer.c
> index d0da77a..7767db3 100644
> --- a/src/intel/intel_batchbuffer.c
> +++ b/src/intel/intel_batchbuffer.c
> @@ -185,16 +185,3 @@ intel_batchbuffer_delete(intel_batchbuffer_t *batch)
>  
>    cl_free(batch);
>  }
> -
> -LOCAL void
> -intel_batchbuffer_take(intel_batchbuffer_t *from, intel_batchbuffer_t *to)
> -{
> -  *to = *from;
> -  //Need not unreference the buffer, to will unreference it.
> -  from->buffer = NULL;
> -  from->map = NULL;
> -  from->ptr = NULL;
> -  from->size = 0;
> -  from->atomic = 0;
> -  from->enable_slm = 0;
> -}
> diff --git a/src/intel/intel_batchbuffer.h b/src/intel/intel_batchbuffer.h
> index c62043e..0c3bc13 100644
> --- a/src/intel/intel_batchbuffer.h
> +++ b/src/intel/intel_batchbuffer.h
> @@ -101,7 +101,6 @@ extern void intel_batchbuffer_init(intel_batchbuffer_t*, struct intel_driver*);
>  extern void intel_batchbuffer_terminate(intel_batchbuffer_t*);
>  extern void intel_batchbuffer_flush(intel_batchbuffer_t*);
>  extern void intel_batchbuffer_reset(intel_batchbuffer_t*, size_t sz);
> -extern void intel_batchbuffer_take(intel_batchbuffer_t*, intel_batchbuffer_t *);
>  
>  static INLINE uint32_t
>  intel_batchbuffer_space(const intel_batchbuffer_t *batch)
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
> index a7c449d..d00bc83 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -60,9 +60,8 @@ typedef struct surface_heap {
>  } surface_heap_t;
>  
>  typedef struct intel_event {
> -  intel_batchbuffer_t *batch;
> -  drm_intel_bo* buffer;
> -  drm_intel_bo* ts_buf;
> +  drm_intel_bo *buffer;
> +  drm_intel_bo *ts_buf;
>    int status;
>  } intel_event_t;
>  
> @@ -569,12 +568,19 @@ intel_gpgpu_check_binded_buf_address(intel_gpgpu_t *gpgpu)
>  }
>  
>  static void
> +intel_gpgpu_flush_batch_buffer(intel_batchbuffer_t *batch)
> +{
> +  assert(batch);
> +  intel_batchbuffer_emit_mi_flush(batch);
> +  intel_batchbuffer_flush(batch);
> +}
> +
> +static void
>  intel_gpgpu_flush(intel_gpgpu_t *gpgpu)
>  {
>    if (!gpgpu->batch || !gpgpu->batch->buffer)
>      return;
> -  intel_batchbuffer_emit_mi_flush(gpgpu->batch);
> -  intel_batchbuffer_flush(gpgpu->batch);
> +  intel_gpgpu_flush_batch_buffer(gpgpu->batch);
>    intel_gpgpu_check_binded_buf_address(gpgpu);
>  }
>  
> @@ -1156,11 +1162,10 @@ intel_gpgpu_event_new(intel_gpgpu_t *gpgpu)
>    intel_event_t *event = NULL;
>    TRY_ALLOC_NO_ERR (event, CALLOC(intel_event_t));
>  
> -  event->status = command_queued;
> -  event->batch = NULL;
>    event->buffer = gpgpu->batch->buffer;
> -  if(event->buffer != NULL)
> +  if (event->buffer)
>      drm_intel_bo_reference(event->buffer);
> +  event->status = command_queued;
>  
>    if(gpgpu->time_stamp_b.bo) {
>      event->ts_buf = gpgpu->time_stamp_b.bo;
> @@ -1175,6 +1180,17 @@ error:
>    goto exit;
>  }
>  
> +/*
> +   The upper layer already flushed the batch buffer, just update
> +   internal status to command_submitted.
> +*/
> +static void
> +intel_gpgpu_event_flush(intel_event_t *event)
> +{
> +  assert(event->status == command_queued);
> +  event->status = command_running;
> +}
> +
>  static int
>  intel_gpgpu_event_update_status(intel_event_t *event, int wait)
>  {
> @@ -1182,7 +1198,7 @@ intel_gpgpu_event_update_status(intel_event_t *event, int wait)
>      return event->status;
>  
>    if (event->buffer &&
> -      event->batch == NULL &&        //have flushed
> +      event->status == command_running &&
>        !drm_intel_bo_busy(event->buffer)) {
>      event->status = command_complete;
>      drm_intel_bo_unreference(event->buffer);
> @@ -1203,36 +1219,8 @@ intel_gpgpu_event_update_status(intel_event_t *event, int wait)
>  }
>  
>  static void
> -intel_gpgpu_event_pending(intel_gpgpu_t *gpgpu, intel_event_t *event)
> -{
> -  assert(event->buffer);           //This is gpu enqueue command
> -  assert(event->batch == NULL);    //This command haven't pengding.
> -  event->batch = intel_batchbuffer_new(gpgpu->drv);
> -  assert(event->batch);
> -  intel_batchbuffer_take(gpgpu->batch, event->batch);
> -}
> -
> -static void
> -intel_gpgpu_event_resume(intel_event_t *event)
> -{
> -  assert(event->batch);           //This command have pending.
> -  intel_batchbuffer_flush(event->batch);
> -  intel_batchbuffer_delete(event->batch);
> -  event->batch = NULL;
> -}
> -
> -static void
> -intel_gpgpu_event_cancel(intel_event_t *event)
> -{
> -  assert(event->batch);           //This command have pending.
> -  intel_batchbuffer_delete(event->batch);
> -  event->batch = NULL;
> -}
> -
> -static void
>  intel_gpgpu_event_delete(intel_event_t *event)
>  {
> -  assert(event->batch == NULL);   //This command must have been flushed.
>    if(event->buffer)
>      drm_intel_bo_unreference(event->buffer);
>    if(event->ts_buf)
> @@ -1412,10 +1400,8 @@ intel_set_gpgpu_callbacks(int device_id)
>    cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler;
>    cl_gpgpu_set_scratch = (cl_gpgpu_set_scratch_cb *) intel_gpgpu_set_scratch;
>    cl_gpgpu_event_new = (cl_gpgpu_event_new_cb *)intel_gpgpu_event_new;
> +  cl_gpgpu_event_flush = (cl_gpgpu_event_flush_cb *)intel_gpgpu_event_flush;
>    cl_gpgpu_event_update_status = (cl_gpgpu_event_update_status_cb *)intel_gpgpu_event_update_status;
> -  cl_gpgpu_event_pending = (cl_gpgpu_event_pending_cb *)intel_gpgpu_event_pending;
> -  cl_gpgpu_event_resume = (cl_gpgpu_event_resume_cb *)intel_gpgpu_event_resume;
> -  cl_gpgpu_event_cancel = (cl_gpgpu_event_cancel_cb *)intel_gpgpu_event_cancel;
>    cl_gpgpu_event_delete = (cl_gpgpu_event_delete_cb *)intel_gpgpu_event_delete;
>    cl_gpgpu_event_get_exec_timestamp = (cl_gpgpu_event_get_exec_timestamp_cb *)intel_gpgpu_event_get_exec_timestamp;
>    cl_gpgpu_event_get_gpu_cur_timestamp = (cl_gpgpu_event_get_gpu_cur_timestamp_cb *)intel_gpgpu_event_get_gpu_cur_timestamp;





More information about the Beignet mailing list