[Beignet] [PATCH v2] runtime: fix a gpgpu event and thread local gpgpu handling bug.
Zhigang Gong
zhigang.gong at intel.com
Wed Jul 2 23:14:50 PDT 2014
When pending a command queue, we need to record the whole gpgpu
structure not just the batch buffer. For the following reason:
1. We need to keep those private buffer, for example those printf buffers.
2. We need to make sure this gpgpu will not be reused by other enqueuement.
v2:
Don't try to flush all user event attached to the queue.
Just need to flush the current event when doing command queue flush.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
src/cl_api.c | 3 +-
src/cl_command_queue.c | 14 +++++++--
src/cl_command_queue.h | 4 +++
src/cl_driver.h | 8 ++----
src/cl_driver_defs.c | 4 +--
src/cl_enqueue.c | 2 +-
src/cl_event.c | 26 ++++++++++++++---
src/cl_event.h | 7 +++--
src/cl_thread.c | 20 +++++++++++++
src/cl_thread.h | 3 ++
src/intel/intel_batchbuffer.c | 13 ---------
src/intel/intel_batchbuffer.h | 1 -
src/intel/intel_gpgpu.c | 66 +++++++++++++++++--------------------------
13 files changed, 97 insertions(+), 74 deletions(-)
diff --git a/src/cl_api.c b/src/cl_api.c
index d54ada6..8759027 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -69,7 +69,7 @@ handle_events(cl_command_queue queue, cl_int num, const cl_event *wait_list,
cl_event* event, enqueue_data* data, cl_command_type type)
{
cl_int status = cl_event_wait_events(num, wait_list, queue);
- cl_event e;
+ cl_event e = NULL;
if(event != NULL || status == CL_ENQUEUE_EXECUTE_DEFER) {
e = cl_event_new(queue->ctx, queue, type, event!=NULL);
@@ -85,6 +85,7 @@ handle_events(cl_command_queue queue, cl_int num, const cl_event *wait_list,
cl_event_new_enqueue_callback(e, data, num, wait_list);
}
}
+ queue->current_event = e;
return status;
}
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 8426c4e..cd268aa 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -28,6 +28,7 @@
#include "cl_alloc.h"
#include "cl_driver.h"
#include "cl_khr_icd.h"
+#include "cl_event.h"
#include "performance.h"
#include <assert.h>
@@ -421,10 +422,9 @@ error:
return err;
}
-LOCAL cl_int
-cl_command_queue_flush(cl_command_queue queue)
+LOCAL void
+cl_command_queue_flush_gpgpu(cl_command_queue queue, cl_gpgpu gpgpu)
{
- GET_QUEUE_THREAD_GPGPU(queue);
size_t global_wk_sz[3];
void* printf_info = cl_gpgpu_get_printf_info(gpgpu, global_wk_sz);
@@ -447,7 +447,15 @@ cl_command_queue_flush(cl_command_queue queue)
global_wk_sz[0] = global_wk_sz[1] = global_wk_sz[2] = 0;
cl_gpgpu_set_printf_info(gpgpu, NULL, global_wk_sz);
}
+}
+LOCAL cl_int
+cl_command_queue_flush(cl_command_queue queue)
+{
+ GET_QUEUE_THREAD_GPGPU(queue);
+ cl_command_queue_flush_gpgpu(queue, gpgpu);
+ if (queue->current_event)
+ cl_event_flush(queue->current_event);
cl_invalid_thread_gpgpu(queue);
return CL_SUCCESS;
}
diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
index b79d63a..bd70f25 100644
--- a/src/cl_command_queue.h
+++ b/src/cl_command_queue.h
@@ -41,6 +41,7 @@ struct _cl_command_queue {
cl_int wait_events_num; /* Number of Non-complete user events */
cl_int wait_events_size; /* The size of array that wait_events point to */
cl_event last_event; /* The last event in the queue, for enqueue mark used */
+ cl_event current_event; /* Current event. */
cl_command_queue_properties props; /* Queue properties */
cl_command_queue prev, next; /* We chain the command queues together */
void *thread_data; /* Used to store thread context data */
@@ -82,6 +83,9 @@ cl_int cl_command_queue_set_fulsim_buffer(cl_command_queue, cl_mem);
/* Flush for the command queue */
extern cl_int cl_command_queue_flush(cl_command_queue);
+/* Flush for the specified gpgpu */
+extern void cl_command_queue_flush_gpgpu(cl_command_queue, cl_gpgpu);
+
/* Wait for the completion of the command queue */
extern cl_int cl_command_queue_finish(cl_command_queue);
diff --git a/src/cl_driver.h b/src/cl_driver.h
index 2999eb7..3d1d8d8 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -197,13 +197,9 @@ extern cl_gpgpu_event_new_cb *cl_gpgpu_event_new;
typedef int (cl_gpgpu_event_update_status_cb)(cl_gpgpu_event, int);
extern cl_gpgpu_event_update_status_cb *cl_gpgpu_event_update_status;
-/* pending flush the batch buffer of this event */
-typedef void (cl_gpgpu_event_pending_cb)(cl_gpgpu, cl_gpgpu_event);
-extern cl_gpgpu_event_pending_cb *cl_gpgpu_event_pending;
-
/* flush the batch buffer of this event */
-typedef void (cl_gpgpu_event_resume_cb)(cl_gpgpu_event);
-extern cl_gpgpu_event_resume_cb *cl_gpgpu_event_resume;
+typedef void (cl_gpgpu_event_flush_cb)(cl_gpgpu_event);
+extern cl_gpgpu_event_flush_cb *cl_gpgpu_event_flush;
/* cancel exec batch buffer of this event */
typedef void (cl_gpgpu_event_cancel_cb)(cl_gpgpu_event);
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index c9385c4..72f25d9 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -79,9 +79,7 @@ LOCAL cl_gpgpu_walker_cb *cl_gpgpu_walker = NULL;
LOCAL cl_gpgpu_bind_sampler_cb *cl_gpgpu_bind_sampler = NULL;
LOCAL cl_gpgpu_event_new_cb *cl_gpgpu_event_new = NULL;
LOCAL cl_gpgpu_event_update_status_cb *cl_gpgpu_event_update_status = NULL;
-LOCAL cl_gpgpu_event_pending_cb *cl_gpgpu_event_pending = NULL;
-LOCAL cl_gpgpu_event_resume_cb *cl_gpgpu_event_resume = NULL;
-LOCAL cl_gpgpu_event_cancel_cb *cl_gpgpu_event_cancel = NULL;
+LOCAL cl_gpgpu_event_flush_cb *cl_gpgpu_event_flush = NULL;
LOCAL cl_gpgpu_event_delete_cb *cl_gpgpu_event_delete = NULL;
LOCAL cl_gpgpu_event_get_exec_timestamp_cb *cl_gpgpu_event_get_exec_timestamp = NULL;
LOCAL cl_gpgpu_event_get_gpu_cur_timestamp_cb *cl_gpgpu_event_get_gpu_cur_timestamp = NULL;
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index 7fa44ff..af118ad 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -461,7 +461,7 @@ cl_int cl_enqueue_handle(cl_event event, enqueue_data* data)
case EnqueueNDRangeKernel:
case EnqueueFillBuffer:
case EnqueueFillImage:
- cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr);
+ cl_event_flush(event);
return CL_SUCCESS;
case EnqueueNativeKernel:
return cl_enqueue_native_kernel(data);
diff --git a/src/cl_event.c b/src/cl_event.c
index a3af59c..ec5cb68 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -46,6 +46,17 @@ cl_event_is_gpu_command_type(cl_command_type type)
}
}
+void cl_event_flush(cl_event event)
+{
+ assert(event->gpgpu_event != NULL);
+ if (event->gpgpu) {
+ cl_command_queue_flush_gpgpu(event->queue, event->gpgpu);
+ cl_gpgpu_delete(event->gpgpu);
+ event->gpgpu = NULL;
+ }
+ cl_gpgpu_event_flush(event->gpgpu_event);
+}
+
cl_event cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type, cl_bool emplict)
{
cl_event event = NULL;
@@ -138,6 +149,11 @@ void cl_event_delete(cl_event event)
pthread_mutex_unlock(&event->ctx->event_lock);
cl_context_delete(event->ctx);
+ if (event->gpgpu) {
+ fprintf(stderr, "Warning: a event is deleted with a pending enqueued task.\n");
+ cl_gpgpu_delete(event->gpgpu);
+ event->gpgpu = NULL;
+ }
cl_free(event);
}
@@ -257,7 +273,6 @@ void cl_event_new_enqueue_callback(cl_event event,
cl_command_queue queue = event->queue;
cl_int i;
cl_int err = CL_SUCCESS;
- GET_QUEUE_THREAD_GPGPU(data->queue);
/* Allocate and initialize the structure itself */
TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback));
@@ -344,7 +359,7 @@ void cl_event_new_enqueue_callback(cl_event event,
}
}
if(data->queue != NULL && event->gpgpu_event != NULL) {
- cl_gpgpu_event_pending(gpgpu, event->gpgpu_event);
+ event->gpgpu = cl_thread_gpgpu_take(event->queue);
data->ptr = (void *)event->gpgpu_event;
}
cb->data = *data;
@@ -394,8 +409,11 @@ void cl_event_set_status(cl_event event, cl_int status)
if(event->gpgpu_event)
cl_gpgpu_event_update_status(event->gpgpu_event, 1); //now set complet, need refine
} else {
- if(event->gpgpu_event)
- cl_gpgpu_event_cancel(event->gpgpu_event); //Error cancel the enqueue
+ if(event->gpgpu_event) {
+ // Error then cancel the enqueued event.
+ cl_gpgpu_delete(event->gpgpu);
+ event->gpgpu = NULL;
+ }
}
event->status = status; //Change the event status after enqueue and befor unlock
diff --git a/src/cl_event.h b/src/cl_event.h
index bd8a5c7..3c23d74 100644
--- a/src/cl_event.h
+++ b/src/cl_event.h
@@ -63,6 +63,7 @@ struct _cl_event {
cl_command_queue queue; /* The command queue associated with event */
cl_command_type type; /* The command type associated with event */
cl_int status; /* The execution status */
+ cl_gpgpu gpgpu; /* Current gpgpu, owned by this structure. */
cl_gpgpu_event gpgpu_event; /* The event object communicate with hardware */
user_callback* user_cb; /* The event callback functions */
enqueue_callback* enqueue_cb; /* This event's enqueue */
@@ -95,9 +96,11 @@ cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const cl_event
cl_int cl_event_barrier_with_wait_list(cl_command_queue, cl_uint, const cl_event *, cl_event*);
/* Do the event profiling */
cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name);
-/*insert the user event*/
+/* insert the user event */
cl_int cl_event_insert_user_event(user_event** p_u_ev, cl_event event);
-/*remove the user event*/
+/* remove the user event */
cl_int cl_event_remove_user_event(user_event** p_u_ev, cl_event event);
+/* flush the event's pending gpgpu batch buffer and notify driver this gpgpu event has been flushed. */
+void cl_event_flush(cl_event event);
#endif /* __CL_EVENT_H__ */
diff --git a/src/cl_thread.c b/src/cl_thread.c
index 4692ed7..5713d70 100644
--- a/src/cl_thread.c
+++ b/src/cl_thread.c
@@ -209,6 +209,26 @@ void cl_invalid_thread_gpgpu(cl_command_queue queue)
spec->valid = 0;
}
+cl_gpgpu cl_thread_gpgpu_take(cl_command_queue queue)
+{
+ queue_thread_private *thread_private = ((queue_thread_private *)(queue->thread_data));
+ thread_spec_data* spec = NULL;
+
+ pthread_mutex_lock(&thread_private->thread_data_lock);
+ spec = thread_private->threads_data[thread_id];
+ assert(spec);
+ pthread_mutex_unlock(&thread_private->thread_data_lock);
+
+ if (!spec->valid)
+ return NULL;
+
+ assert(spec->gpgpu);
+ cl_gpgpu gpgpu = spec->gpgpu;
+ spec->gpgpu = NULL;
+ spec->valid = 0;
+ return gpgpu;
+}
+
/* The destructor for clean the thread specific data. */
void cl_thread_data_destroy(cl_command_queue queue)
{
diff --git a/src/cl_thread.h b/src/cl_thread.h
index bf855a2..ecc99ad 100644
--- a/src/cl_thread.h
+++ b/src/cl_thread.h
@@ -41,4 +41,7 @@ void cl_set_thread_batch_buf(cl_command_queue queue, void* buf);
/* Used to get the batch buffer of each thread. */
void* cl_get_thread_batch_buf(cl_command_queue queue);
+/* take current gpgpu from the thread gpgpu pool. */
+cl_gpgpu cl_thread_gpgpu_take(cl_command_queue queue);
+
#endif /* __CL_THREAD_H__ */
diff --git a/src/intel/intel_batchbuffer.c b/src/intel/intel_batchbuffer.c
index d0da77a..7767db3 100644
--- a/src/intel/intel_batchbuffer.c
+++ b/src/intel/intel_batchbuffer.c
@@ -185,16 +185,3 @@ intel_batchbuffer_delete(intel_batchbuffer_t *batch)
cl_free(batch);
}
-
-LOCAL void
-intel_batchbuffer_take(intel_batchbuffer_t *from, intel_batchbuffer_t *to)
-{
- *to = *from;
- //Need not unreference the buffer, to will unreference it.
- from->buffer = NULL;
- from->map = NULL;
- from->ptr = NULL;
- from->size = 0;
- from->atomic = 0;
- from->enable_slm = 0;
-}
diff --git a/src/intel/intel_batchbuffer.h b/src/intel/intel_batchbuffer.h
index c62043e..0c3bc13 100644
--- a/src/intel/intel_batchbuffer.h
+++ b/src/intel/intel_batchbuffer.h
@@ -101,7 +101,6 @@ extern void intel_batchbuffer_init(intel_batchbuffer_t*, struct intel_driver*);
extern void intel_batchbuffer_terminate(intel_batchbuffer_t*);
extern void intel_batchbuffer_flush(intel_batchbuffer_t*);
extern void intel_batchbuffer_reset(intel_batchbuffer_t*, size_t sz);
-extern void intel_batchbuffer_take(intel_batchbuffer_t*, intel_batchbuffer_t *);
static INLINE uint32_t
intel_batchbuffer_space(const intel_batchbuffer_t *batch)
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index a7c449d..d00bc83 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -60,9 +60,8 @@ typedef struct surface_heap {
} surface_heap_t;
typedef struct intel_event {
- intel_batchbuffer_t *batch;
- drm_intel_bo* buffer;
- drm_intel_bo* ts_buf;
+ drm_intel_bo *buffer;
+ drm_intel_bo *ts_buf;
int status;
} intel_event_t;
@@ -569,12 +568,19 @@ intel_gpgpu_check_binded_buf_address(intel_gpgpu_t *gpgpu)
}
static void
+intel_gpgpu_flush_batch_buffer(intel_batchbuffer_t *batch)
+{
+ assert(batch);
+ intel_batchbuffer_emit_mi_flush(batch);
+ intel_batchbuffer_flush(batch);
+}
+
+static void
intel_gpgpu_flush(intel_gpgpu_t *gpgpu)
{
if (!gpgpu->batch || !gpgpu->batch->buffer)
return;
- intel_batchbuffer_emit_mi_flush(gpgpu->batch);
- intel_batchbuffer_flush(gpgpu->batch);
+ intel_gpgpu_flush_batch_buffer(gpgpu->batch);
intel_gpgpu_check_binded_buf_address(gpgpu);
}
@@ -1156,11 +1162,10 @@ intel_gpgpu_event_new(intel_gpgpu_t *gpgpu)
intel_event_t *event = NULL;
TRY_ALLOC_NO_ERR (event, CALLOC(intel_event_t));
- event->status = command_queued;
- event->batch = NULL;
event->buffer = gpgpu->batch->buffer;
- if(event->buffer != NULL)
+ if (event->buffer)
drm_intel_bo_reference(event->buffer);
+ event->status = command_queued;
if(gpgpu->time_stamp_b.bo) {
event->ts_buf = gpgpu->time_stamp_b.bo;
@@ -1175,6 +1180,17 @@ error:
goto exit;
}
+/*
+ The upper layer already flushed the batch buffer, just update
+ internal status to command_submitted.
+*/
+static void
+intel_gpgpu_event_flush(intel_event_t *event)
+{
+ assert(event->status == command_queued);
+ event->status = command_running;
+}
+
static int
intel_gpgpu_event_update_status(intel_event_t *event, int wait)
{
@@ -1182,7 +1198,7 @@ intel_gpgpu_event_update_status(intel_event_t *event, int wait)
return event->status;
if (event->buffer &&
- event->batch == NULL && //have flushed
+ event->status == command_running &&
!drm_intel_bo_busy(event->buffer)) {
event->status = command_complete;
drm_intel_bo_unreference(event->buffer);
@@ -1203,36 +1219,8 @@ intel_gpgpu_event_update_status(intel_event_t *event, int wait)
}
static void
-intel_gpgpu_event_pending(intel_gpgpu_t *gpgpu, intel_event_t *event)
-{
- assert(event->buffer); //This is gpu enqueue command
- assert(event->batch == NULL); //This command haven't pengding.
- event->batch = intel_batchbuffer_new(gpgpu->drv);
- assert(event->batch);
- intel_batchbuffer_take(gpgpu->batch, event->batch);
-}
-
-static void
-intel_gpgpu_event_resume(intel_event_t *event)
-{
- assert(event->batch); //This command have pending.
- intel_batchbuffer_flush(event->batch);
- intel_batchbuffer_delete(event->batch);
- event->batch = NULL;
-}
-
-static void
-intel_gpgpu_event_cancel(intel_event_t *event)
-{
- assert(event->batch); //This command have pending.
- intel_batchbuffer_delete(event->batch);
- event->batch = NULL;
-}
-
-static void
intel_gpgpu_event_delete(intel_event_t *event)
{
- assert(event->batch == NULL); //This command must have been flushed.
if(event->buffer)
drm_intel_bo_unreference(event->buffer);
if(event->ts_buf)
@@ -1412,10 +1400,8 @@ intel_set_gpgpu_callbacks(int device_id)
cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler;
cl_gpgpu_set_scratch = (cl_gpgpu_set_scratch_cb *) intel_gpgpu_set_scratch;
cl_gpgpu_event_new = (cl_gpgpu_event_new_cb *)intel_gpgpu_event_new;
+ cl_gpgpu_event_flush = (cl_gpgpu_event_flush_cb *)intel_gpgpu_event_flush;
cl_gpgpu_event_update_status = (cl_gpgpu_event_update_status_cb *)intel_gpgpu_event_update_status;
- cl_gpgpu_event_pending = (cl_gpgpu_event_pending_cb *)intel_gpgpu_event_pending;
- cl_gpgpu_event_resume = (cl_gpgpu_event_resume_cb *)intel_gpgpu_event_resume;
- cl_gpgpu_event_cancel = (cl_gpgpu_event_cancel_cb *)intel_gpgpu_event_cancel;
cl_gpgpu_event_delete = (cl_gpgpu_event_delete_cb *)intel_gpgpu_event_delete;
cl_gpgpu_event_get_exec_timestamp = (cl_gpgpu_event_get_exec_timestamp_cb *)intel_gpgpu_event_get_exec_timestamp;
cl_gpgpu_event_get_gpu_cur_timestamp = (cl_gpgpu_event_get_gpu_cur_timestamp_cb *)intel_gpgpu_event_get_gpu_cur_timestamp;
--
1.8.3.2
More information about the Beignet
mailing list