[Beignet] [PATCH V2] Modify the multi-thread support for queue.
Zhigang Gong
zhigang.gong at linux.intel.com
Mon Dec 30 23:41:47 PST 2013
Tested ok, pushed, thanks.
On Tue, Dec 31, 2013 at 03:25:57PM +0800, junyan.he at inbox.com wrote:
> From: Junyan He <junyan.he at linux.intel.com>
>
> The old multi-thread support for queues does not work
> when threads never exit. If a thread does not exit
> but the queue is re-created all the time, the
> gpgpu struct resource will leak, and creating the GPU
> bo for the gpgpu struct will eventually fail.
> We modify it to release the GPGPU resource each time
> an enqueueNDRange finishes, and we re-allocate our gpgpu
> struct context the next time it is needed.
> ---
> src/cl_command_queue.c | 6 ++--
> src/cl_command_queue_gen7.c | 1 +
> src/cl_driver.h | 10 +++++-
> src/cl_driver_defs.c | 2 ++
> src/cl_thread.c | 79 ++++++++++++++++++++++++++++++++++++-------
> src/cl_thread.h | 10 ++++++
> src/intel/intel_gpgpu.c | 20 +++++++++--
> 7 files changed, 110 insertions(+), 18 deletions(-)
>
> diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
> index 3530976..4ac2e11 100644
> --- a/src/cl_command_queue.c
> +++ b/src/cl_command_queue.c
> @@ -419,15 +419,15 @@ cl_command_queue_flush(cl_command_queue queue)
> GET_QUEUE_THREAD_GPGPU(queue);
>
> cl_gpgpu_flush(gpgpu);
> +
> + cl_invalid_thread_gpgpu(queue);
> return CL_SUCCESS;
> }
>
> LOCAL cl_int
> cl_command_queue_finish(cl_command_queue queue)
> {
> - GET_QUEUE_THREAD_GPGPU(queue);
> -
> - cl_gpgpu_sync(gpgpu);
> + cl_gpgpu_sync(cl_get_thread_batch_buf());
> return CL_SUCCESS;
> }
>
> diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
> index 923a881..ba69589 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -336,6 +336,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
> /* Start a new batch buffer */
> batch_sz = cl_kernel_compute_batch_sz(ker);
> cl_gpgpu_batch_reset(gpgpu, batch_sz);
> + cl_set_thread_batch_buf(cl_gpgpu_ref_batch_buf(gpgpu));
> cl_gpgpu_batch_start(gpgpu);
>
> /* Issue the GPGPU_WALKER command */
> diff --git a/src/cl_driver.h b/src/cl_driver.h
> index a34c22e..96fc377 100644
> --- a/src/cl_driver.h
> +++ b/src/cl_driver.h
> @@ -95,7 +95,7 @@ typedef void (cl_gpgpu_delete_cb)(cl_gpgpu);
> extern cl_gpgpu_delete_cb *cl_gpgpu_delete;
>
> /* Synchonize GPU with CPU */
> -typedef cl_gpgpu (cl_gpgpu_sync_cb)(cl_gpgpu);
> +typedef void (cl_gpgpu_sync_cb)(void*);
> extern cl_gpgpu_sync_cb *cl_gpgpu_sync;
>
> /* Bind a regular unformatted buffer */
> @@ -200,6 +200,14 @@ extern cl_gpgpu_event_get_exec_timestamp_cb *cl_gpgpu_event_get_exec_timestamp;
> typedef void (cl_gpgpu_event_get_gpu_cur_timestamp_cb)(cl_gpgpu, uint64_t*);
> extern cl_gpgpu_event_get_gpu_cur_timestamp_cb *cl_gpgpu_event_get_gpu_cur_timestamp;
>
> +/* Get current batch buffer handle */
> +typedef void* (cl_gpgpu_ref_batch_buf_cb)(cl_gpgpu);
> +extern cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf;
> +
> +/* Get release batch buffer handle */
> +typedef void (cl_gpgpu_unref_batch_buf_cb)(void*);
> +extern cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf;
> +
> /* Will spawn all threads */
> typedef void (cl_gpgpu_walker_cb)(cl_gpgpu,
> uint32_t simd_sz,
> diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
> index b46799a..0a9012c 100644
> --- a/src/cl_driver_defs.c
> +++ b/src/cl_driver_defs.c
> @@ -82,4 +82,6 @@ LOCAL cl_gpgpu_event_resume_cb *cl_gpgpu_event_resume = NULL;
> LOCAL cl_gpgpu_event_delete_cb *cl_gpgpu_event_delete = NULL;
> LOCAL cl_gpgpu_event_get_exec_timestamp_cb *cl_gpgpu_event_get_exec_timestamp = NULL;
> LOCAL cl_gpgpu_event_get_gpu_cur_timestamp_cb *cl_gpgpu_event_get_gpu_cur_timestamp = NULL;
> +LOCAL cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf = NULL;
> +LOCAL cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf = NULL;
>
> diff --git a/src/cl_thread.c b/src/cl_thread.c
> index fbad5c5..cadc3cd 100644
> --- a/src/cl_thread.c
> +++ b/src/cl_thread.c
> @@ -20,30 +20,75 @@
> #include "cl_alloc.h"
> #include "cl_utils.h"
>
> +static __thread void* thread_batch_buf = NULL;
> +
> +typedef struct _cl_thread_spec_data {
> + cl_gpgpu gpgpu ;
> + int valid;
> +}cl_thread_spec_data;
> +
> +void cl_set_thread_batch_buf(void* buf) {
> + if (thread_batch_buf) {
> + cl_gpgpu_unref_batch_buf(thread_batch_buf);
> + }
> + thread_batch_buf = buf;
> +}
> +
> +void* cl_get_thread_batch_buf(void) {
> + return thread_batch_buf;
> +}
> +
> cl_gpgpu cl_get_thread_gpgpu(cl_command_queue queue)
> {
> pthread_key_t* key = queue->thread_data;
> - cl_gpgpu gpgpu = pthread_getspecific(*key);
> + cl_thread_spec_data* thread_spec_data = pthread_getspecific(*key);
>
> - if (!gpgpu) {
> - TRY_ALLOC_NO_ERR (gpgpu, cl_gpgpu_new(queue->ctx->drv));
> + if (!thread_spec_data) {
> + TRY_ALLOC_NO_ERR(thread_spec_data, CALLOC(struct _cl_thread_spec_data));
> + if (pthread_setspecific(*key, thread_spec_data)) {
> + cl_free(thread_spec_data);
> + return NULL;
> + }
> }
>
> - if (pthread_setspecific(*key, gpgpu)) {
> - cl_gpgpu_delete(gpgpu);
> - goto error;
> + if (!thread_spec_data->valid) {
> + TRY_ALLOC_NO_ERR(thread_spec_data->gpgpu, cl_gpgpu_new(queue->ctx->drv));
> + thread_spec_data->valid = 1;
> }
>
> -exit:
> - return gpgpu;
> error:
> - pthread_setspecific(*key, NULL);
> - goto exit;
> + return thread_spec_data->gpgpu;
> +}
> +
> +void cl_invalid_thread_gpgpu(cl_command_queue queue)
> +{
> + pthread_key_t* key = queue->thread_data;
> + cl_thread_spec_data* thread_spec_data = pthread_getspecific(*key);
> +
> + if (!thread_spec_data) {
> + return;
> + }
> +
> + if (!thread_spec_data->valid) {
> + return;
> + }
> +
> + assert(thread_spec_data->gpgpu);
> + cl_gpgpu_delete(thread_spec_data->gpgpu);
> + thread_spec_data->valid = 0;
> }
>
> static void thread_data_destructor(void *data) {
> - cl_gpgpu gpgpu = (cl_gpgpu)data;
> - cl_gpgpu_delete(gpgpu);
> + cl_thread_spec_data* thread_spec_data = (cl_thread_spec_data *)data;
> +
> + if (thread_batch_buf) {
> + cl_gpgpu_unref_batch_buf(thread_batch_buf);
> + thread_batch_buf = NULL;
> + }
> +
> + if (thread_spec_data->valid)
> + cl_gpgpu_delete(thread_spec_data->gpgpu);
> + cl_free(thread_spec_data);
> }
>
> /* Create the thread specific data. */
> @@ -67,6 +112,16 @@ void* cl_thread_data_create(void)
> void cl_thread_data_destroy(void * data)
> {
> pthread_key_t *thread_specific_key = (pthread_key_t *)data;
> +
> + /* First release self spec data. */
> + cl_thread_spec_data* thread_spec_data =
> + pthread_getspecific(*thread_specific_key);
> + if (thread_spec_data && thread_spec_data->valid) {
> + cl_gpgpu_delete(thread_spec_data->gpgpu);
> + if (thread_spec_data)
> + cl_free(thread_spec_data);
> + }
> +
> pthread_key_delete(*thread_specific_key);
> cl_free(thread_specific_key);
> }
> diff --git a/src/cl_thread.h b/src/cl_thread.h
> index 65f1bcf..c8ab63c 100644
> --- a/src/cl_thread.h
> +++ b/src/cl_thread.h
> @@ -31,4 +31,14 @@ void cl_thread_data_destroy(void * data);
>
> /* Used to get the gpgpu struct of each thread. */
> cl_gpgpu cl_get_thread_gpgpu(cl_command_queue queue);
> +
> +/* Used to release the gpgpu struct of each thread. */
> +void cl_invalid_thread_gpgpu(cl_command_queue queue);
> +
> +/* Used to set the batch buffer of each thread. */
> +void cl_set_thread_batch_buf(void* buf);
> +
> +/* Used to get the batch buffer of each thread. */
> +void* cl_get_thread_batch_buf(void);
> +
> #endif /* __CL_THREAD_H__ */
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
> index b1597ac..b2d8bb0 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -117,10 +117,24 @@ typedef struct intel_gpgpu intel_gpgpu_t;
>
>
> static void
> -intel_gpgpu_sync(intel_gpgpu_t *gpgpu)
> +intel_gpgpu_sync(void *buf)
> +{
> + if (buf)
> + drm_intel_bo_wait_rendering((drm_intel_bo *)buf);
> +}
> +
> +static void *intel_gpgpu_ref_batch_buf(intel_gpgpu_t *gpgpu)
> {
> if (gpgpu->batch->last_bo)
> - drm_intel_bo_wait_rendering(gpgpu->batch->last_bo);
> + drm_intel_bo_reference(gpgpu->batch->last_bo);
> +
> + return gpgpu->batch->last_bo;
> +}
> +
> +static void intel_gpgpu_unref_batch_buf(void *buf)
> +{
> + if (buf)
> + drm_intel_bo_unreference((drm_intel_bo *)buf);
> }
>
> static void
> @@ -1111,5 +1125,7 @@ intel_set_gpgpu_callbacks(void)
> cl_gpgpu_event_delete = (cl_gpgpu_event_delete_cb *)intel_gpgpu_event_delete;
> cl_gpgpu_event_get_exec_timestamp = (cl_gpgpu_event_get_exec_timestamp_cb *)intel_gpgpu_event_get_exec_timestamp;
> cl_gpgpu_event_get_gpu_cur_timestamp = (cl_gpgpu_event_get_gpu_cur_timestamp_cb *)intel_gpgpu_event_get_gpu_cur_timestamp;
> + cl_gpgpu_ref_batch_buf = (cl_gpgpu_ref_batch_buf_cb *)intel_gpgpu_ref_batch_buf;
> + cl_gpgpu_unref_batch_buf = (cl_gpgpu_unref_batch_buf_cb *)intel_gpgpu_unref_batch_buf;
> }
>
> --
> 1.7.9.5
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list