[Beignet] [PATCH V2] Modify the multi-thread support for queue.

Mon Dec 30 23:41:47 PST 2013

Tested ok, pushed, thanks.
On Tue, Dec 31, 2013 at 03:25:57PM +0800, junyan.he at inbox.com wrote:
> From: Junyan He <junyan.he at linux.intel.com>
> 
> The old multi-thread support for the queue does not work
> when threads do not exit. If a thread does not exit
> but the queue is re-created all the time, the
> gpgpu struct resources will leak, and creating a GPU bo
> for the gpgpu struct will eventually fail.
> We modify it to release the GPGPU resource each time an
> enqueue NDRange finishes, and we re-allocate our gpgpu
> struct context the next time it is needed.
> ---
>  src/cl_command_queue.c      |    6 ++--
>  src/cl_command_queue_gen7.c |    1 +
>  src/cl_driver.h             |   10 +++++-
>  src/cl_driver_defs.c        |    2 ++
>  src/cl_thread.c             |   79 ++++++++++++++++++++++++++++++++++++-------
>  src/cl_thread.h             |   10 ++++++
>  src/intel/intel_gpgpu.c     |   20 +++++++++--
>  7 files changed, 110 insertions(+), 18 deletions(-)
> 
> diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
> index 3530976..4ac2e11 100644
> --- a/src/cl_command_queue.c
> +++ b/src/cl_command_queue.c
> @@ -419,15 +419,15 @@ cl_command_queue_flush(cl_command_queue queue)
>    GET_QUEUE_THREAD_GPGPU(queue);
>  
>    cl_gpgpu_flush(gpgpu);
> +
> +  cl_invalid_thread_gpgpu(queue);
>    return CL_SUCCESS;
>  }
>  
>  LOCAL cl_int
>  cl_command_queue_finish(cl_command_queue queue)
>  {
> -  GET_QUEUE_THREAD_GPGPU(queue);
> -
> -  cl_gpgpu_sync(gpgpu);
> +  cl_gpgpu_sync(cl_get_thread_batch_buf());
>    return CL_SUCCESS;
>  }
>  
> diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
> index 923a881..ba69589 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -336,6 +336,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
>    /* Start a new batch buffer */
>    batch_sz = cl_kernel_compute_batch_sz(ker);
>    cl_gpgpu_batch_reset(gpgpu, batch_sz);
> +  cl_set_thread_batch_buf(cl_gpgpu_ref_batch_buf(gpgpu));
>    cl_gpgpu_batch_start(gpgpu);
>  
>    /* Issue the GPGPU_WALKER command */
> diff --git a/src/cl_driver.h b/src/cl_driver.h
> index a34c22e..96fc377 100644
> --- a/src/cl_driver.h
> +++ b/src/cl_driver.h
> @@ -95,7 +95,7 @@ typedef void (cl_gpgpu_delete_cb)(cl_gpgpu);
>  extern cl_gpgpu_delete_cb *cl_gpgpu_delete;
>  
>  /* Synchonize GPU with CPU */
> -typedef cl_gpgpu (cl_gpgpu_sync_cb)(cl_gpgpu);
> +typedef void (cl_gpgpu_sync_cb)(void*);
>  extern cl_gpgpu_sync_cb *cl_gpgpu_sync;
>  
>  /* Bind a regular unformatted buffer */
> @@ -200,6 +200,14 @@ extern cl_gpgpu_event_get_exec_timestamp_cb *cl_gpgpu_event_get_exec_timestamp;
>  typedef void (cl_gpgpu_event_get_gpu_cur_timestamp_cb)(cl_gpgpu, uint64_t*);
>  extern cl_gpgpu_event_get_gpu_cur_timestamp_cb *cl_gpgpu_event_get_gpu_cur_timestamp;
>  
> +/* Get current batch buffer handle */
> +typedef void* (cl_gpgpu_ref_batch_buf_cb)(cl_gpgpu);
> +extern cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf;
> +
> +/* Release a reference on the batch buffer handle */
> +typedef void (cl_gpgpu_unref_batch_buf_cb)(void*);
> +extern cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf;
> +
>  /* Will spawn all threads */
>  typedef void (cl_gpgpu_walker_cb)(cl_gpgpu,
>                                    uint32_t simd_sz,
> diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
> index b46799a..0a9012c 100644
> --- a/src/cl_driver_defs.c
> +++ b/src/cl_driver_defs.c
> @@ -82,4 +82,6 @@ LOCAL cl_gpgpu_event_resume_cb *cl_gpgpu_event_resume = NULL;
>  LOCAL cl_gpgpu_event_delete_cb *cl_gpgpu_event_delete = NULL;
>  LOCAL cl_gpgpu_event_get_exec_timestamp_cb *cl_gpgpu_event_get_exec_timestamp = NULL;
>  LOCAL cl_gpgpu_event_get_gpu_cur_timestamp_cb *cl_gpgpu_event_get_gpu_cur_timestamp = NULL;
> +LOCAL cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf = NULL;
> +LOCAL cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf = NULL;
>  
> diff --git a/src/cl_thread.c b/src/cl_thread.c
> index fbad5c5..cadc3cd 100644
> --- a/src/cl_thread.c
> +++ b/src/cl_thread.c
> @@ -20,30 +20,75 @@
>  #include "cl_alloc.h"
>  #include "cl_utils.h"
>  
> +static __thread void* thread_batch_buf = NULL;
> +
> +typedef struct _cl_thread_spec_data {
> +  cl_gpgpu gpgpu ;
> +  int valid;
> +}cl_thread_spec_data;
> +
> +void cl_set_thread_batch_buf(void* buf) {
> +  if (thread_batch_buf) {
> +    cl_gpgpu_unref_batch_buf(thread_batch_buf);
> +  }
> +  thread_batch_buf = buf;
> +}
> +
> +void* cl_get_thread_batch_buf(void) {
> +  return thread_batch_buf;
> +}
> +
>  cl_gpgpu cl_get_thread_gpgpu(cl_command_queue queue)
>  {
>    pthread_key_t* key = queue->thread_data;
> -  cl_gpgpu gpgpu = pthread_getspecific(*key);
> +  cl_thread_spec_data* thread_spec_data = pthread_getspecific(*key);
>  
> -  if (!gpgpu) {
> -    TRY_ALLOC_NO_ERR (gpgpu, cl_gpgpu_new(queue->ctx->drv));
> +  if (!thread_spec_data) {
> +    TRY_ALLOC_NO_ERR(thread_spec_data, CALLOC(struct _cl_thread_spec_data));
> +    if (pthread_setspecific(*key, thread_spec_data)) {
> +      cl_free(thread_spec_data);
> +      return NULL;
> +    }
>    }
>  
> -  if (pthread_setspecific(*key, gpgpu)) {
> -    cl_gpgpu_delete(gpgpu);
> -    goto error;
> +  if (!thread_spec_data->valid) {
> +    TRY_ALLOC_NO_ERR(thread_spec_data->gpgpu, cl_gpgpu_new(queue->ctx->drv));
> +    thread_spec_data->valid = 1;
>    }
>  
> -exit:
> -  return gpgpu;
>  error:
> -  pthread_setspecific(*key, NULL);
> -  goto exit;
> +  return thread_spec_data->gpgpu;
> +}
> +
> +void cl_invalid_thread_gpgpu(cl_command_queue queue)
> +{
> +  pthread_key_t* key = queue->thread_data;
> +  cl_thread_spec_data* thread_spec_data = pthread_getspecific(*key);
> +
> +  if (!thread_spec_data) {
> +    return;
> +  }
> +
> +  if (!thread_spec_data->valid) {
> +    return;
> +  }
> +
> +  assert(thread_spec_data->gpgpu);
> +  cl_gpgpu_delete(thread_spec_data->gpgpu);
> +  thread_spec_data->valid = 0;
>  }
>  
>  static void thread_data_destructor(void *data) {
> -  cl_gpgpu gpgpu = (cl_gpgpu)data;
> -  cl_gpgpu_delete(gpgpu);
> +  cl_thread_spec_data* thread_spec_data = (cl_thread_spec_data *)data;
> +
> +  if (thread_batch_buf) {
> +    cl_gpgpu_unref_batch_buf(thread_batch_buf);
> +    thread_batch_buf = NULL;
> +  }
> +
> +  if (thread_spec_data->valid)
> +    cl_gpgpu_delete(thread_spec_data->gpgpu);
> +  cl_free(thread_spec_data);
>  }
>  
>  /* Create the thread specific data. */
> @@ -67,6 +112,16 @@ void* cl_thread_data_create(void)
>  void cl_thread_data_destroy(void * data)
>  {
>    pthread_key_t *thread_specific_key = (pthread_key_t *)data;
> +
> +  /* First release self spec data. */
> +  cl_thread_spec_data* thread_spec_data =
> +         pthread_getspecific(*thread_specific_key);
> +  if (thread_spec_data && thread_spec_data->valid) {
> +    cl_gpgpu_delete(thread_spec_data->gpgpu);
> +    if (thread_spec_data)
> +      cl_free(thread_spec_data);
> +  }
> +
>    pthread_key_delete(*thread_specific_key);
>    cl_free(thread_specific_key);
>  }
> diff --git a/src/cl_thread.h b/src/cl_thread.h
> index 65f1bcf..c8ab63c 100644
> --- a/src/cl_thread.h
> +++ b/src/cl_thread.h
> @@ -31,4 +31,14 @@ void cl_thread_data_destroy(void * data);
>  
>  /* Used to get the gpgpu struct of each thread. */
>  cl_gpgpu cl_get_thread_gpgpu(cl_command_queue queue);
> +
> +/* Used to release the gpgpu struct of each thread. */
> +void cl_invalid_thread_gpgpu(cl_command_queue queue);
> +
> +/* Used to set the batch buffer of each thread. */
> +void cl_set_thread_batch_buf(void* buf);
> +
> +/* Used to get the batch buffer of each thread. */
> +void* cl_get_thread_batch_buf(void);
> +
>  #endif /* __CL_THREAD_H__ */
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
> index b1597ac..b2d8bb0 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -117,10 +117,24 @@ typedef struct intel_gpgpu intel_gpgpu_t;
>  
>  
>  static void
> -intel_gpgpu_sync(intel_gpgpu_t *gpgpu)
> +intel_gpgpu_sync(void *buf)
> +{
> +  if (buf)
> +    drm_intel_bo_wait_rendering((drm_intel_bo *)buf);
> +}
> +
> +static void *intel_gpgpu_ref_batch_buf(intel_gpgpu_t *gpgpu)
>  {
>    if (gpgpu->batch->last_bo)
> -    drm_intel_bo_wait_rendering(gpgpu->batch->last_bo);
> +    drm_intel_bo_reference(gpgpu->batch->last_bo);
> +
> +  return gpgpu->batch->last_bo;
> +}
> +
> +static void intel_gpgpu_unref_batch_buf(void *buf)
> +{
> +  if (buf)
> +    drm_intel_bo_unreference((drm_intel_bo *)buf);
>  }
>  
>  static void
> @@ -1111,5 +1125,7 @@ intel_set_gpgpu_callbacks(void)
>    cl_gpgpu_event_delete = (cl_gpgpu_event_delete_cb *)intel_gpgpu_event_delete;
>    cl_gpgpu_event_get_exec_timestamp = (cl_gpgpu_event_get_exec_timestamp_cb *)intel_gpgpu_event_get_exec_timestamp;
>    cl_gpgpu_event_get_gpu_cur_timestamp = (cl_gpgpu_event_get_gpu_cur_timestamp_cb *)intel_gpgpu_event_get_gpu_cur_timestamp;
> +  cl_gpgpu_ref_batch_buf = (cl_gpgpu_ref_batch_buf_cb *)intel_gpgpu_ref_batch_buf;
> +  cl_gpgpu_unref_batch_buf = (cl_gpgpu_unref_batch_buf_cb *)intel_gpgpu_unref_batch_buf;
>  }
>  
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet