[Beignet] [Patch V2] Refine the intel gpgpu delete.

Zhigang Gong zhigang.gong at linux.intel.com
Mon Oct 27 21:31:47 PDT 2014


LGTM, just pushed, thanks.

On Mon, Oct 27, 2014 at 04:16:51PM +0800, Yang Rong wrote:
> The intel gpgpu struct is destroyed when a new gpgpu struct needed. But in that time,
> the command batch relative with the destroyed gpgpu may have not finish, and the resource
> in gpgpu still used by gpgpu, can't be destroyed.
> So, when delete a gpgpu, check the batch status, if have not complete, insert to list in intel driver,
> and delete all finished gpgpu in that list.
> 
> V2: V1 assume all the gpgpu list would complete in order, but it is not true. The insert order in is the
>     gpgpu delete order, not the flush command batch order. So visit and delete all complete gpgpu in the list.
> 
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  src/intel/intel_batchbuffer.c |   5 +-
>  src/intel/intel_driver.c      |  27 +--------
>  src/intel/intel_driver.h      |  28 ++++++++++
>  src/intel/intel_gpgpu.c       | 127 ++++++++++++++++++++++++------------------
>  src/intel/intel_gpgpu.h       |  64 +++++++++++++++++++++
>  5 files changed, 169 insertions(+), 82 deletions(-)
> 
> diff --git a/src/intel/intel_batchbuffer.c b/src/intel/intel_batchbuffer.c
> index ba6b05f..b054fc7 100644
> --- a/src/intel/intel_batchbuffer.c
> +++ b/src/intel/intel_batchbuffer.c
> @@ -136,8 +136,9 @@ intel_batchbuffer_flush(intel_batchbuffer_t *batch)
>    if (!is_locked)
>      intel_driver_unlock_hardware(batch->intel);
>  
> -  // Release the buffer
> -  intel_batchbuffer_terminate(batch);
> +  // Can't release buffer here. gpgpu only can be delete only when the batch buffer is complete.
> +  // Remain the buffer for gpgpu delete check.
> +  //intel_batchbuffer_terminate(batch);
>  }
>  
>  LOCAL void 
> diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
> index 390e965..bb97220 100644
> --- a/src/intel/intel_driver.c
> +++ b/src/intel/intel_driver.c
> @@ -79,31 +79,6 @@
>  #include "cl_device_id.h"
>  #include "cl_platform_id.h"
>  
> -#define SET_BLOCKED_SIGSET(DRIVER)   do {                     \
> -  sigset_t bl_mask;                                           \
> -  sigfillset(&bl_mask);                                       \
> -  sigdelset(&bl_mask, SIGFPE);                                \
> -  sigdelset(&bl_mask, SIGILL);                                \
> -  sigdelset(&bl_mask, SIGSEGV);                               \
> -  sigdelset(&bl_mask, SIGBUS);                                \
> -  sigdelset(&bl_mask, SIGKILL);                               \
> -  pthread_sigmask(SIG_SETMASK, &bl_mask, &(DRIVER)->sa_mask); \
> -} while (0)
> -
> -#define RESTORE_BLOCKED_SIGSET(DRIVER) do {                   \
> -  pthread_sigmask(SIG_SETMASK, &(DRIVER)->sa_mask, NULL);     \
> -} while (0)
> -
> -#define PPTHREAD_MUTEX_LOCK(DRIVER) do {                      \
> -  SET_BLOCKED_SIGSET(DRIVER);                                 \
> -  pthread_mutex_lock(&(DRIVER)->ctxmutex);                    \
> -} while (0)
> -
> -#define PPTHREAD_MUTEX_UNLOCK(DRIVER) do {                    \
> -  pthread_mutex_unlock(&(DRIVER)->ctxmutex);                  \
> -  RESTORE_BLOCKED_SIGSET(DRIVER);                             \
> -} while (0)
> -
>  static void
>  intel_driver_delete(intel_driver_t *driver)
>  {
> @@ -423,11 +398,13 @@ intel_get_device_id(void)
>    return intel_device_id;
>  }
>  
> +extern void intel_gpgpu_delete_all(intel_driver_t *driver);
>  static void
>  cl_intel_driver_delete(intel_driver_t *driver)
>  {
>    if (driver == NULL)
>      return;
> +  intel_gpgpu_delete_all(driver);
>    intel_driver_context_destroy(driver);
>    intel_driver_close(driver);
>    intel_driver_terminate(driver);
> diff --git a/src/intel/intel_driver.h b/src/intel/intel_driver.h
> index 107fdfc..8b82738 100644
> --- a/src/intel/intel_driver.h
> +++ b/src/intel/intel_driver.h
> @@ -54,6 +54,7 @@
>  #include <drm.h>
>  #include <i915_drm.h>
>  #include <intel_bufmgr.h>
> +#include <intel/intel_gpgpu.h>
>  
>  #define CMD_MI                                  (0x0 << 29)
>  #define CMD_2D                                  (0x2 << 29)
> @@ -73,6 +74,7 @@
>  #define BR13_8888                               (0x3 << 24)
>  
>  struct dri_state;
> +struct intel_gpgpu_node;
>  typedef struct _XDisplay Display;
>  
>  typedef struct intel_driver
> @@ -88,8 +90,34 @@ typedef struct intel_driver
>    int need_close;
>    Display *x11_display;
>    struct dri_state *dri_ctx;
> +  struct intel_gpgpu_node *gpgpu_list;
>  } intel_driver_t;
>  
> +#define SET_BLOCKED_SIGSET(DRIVER)   do {                     \
> +  sigset_t bl_mask;                                           \
> +  sigfillset(&bl_mask);                                       \
> +  sigdelset(&bl_mask, SIGFPE);                                \
> +  sigdelset(&bl_mask, SIGILL);                                \
> +  sigdelset(&bl_mask, SIGSEGV);                               \
> +  sigdelset(&bl_mask, SIGBUS);                                \
> +  sigdelset(&bl_mask, SIGKILL);                               \
> +  pthread_sigmask(SIG_SETMASK, &bl_mask, &(DRIVER)->sa_mask); \
> +} while (0)
> +
> +#define RESTORE_BLOCKED_SIGSET(DRIVER) do {                   \
> +  pthread_sigmask(SIG_SETMASK, &(DRIVER)->sa_mask, NULL);     \
> +} while (0)
> +
> +#define PPTHREAD_MUTEX_LOCK(DRIVER) do {                      \
> +  SET_BLOCKED_SIGSET(DRIVER);                                 \
> +  pthread_mutex_lock(&(DRIVER)->ctxmutex);                    \
> +} while (0)
> +
> +#define PPTHREAD_MUTEX_UNLOCK(DRIVER) do {                    \
> +  pthread_mutex_unlock(&(DRIVER)->ctxmutex);                  \
> +  RESTORE_BLOCKED_SIGSET(DRIVER);                             \
> +} while (0)
> +
>  /* device control */
>  extern void intel_driver_lock_hardware(intel_driver_t*);
>  extern void intel_driver_unlock_hardware(intel_driver_t*);
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
> index 325d982..603def6 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -33,8 +33,6 @@
>  #include "intel/intel_gpgpu.h"
>  #include "intel/intel_defines.h"
>  #include "intel/intel_structs.h"
> -#include "intel/intel_batchbuffer.h"
> -#include "intel/intel_driver.h"
>  #include "program.h" // for BTI_RESERVED_NUM
>  
>  #include "cl_alloc.h"
> @@ -69,58 +67,6 @@ typedef struct intel_event {
>  
>  #define MAX_IF_DESC    32
>  
> -/* We can bind only a limited number of buffers */
> -enum { max_buf_n = 128 };
> -
> -enum { max_img_n = 128};
> -
> -enum {max_sampler_n = 16 };
> -
> -/* Handle GPGPU state */
> -struct intel_gpgpu
> -{
> -  void* ker_opaque;
> -  size_t global_wk_sz[3];
> -  void* printf_info;
> -  intel_driver_t *drv;
> -  intel_batchbuffer_t *batch;
> -  cl_gpgpu_kernel *ker;
> -  drm_intel_bo *binded_buf[max_buf_n];  /* all buffers binded for the call */
> -  uint32_t target_buf_offset[max_buf_n];/* internal offset for buffers binded for the call */
> -  uint32_t binded_offset[max_buf_n];    /* their offsets in the curbe buffer */
> -  uint32_t binded_n;                    /* number of buffers binded */
> -
> -  unsigned long img_bitmap;              /* image usage bitmap. */
> -  unsigned int img_index_base;          /* base index for image surface.*/
> -
> -  unsigned long sampler_bitmap;          /* sampler usage bitmap. */
> -
> -  struct { drm_intel_bo *bo; } stack_b;
> -  struct { drm_intel_bo *bo; } perf_b;
> -  struct { drm_intel_bo *bo; } scratch_b;
> -  struct { drm_intel_bo *bo; } constant_b;
> -  struct { drm_intel_bo *bo; } time_stamp_b;  /* time stamp buffer */
> -  struct { drm_intel_bo *bo;
> -           drm_intel_bo *ibo;} printf_b;      /* the printf buf and index buf*/
> -
> -  struct { drm_intel_bo *bo; } aux_buf;
> -  struct {
> -    uint32_t surface_heap_offset;
> -    uint32_t curbe_offset;
> -    uint32_t idrt_offset;
> -    uint32_t sampler_state_offset;
> -    uint32_t sampler_border_color_state_offset;
> -  } aux_offset;
> -
> -  uint32_t per_thread_scratch;
> -  struct {
> -    uint32_t num_cs_entries;
> -    uint32_t size_cs_entry;  /* size of one entry in 512bit elements */
> -  } curb;
> -
> -  uint32_t max_threads;      /* max threads requested by the user */
> -};
> -
>  typedef struct intel_gpgpu intel_gpgpu_t;
>  
>  typedef void (intel_gpgpu_set_L3_t)(intel_gpgpu_t *gpgpu, uint32_t use_slm);
> @@ -172,7 +118,7 @@ static void intel_gpgpu_unref_batch_buf(void *buf)
>  }
>  
>  static void
> -intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
> +intel_gpgpu_delete_finished(intel_gpgpu_t *gpgpu)
>  {
>    if (gpgpu == NULL)
>      return;
> @@ -198,6 +144,77 @@ intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
>    cl_free(gpgpu);
>  }
>  
> +/* Destroy the all intel_gpgpu, no matter finish or not, when driver destroy */
> +void intel_gpgpu_delete_all(intel_driver_t *drv)
> +{
> +  struct intel_gpgpu_node *p;
> +  if(drv->gpgpu_list == NULL)
> +    return;
> +
> +  PPTHREAD_MUTEX_LOCK(drv);
> +  while(drv->gpgpu_list) {
> +    p = drv->gpgpu_list;
> +    drv->gpgpu_list = p->next;
> +    intel_gpgpu_delete_finished(p->gpgpu);
> +    cl_free(p);
> +  }
> +  PPTHREAD_MUTEX_UNLOCK(drv);
> +}
> +
> +static void
> +intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
> +{
> +  intel_driver_t *drv = gpgpu->drv;
> +  struct intel_gpgpu_node *p, *node;
> +
> +  PPTHREAD_MUTEX_LOCK(drv);
> +  p = drv->gpgpu_list;
> +  if(p) {
> +    node = p->next;
> +    while(node) {
> +      if(node->gpgpu->batch && node->gpgpu->batch->buffer &&
> +         !drm_intel_bo_busy(node->gpgpu->batch->buffer)) {
> +        p->next = node->next;
> +        intel_gpgpu_delete_finished(node->gpgpu);
> +        cl_free(node);
> +        node = p->next;
> +      } else {
> +        p = node;
> +        node = node->next;
> +      }
> +    }
> +    node = drv->gpgpu_list;
> +    if(node->gpgpu->batch && node->gpgpu->batch->buffer &&
> +       !drm_intel_bo_busy(node->gpgpu->batch->buffer)) {
> +      drv->gpgpu_list = drv->gpgpu_list->next;
> +      intel_gpgpu_delete_finished(node->gpgpu);
> +      cl_free(node);
> +      node = p->next;
> +    }
> +  }
> +  if (gpgpu == NULL)
> +    return;
> +
> +  if(gpgpu->batch && gpgpu->batch->buffer &&
> +     !drm_intel_bo_busy(gpgpu->batch->buffer)) {
> +    TRY_ALLOC_NO_ERR (node, CALLOC(struct intel_gpgpu_node));
> +    node->gpgpu = gpgpu;
> +    node->next = NULL;
> +    p = drv->gpgpu_list;
> +    if(p == NULL)
> +      drv->gpgpu_list= node;
> +    else {
> +      while(p->next)
> +        p = p->next;
> +      p->next = node;
> +    }
> +  } else
> +    intel_gpgpu_delete_finished(gpgpu);
> +
> +error:
> +  PPTHREAD_MUTEX_UNLOCK(drv);
> +}
> +
>  static intel_gpgpu_t*
>  intel_gpgpu_new(intel_driver_t *drv)
>  {
> diff --git a/src/intel/intel_gpgpu.h b/src/intel/intel_gpgpu.h
> index d593ac7..7c89ac7 100644
> --- a/src/intel/intel_gpgpu.h
> +++ b/src/intel/intel_gpgpu.h
> @@ -23,10 +23,74 @@
>  
>  #include "cl_utils.h"
>  #include "cl_driver.h"
> +#include "intel/intel_batchbuffer.h"
> +#include "intel/intel_driver.h"
>  
>  #include <stdlib.h>
>  #include <stdint.h>
>  
> +
> +/* We can bind only a limited number of buffers */
> +enum { max_buf_n = 128 };
> +
> +enum { max_img_n = 128};
> +
> +enum {max_sampler_n = 16 };
> +
> +struct intel_driver;
> +struct intel_batchbuffer;
> +
> +/* Handle GPGPU state */
> +struct intel_gpgpu
> +{
> +  void* ker_opaque;
> +  size_t global_wk_sz[3];
> +  void* printf_info;
> +  struct intel_driver *drv;
> +  struct intel_batchbuffer *batch;
> +  cl_gpgpu_kernel *ker;
> +  drm_intel_bo *binded_buf[max_buf_n];  /* all buffers binded for the call */
> +  uint32_t target_buf_offset[max_buf_n];/* internal offset for buffers binded for the call */
> +  uint32_t binded_offset[max_buf_n];    /* their offsets in the curbe buffer */
> +  uint32_t binded_n;                    /* number of buffers binded */
> +
> +  unsigned long img_bitmap;              /* image usage bitmap. */
> +  unsigned int img_index_base;          /* base index for image surface.*/
> +
> +  unsigned long sampler_bitmap;          /* sampler usage bitmap. */
> +
> +  struct { drm_intel_bo *bo; } stack_b;
> +  struct { drm_intel_bo *bo; } perf_b;
> +  struct { drm_intel_bo *bo; } scratch_b;
> +  struct { drm_intel_bo *bo; } constant_b;
> +  struct { drm_intel_bo *bo; } time_stamp_b;  /* time stamp buffer */
> +  struct { drm_intel_bo *bo;
> +           drm_intel_bo *ibo;} printf_b;      /* the printf buf and index buf*/
> +
> +  struct { drm_intel_bo *bo; } aux_buf;
> +  struct {
> +    uint32_t surface_heap_offset;
> +    uint32_t curbe_offset;
> +    uint32_t idrt_offset;
> +    uint32_t sampler_state_offset;
> +    uint32_t sampler_border_color_state_offset;
> +  } aux_offset;
> +
> +  uint32_t per_thread_scratch;
> +  struct {
> +    uint32_t num_cs_entries;
> +    uint32_t size_cs_entry;  /* size of one entry in 512bit elements */
> +  } curb;
> +
> +  uint32_t max_threads;      /* max threads requested by the user */
> +};
> +
> +struct intel_gpgpu_node {
> +  struct intel_gpgpu *gpgpu;
> +  struct intel_gpgpu_node *next;
> +};
> +
> +
>  /* Set the gpgpu related call backs */
>  extern void intel_set_gpgpu_callbacks(int device_id);
>  
> -- 
> 1.8.3.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list