[Beignet] [PATCH] Refine the intel gpgpu delete.

Yang Rong rong.r.yang at intel.com
Sun Oct 26 23:01:26 PDT 2014


The intel gpgpu struct is destroyed when a new gpgpu struct needed. But in that time,
the command batch relative with the destroyed gpgpu may have not finish, and the resource
in gpgpu still used by gpgpu, can't be destroyed.
So, when delete a gpgpu, check the batch status, if have not complete, insert to list in intel driver,
and delete all finished gpgpu in that list.

Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 src/intel/intel_batchbuffer.c |   5 +-
 src/intel/intel_driver.c      |  27 +---------
 src/intel/intel_driver.h      |  28 +++++++++++
 src/intel/intel_gpgpu.c       | 112 +++++++++++++++++++++---------------------
 src/intel/intel_gpgpu.h       |  64 ++++++++++++++++++++++++
 5 files changed, 154 insertions(+), 82 deletions(-)

diff --git a/src/intel/intel_batchbuffer.c b/src/intel/intel_batchbuffer.c
index ba6b05f..b054fc7 100644
--- a/src/intel/intel_batchbuffer.c
+++ b/src/intel/intel_batchbuffer.c
@@ -136,8 +136,9 @@ intel_batchbuffer_flush(intel_batchbuffer_t *batch)
   if (!is_locked)
     intel_driver_unlock_hardware(batch->intel);
 
-  // Release the buffer
-  intel_batchbuffer_terminate(batch);
+  // Can't release buffer here. gpgpu only can be delete only when the batch buffer is complete.
+  // Remain the buffer for gpgpu delete check.
+  //intel_batchbuffer_terminate(batch);
 }
 
 LOCAL void 
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index 390e965..bb97220 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -79,31 +79,6 @@
 #include "cl_device_id.h"
 #include "cl_platform_id.h"
 
-#define SET_BLOCKED_SIGSET(DRIVER)   do {                     \
-  sigset_t bl_mask;                                           \
-  sigfillset(&bl_mask);                                       \
-  sigdelset(&bl_mask, SIGFPE);                                \
-  sigdelset(&bl_mask, SIGILL);                                \
-  sigdelset(&bl_mask, SIGSEGV);                               \
-  sigdelset(&bl_mask, SIGBUS);                                \
-  sigdelset(&bl_mask, SIGKILL);                               \
-  pthread_sigmask(SIG_SETMASK, &bl_mask, &(DRIVER)->sa_mask); \
-} while (0)
-
-#define RESTORE_BLOCKED_SIGSET(DRIVER) do {                   \
-  pthread_sigmask(SIG_SETMASK, &(DRIVER)->sa_mask, NULL);     \
-} while (0)
-
-#define PPTHREAD_MUTEX_LOCK(DRIVER) do {                      \
-  SET_BLOCKED_SIGSET(DRIVER);                                 \
-  pthread_mutex_lock(&(DRIVER)->ctxmutex);                    \
-} while (0)
-
-#define PPTHREAD_MUTEX_UNLOCK(DRIVER) do {                    \
-  pthread_mutex_unlock(&(DRIVER)->ctxmutex);                  \
-  RESTORE_BLOCKED_SIGSET(DRIVER);                             \
-} while (0)
-
 static void
 intel_driver_delete(intel_driver_t *driver)
 {
@@ -423,11 +398,13 @@ intel_get_device_id(void)
   return intel_device_id;
 }
 
+extern void intel_gpgpu_delete_all(intel_driver_t *driver);
 static void
 cl_intel_driver_delete(intel_driver_t *driver)
 {
   if (driver == NULL)
     return;
+  intel_gpgpu_delete_all(driver);
   intel_driver_context_destroy(driver);
   intel_driver_close(driver);
   intel_driver_terminate(driver);
diff --git a/src/intel/intel_driver.h b/src/intel/intel_driver.h
index 107fdfc..8b82738 100644
--- a/src/intel/intel_driver.h
+++ b/src/intel/intel_driver.h
@@ -54,6 +54,7 @@
 #include <drm.h>
 #include <i915_drm.h>
 #include <intel_bufmgr.h>
+#include <intel/intel_gpgpu.h>
 
 #define CMD_MI                                  (0x0 << 29)
 #define CMD_2D                                  (0x2 << 29)
@@ -73,6 +74,7 @@
 #define BR13_8888                               (0x3 << 24)
 
 struct dri_state;
+struct intel_gpgpu_node;
 typedef struct _XDisplay Display;
 
 typedef struct intel_driver
@@ -88,8 +90,34 @@ typedef struct intel_driver
   int need_close;
   Display *x11_display;
   struct dri_state *dri_ctx;
+  struct intel_gpgpu_node *gpgpu_list;
 } intel_driver_t;
 
+#define SET_BLOCKED_SIGSET(DRIVER)   do {                     \
+  sigset_t bl_mask;                                           \
+  sigfillset(&bl_mask);                                       \
+  sigdelset(&bl_mask, SIGFPE);                                \
+  sigdelset(&bl_mask, SIGILL);                                \
+  sigdelset(&bl_mask, SIGSEGV);                               \
+  sigdelset(&bl_mask, SIGBUS);                                \
+  sigdelset(&bl_mask, SIGKILL);                               \
+  pthread_sigmask(SIG_SETMASK, &bl_mask, &(DRIVER)->sa_mask); \
+} while (0)
+
+#define RESTORE_BLOCKED_SIGSET(DRIVER) do {                   \
+  pthread_sigmask(SIG_SETMASK, &(DRIVER)->sa_mask, NULL);     \
+} while (0)
+
+#define PPTHREAD_MUTEX_LOCK(DRIVER) do {                      \
+  SET_BLOCKED_SIGSET(DRIVER);                                 \
+  pthread_mutex_lock(&(DRIVER)->ctxmutex);                    \
+} while (0)
+
+#define PPTHREAD_MUTEX_UNLOCK(DRIVER) do {                    \
+  pthread_mutex_unlock(&(DRIVER)->ctxmutex);                  \
+  RESTORE_BLOCKED_SIGSET(DRIVER);                             \
+} while (0)
+
 /* device control */
 extern void intel_driver_lock_hardware(intel_driver_t*);
 extern void intel_driver_unlock_hardware(intel_driver_t*);
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 325d982..7f9b8ac 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -33,8 +33,6 @@
 #include "intel/intel_gpgpu.h"
 #include "intel/intel_defines.h"
 #include "intel/intel_structs.h"
-#include "intel/intel_batchbuffer.h"
-#include "intel/intel_driver.h"
 #include "program.h" // for BTI_RESERVED_NUM
 
 #include "cl_alloc.h"
@@ -69,58 +67,6 @@ typedef struct intel_event {
 
 #define MAX_IF_DESC    32
 
-/* We can bind only a limited number of buffers */
-enum { max_buf_n = 128 };
-
-enum { max_img_n = 128};
-
-enum {max_sampler_n = 16 };
-
-/* Handle GPGPU state */
-struct intel_gpgpu
-{
-  void* ker_opaque;
-  size_t global_wk_sz[3];
-  void* printf_info;
-  intel_driver_t *drv;
-  intel_batchbuffer_t *batch;
-  cl_gpgpu_kernel *ker;
-  drm_intel_bo *binded_buf[max_buf_n];  /* all buffers binded for the call */
-  uint32_t target_buf_offset[max_buf_n];/* internal offset for buffers binded for the call */
-  uint32_t binded_offset[max_buf_n];    /* their offsets in the curbe buffer */
-  uint32_t binded_n;                    /* number of buffers binded */
-
-  unsigned long img_bitmap;              /* image usage bitmap. */
-  unsigned int img_index_base;          /* base index for image surface.*/
-
-  unsigned long sampler_bitmap;          /* sampler usage bitmap. */
-
-  struct { drm_intel_bo *bo; } stack_b;
-  struct { drm_intel_bo *bo; } perf_b;
-  struct { drm_intel_bo *bo; } scratch_b;
-  struct { drm_intel_bo *bo; } constant_b;
-  struct { drm_intel_bo *bo; } time_stamp_b;  /* time stamp buffer */
-  struct { drm_intel_bo *bo;
-           drm_intel_bo *ibo;} printf_b;      /* the printf buf and index buf*/
-
-  struct { drm_intel_bo *bo; } aux_buf;
-  struct {
-    uint32_t surface_heap_offset;
-    uint32_t curbe_offset;
-    uint32_t idrt_offset;
-    uint32_t sampler_state_offset;
-    uint32_t sampler_border_color_state_offset;
-  } aux_offset;
-
-  uint32_t per_thread_scratch;
-  struct {
-    uint32_t num_cs_entries;
-    uint32_t size_cs_entry;  /* size of one entry in 512bit elements */
-  } curb;
-
-  uint32_t max_threads;      /* max threads requested by the user */
-};
-
 typedef struct intel_gpgpu intel_gpgpu_t;
 
 typedef void (intel_gpgpu_set_L3_t)(intel_gpgpu_t *gpgpu, uint32_t use_slm);
@@ -172,7 +118,7 @@ static void intel_gpgpu_unref_batch_buf(void *buf)
 }
 
 static void
-intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
+intel_gpgpu_delete_finished(intel_gpgpu_t *gpgpu)
 {
   if (gpgpu == NULL)
     return;
@@ -198,6 +144,62 @@ intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
   cl_free(gpgpu);
 }
 
+/* Destroy the all intel_gpgpu, no matter finish or not, when driver destroy */
+void intel_gpgpu_delete_all(intel_driver_t *drv)
+{
+  struct intel_gpgpu_node *p;
+  if(drv->gpgpu_list == NULL)
+    return;
+
+  PPTHREAD_MUTEX_LOCK(drv);
+  while(drv->gpgpu_list) {
+    p = drv->gpgpu_list;
+    drv->gpgpu_list = p->next;
+    intel_gpgpu_delete_finished(p->gpgpu);
+    cl_free(p);
+  }
+  PPTHREAD_MUTEX_UNLOCK(drv);
+}
+
+static void
+intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
+{
+  intel_driver_t *drv = gpgpu->drv;
+  struct intel_gpgpu_node *p, *node;
+
+  PPTHREAD_MUTEX_LOCK(drv);
+  while(drv->gpgpu_list) {
+    p = drv->gpgpu_list;
+    if(p->gpgpu->batch && p->gpgpu->batch->buffer &&
+       !drm_intel_bo_busy(p->gpgpu->batch->buffer)) {
+      drv->gpgpu_list = p->next;
+      intel_gpgpu_delete_finished(p->gpgpu);
+      cl_free(p);
+    }
+  }
+  if (gpgpu == NULL)
+    return;
+
+  if(gpgpu->batch && gpgpu->batch->buffer &&
+     !drm_intel_bo_busy(gpgpu->batch->buffer)) {
+    TRY_ALLOC_NO_ERR (node, CALLOC(struct intel_gpgpu_node));
+    node->gpgpu = gpgpu;
+    node->next = NULL;
+    p = drv->gpgpu_list;
+    if(p == NULL)
+      drv->gpgpu_list= node;
+    else {
+      while(p->next)
+        p = p->next;
+      p->next = node;
+    }
+  } else
+    intel_gpgpu_delete_finished(gpgpu);
+
+error:
+  PPTHREAD_MUTEX_UNLOCK(drv);
+}
+
 static intel_gpgpu_t*
 intel_gpgpu_new(intel_driver_t *drv)
 {
diff --git a/src/intel/intel_gpgpu.h b/src/intel/intel_gpgpu.h
index d593ac7..7c89ac7 100644
--- a/src/intel/intel_gpgpu.h
+++ b/src/intel/intel_gpgpu.h
@@ -23,10 +23,74 @@
 
 #include "cl_utils.h"
 #include "cl_driver.h"
+#include "intel/intel_batchbuffer.h"
+#include "intel/intel_driver.h"
 
 #include <stdlib.h>
 #include <stdint.h>
 
+
+/* We can bind only a limited number of buffers */
+enum { max_buf_n = 128 };
+
+enum { max_img_n = 128};
+
+enum {max_sampler_n = 16 };
+
+struct intel_driver;
+struct intel_batchbuffer;
+
+/* Handle GPGPU state */
+struct intel_gpgpu
+{
+  void* ker_opaque;
+  size_t global_wk_sz[3];
+  void* printf_info;
+  struct intel_driver *drv;
+  struct intel_batchbuffer *batch;
+  cl_gpgpu_kernel *ker;
+  drm_intel_bo *binded_buf[max_buf_n];  /* all buffers binded for the call */
+  uint32_t target_buf_offset[max_buf_n];/* internal offset for buffers binded for the call */
+  uint32_t binded_offset[max_buf_n];    /* their offsets in the curbe buffer */
+  uint32_t binded_n;                    /* number of buffers binded */
+
+  unsigned long img_bitmap;              /* image usage bitmap. */
+  unsigned int img_index_base;          /* base index for image surface.*/
+
+  unsigned long sampler_bitmap;          /* sampler usage bitmap. */
+
+  struct { drm_intel_bo *bo; } stack_b;
+  struct { drm_intel_bo *bo; } perf_b;
+  struct { drm_intel_bo *bo; } scratch_b;
+  struct { drm_intel_bo *bo; } constant_b;
+  struct { drm_intel_bo *bo; } time_stamp_b;  /* time stamp buffer */
+  struct { drm_intel_bo *bo;
+           drm_intel_bo *ibo;} printf_b;      /* the printf buf and index buf*/
+
+  struct { drm_intel_bo *bo; } aux_buf;
+  struct {
+    uint32_t surface_heap_offset;
+    uint32_t curbe_offset;
+    uint32_t idrt_offset;
+    uint32_t sampler_state_offset;
+    uint32_t sampler_border_color_state_offset;
+  } aux_offset;
+
+  uint32_t per_thread_scratch;
+  struct {
+    uint32_t num_cs_entries;
+    uint32_t size_cs_entry;  /* size of one entry in 512bit elements */
+  } curb;
+
+  uint32_t max_threads;      /* max threads requested by the user */
+};
+
+struct intel_gpgpu_node {
+  struct intel_gpgpu *gpgpu;
+  struct intel_gpgpu_node *next;
+};
+
+
 /* Set the gpgpu related call backs */
 extern void intel_set_gpgpu_callbacks(int device_id);
 
-- 
1.8.3.2



More information about the Beignet mailing list