[Beignet] [PATCH] Refine the intel gpgpu delete.
Zhigang Gong
zhigang.gong at linux.intel.com
Sun Oct 26 23:33:06 PDT 2014
When loop for the gpgpu list, it's better to check each gpgpu
structure once with non-blocking mode. Please see below diff.
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 7f9b8ac..c4a1c9c 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -168,14 +168,17 @@ intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
struct intel_gpgpu_node *p, *node;
PPTHREAD_MUTEX_LOCK(drv);
- while(drv->gpgpu_list) {
+ p = drv->gpgpu_list;
+ while(p) {
p = drv->gpgpu_list;
if(p->gpgpu->batch && p->gpgpu->batch->buffer &&
!drm_intel_bo_busy(p->gpgpu->batch->buffer)) {
drv->gpgpu_list = p->next;
intel_gpgpu_delete_finished(p->gpgpu);
cl_free(p);
- }
+ p = drv->gpgpu_list->next;
+ } else
+ p = p->next;
}
if (gpgpu == NULL)
return;
On Mon, Oct 27, 2014 at 02:01:26PM +0800, Yang Rong wrote:
> The intel gpgpu struct is destroyed when a new gpgpu struct needed. But in that time,
> the command batch relative with the destroyed gpgpu may have not finish, and the resource
> in gpgpu still used by gpgpu, can't be destroyed.
> So, when delete a gpgpu, check the batch status, if have not complete, insert to list in intel driver,
> and delete all finished gpgpu in that list.
>
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
> src/intel/intel_batchbuffer.c | 5 +-
> src/intel/intel_driver.c | 27 +---------
> src/intel/intel_driver.h | 28 +++++++++++
> src/intel/intel_gpgpu.c | 112 +++++++++++++++++++++---------------------
> src/intel/intel_gpgpu.h | 64 ++++++++++++++++++++++++
> 5 files changed, 154 insertions(+), 82 deletions(-)
>
> diff --git a/src/intel/intel_batchbuffer.c b/src/intel/intel_batchbuffer.c
> index ba6b05f..b054fc7 100644
> --- a/src/intel/intel_batchbuffer.c
> +++ b/src/intel/intel_batchbuffer.c
> @@ -136,8 +136,9 @@ intel_batchbuffer_flush(intel_batchbuffer_t *batch)
> if (!is_locked)
> intel_driver_unlock_hardware(batch->intel);
>
> - // Release the buffer
> - intel_batchbuffer_terminate(batch);
> + // Can't release buffer here. gpgpu only can be delete only when the batch buffer is complete.
> + // Remain the buffer for gpgpu delete check.
> + //intel_batchbuffer_terminate(batch);
> }
>
> LOCAL void
> diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
> index 390e965..bb97220 100644
> --- a/src/intel/intel_driver.c
> +++ b/src/intel/intel_driver.c
> @@ -79,31 +79,6 @@
> #include "cl_device_id.h"
> #include "cl_platform_id.h"
>
> -#define SET_BLOCKED_SIGSET(DRIVER) do { \
> - sigset_t bl_mask; \
> - sigfillset(&bl_mask); \
> - sigdelset(&bl_mask, SIGFPE); \
> - sigdelset(&bl_mask, SIGILL); \
> - sigdelset(&bl_mask, SIGSEGV); \
> - sigdelset(&bl_mask, SIGBUS); \
> - sigdelset(&bl_mask, SIGKILL); \
> - pthread_sigmask(SIG_SETMASK, &bl_mask, &(DRIVER)->sa_mask); \
> -} while (0)
> -
> -#define RESTORE_BLOCKED_SIGSET(DRIVER) do { \
> - pthread_sigmask(SIG_SETMASK, &(DRIVER)->sa_mask, NULL); \
> -} while (0)
> -
> -#define PPTHREAD_MUTEX_LOCK(DRIVER) do { \
> - SET_BLOCKED_SIGSET(DRIVER); \
> - pthread_mutex_lock(&(DRIVER)->ctxmutex); \
> -} while (0)
> -
> -#define PPTHREAD_MUTEX_UNLOCK(DRIVER) do { \
> - pthread_mutex_unlock(&(DRIVER)->ctxmutex); \
> - RESTORE_BLOCKED_SIGSET(DRIVER); \
> -} while (0)
> -
> static void
> intel_driver_delete(intel_driver_t *driver)
> {
> @@ -423,11 +398,13 @@ intel_get_device_id(void)
> return intel_device_id;
> }
>
> +extern void intel_gpgpu_delete_all(intel_driver_t *driver);
> static void
> cl_intel_driver_delete(intel_driver_t *driver)
> {
> if (driver == NULL)
> return;
> + intel_gpgpu_delete_all(driver);
> intel_driver_context_destroy(driver);
> intel_driver_close(driver);
> intel_driver_terminate(driver);
> diff --git a/src/intel/intel_driver.h b/src/intel/intel_driver.h
> index 107fdfc..8b82738 100644
> --- a/src/intel/intel_driver.h
> +++ b/src/intel/intel_driver.h
> @@ -54,6 +54,7 @@
> #include <drm.h>
> #include <i915_drm.h>
> #include <intel_bufmgr.h>
> +#include <intel/intel_gpgpu.h>
>
> #define CMD_MI (0x0 << 29)
> #define CMD_2D (0x2 << 29)
> @@ -73,6 +74,7 @@
> #define BR13_8888 (0x3 << 24)
>
> struct dri_state;
> +struct intel_gpgpu_node;
> typedef struct _XDisplay Display;
>
> typedef struct intel_driver
> @@ -88,8 +90,34 @@ typedef struct intel_driver
> int need_close;
> Display *x11_display;
> struct dri_state *dri_ctx;
> + struct intel_gpgpu_node *gpgpu_list;
> } intel_driver_t;
>
> +#define SET_BLOCKED_SIGSET(DRIVER) do { \
> + sigset_t bl_mask; \
> + sigfillset(&bl_mask); \
> + sigdelset(&bl_mask, SIGFPE); \
> + sigdelset(&bl_mask, SIGILL); \
> + sigdelset(&bl_mask, SIGSEGV); \
> + sigdelset(&bl_mask, SIGBUS); \
> + sigdelset(&bl_mask, SIGKILL); \
> + pthread_sigmask(SIG_SETMASK, &bl_mask, &(DRIVER)->sa_mask); \
> +} while (0)
> +
> +#define RESTORE_BLOCKED_SIGSET(DRIVER) do { \
> + pthread_sigmask(SIG_SETMASK, &(DRIVER)->sa_mask, NULL); \
> +} while (0)
> +
> +#define PPTHREAD_MUTEX_LOCK(DRIVER) do { \
> + SET_BLOCKED_SIGSET(DRIVER); \
> + pthread_mutex_lock(&(DRIVER)->ctxmutex); \
> +} while (0)
> +
> +#define PPTHREAD_MUTEX_UNLOCK(DRIVER) do { \
> + pthread_mutex_unlock(&(DRIVER)->ctxmutex); \
> + RESTORE_BLOCKED_SIGSET(DRIVER); \
> +} while (0)
> +
> /* device control */
> extern void intel_driver_lock_hardware(intel_driver_t*);
> extern void intel_driver_unlock_hardware(intel_driver_t*);
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
> index 325d982..7f9b8ac 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -33,8 +33,6 @@
> #include "intel/intel_gpgpu.h"
> #include "intel/intel_defines.h"
> #include "intel/intel_structs.h"
> -#include "intel/intel_batchbuffer.h"
> -#include "intel/intel_driver.h"
> #include "program.h" // for BTI_RESERVED_NUM
>
> #include "cl_alloc.h"
> @@ -69,58 +67,6 @@ typedef struct intel_event {
>
> #define MAX_IF_DESC 32
>
> -/* We can bind only a limited number of buffers */
> -enum { max_buf_n = 128 };
> -
> -enum { max_img_n = 128};
> -
> -enum {max_sampler_n = 16 };
> -
> -/* Handle GPGPU state */
> -struct intel_gpgpu
> -{
> - void* ker_opaque;
> - size_t global_wk_sz[3];
> - void* printf_info;
> - intel_driver_t *drv;
> - intel_batchbuffer_t *batch;
> - cl_gpgpu_kernel *ker;
> - drm_intel_bo *binded_buf[max_buf_n]; /* all buffers binded for the call */
> - uint32_t target_buf_offset[max_buf_n];/* internal offset for buffers binded for the call */
> - uint32_t binded_offset[max_buf_n]; /* their offsets in the curbe buffer */
> - uint32_t binded_n; /* number of buffers binded */
> -
> - unsigned long img_bitmap; /* image usage bitmap. */
> - unsigned int img_index_base; /* base index for image surface.*/
> -
> - unsigned long sampler_bitmap; /* sampler usage bitmap. */
> -
> - struct { drm_intel_bo *bo; } stack_b;
> - struct { drm_intel_bo *bo; } perf_b;
> - struct { drm_intel_bo *bo; } scratch_b;
> - struct { drm_intel_bo *bo; } constant_b;
> - struct { drm_intel_bo *bo; } time_stamp_b; /* time stamp buffer */
> - struct { drm_intel_bo *bo;
> - drm_intel_bo *ibo;} printf_b; /* the printf buf and index buf*/
> -
> - struct { drm_intel_bo *bo; } aux_buf;
> - struct {
> - uint32_t surface_heap_offset;
> - uint32_t curbe_offset;
> - uint32_t idrt_offset;
> - uint32_t sampler_state_offset;
> - uint32_t sampler_border_color_state_offset;
> - } aux_offset;
> -
> - uint32_t per_thread_scratch;
> - struct {
> - uint32_t num_cs_entries;
> - uint32_t size_cs_entry; /* size of one entry in 512bit elements */
> - } curb;
> -
> - uint32_t max_threads; /* max threads requested by the user */
> -};
> -
> typedef struct intel_gpgpu intel_gpgpu_t;
>
> typedef void (intel_gpgpu_set_L3_t)(intel_gpgpu_t *gpgpu, uint32_t use_slm);
> @@ -172,7 +118,7 @@ static void intel_gpgpu_unref_batch_buf(void *buf)
> }
>
> static void
> -intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
> +intel_gpgpu_delete_finished(intel_gpgpu_t *gpgpu)
> {
> if (gpgpu == NULL)
> return;
> @@ -198,6 +144,62 @@ intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
> cl_free(gpgpu);
> }
>
> +/* Destroy the all intel_gpgpu, no matter finish or not, when driver destroy */
> +void intel_gpgpu_delete_all(intel_driver_t *drv)
> +{
> + struct intel_gpgpu_node *p;
> + if(drv->gpgpu_list == NULL)
> + return;
> +
> + PPTHREAD_MUTEX_LOCK(drv);
> + while(drv->gpgpu_list) {
> + p = drv->gpgpu_list;
> + drv->gpgpu_list = p->next;
> + intel_gpgpu_delete_finished(p->gpgpu);
> + cl_free(p);
> + }
> + PPTHREAD_MUTEX_UNLOCK(drv);
> +}
> +
> +static void
> +intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
> +{
> + intel_driver_t *drv = gpgpu->drv;
> + struct intel_gpgpu_node *p, *node;
> +
> + PPTHREAD_MUTEX_LOCK(drv);
> + while(drv->gpgpu_list) {
> + p = drv->gpgpu_list;
> + if(p->gpgpu->batch && p->gpgpu->batch->buffer &&
> + !drm_intel_bo_busy(p->gpgpu->batch->buffer)) {
> + drv->gpgpu_list = p->next;
> + intel_gpgpu_delete_finished(p->gpgpu);
> + cl_free(p);
> + }
> + }
> + if (gpgpu == NULL)
> + return;
> +
> + if(gpgpu->batch && gpgpu->batch->buffer &&
> + !drm_intel_bo_busy(gpgpu->batch->buffer)) {
> + TRY_ALLOC_NO_ERR (node, CALLOC(struct intel_gpgpu_node));
> + node->gpgpu = gpgpu;
> + node->next = NULL;
> + p = drv->gpgpu_list;
> + if(p == NULL)
> + drv->gpgpu_list= node;
> + else {
> + while(p->next)
> + p = p->next;
> + p->next = node;
> + }
> + } else
> + intel_gpgpu_delete_finished(gpgpu);
> +
> +error:
> + PPTHREAD_MUTEX_UNLOCK(drv);
> +}
> +
> static intel_gpgpu_t*
> intel_gpgpu_new(intel_driver_t *drv)
> {
> diff --git a/src/intel/intel_gpgpu.h b/src/intel/intel_gpgpu.h
> index d593ac7..7c89ac7 100644
> --- a/src/intel/intel_gpgpu.h
> +++ b/src/intel/intel_gpgpu.h
> @@ -23,10 +23,74 @@
>
> #include "cl_utils.h"
> #include "cl_driver.h"
> +#include "intel/intel_batchbuffer.h"
> +#include "intel/intel_driver.h"
>
> #include <stdlib.h>
> #include <stdint.h>
>
> +
> +/* We can bind only a limited number of buffers */
> +enum { max_buf_n = 128 };
> +
> +enum { max_img_n = 128};
> +
> +enum {max_sampler_n = 16 };
> +
> +struct intel_driver;
> +struct intel_batchbuffer;
> +
> +/* Handle GPGPU state */
> +struct intel_gpgpu
> +{
> + void* ker_opaque;
> + size_t global_wk_sz[3];
> + void* printf_info;
> + struct intel_driver *drv;
> + struct intel_batchbuffer *batch;
> + cl_gpgpu_kernel *ker;
> + drm_intel_bo *binded_buf[max_buf_n]; /* all buffers binded for the call */
> + uint32_t target_buf_offset[max_buf_n];/* internal offset for buffers binded for the call */
> + uint32_t binded_offset[max_buf_n]; /* their offsets in the curbe buffer */
> + uint32_t binded_n; /* number of buffers binded */
> +
> + unsigned long img_bitmap; /* image usage bitmap. */
> + unsigned int img_index_base; /* base index for image surface.*/
> +
> + unsigned long sampler_bitmap; /* sampler usage bitmap. */
> +
> + struct { drm_intel_bo *bo; } stack_b;
> + struct { drm_intel_bo *bo; } perf_b;
> + struct { drm_intel_bo *bo; } scratch_b;
> + struct { drm_intel_bo *bo; } constant_b;
> + struct { drm_intel_bo *bo; } time_stamp_b; /* time stamp buffer */
> + struct { drm_intel_bo *bo;
> + drm_intel_bo *ibo;} printf_b; /* the printf buf and index buf*/
> +
> + struct { drm_intel_bo *bo; } aux_buf;
> + struct {
> + uint32_t surface_heap_offset;
> + uint32_t curbe_offset;
> + uint32_t idrt_offset;
> + uint32_t sampler_state_offset;
> + uint32_t sampler_border_color_state_offset;
> + } aux_offset;
> +
> + uint32_t per_thread_scratch;
> + struct {
> + uint32_t num_cs_entries;
> + uint32_t size_cs_entry; /* size of one entry in 512bit elements */
> + } curb;
> +
> + uint32_t max_threads; /* max threads requested by the user */
> +};
> +
> +struct intel_gpgpu_node {
> + struct intel_gpgpu *gpgpu;
> + struct intel_gpgpu_node *next;
> +};
> +
> +
> /* Set the gpgpu related call backs */
> extern void intel_set_gpgpu_callbacks(int device_id);
>
> --
> 1.8.3.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list