[Beignet] [PATCH] Refine the intel gpgpu delete.
Yang, Rong R
rong.r.yang at intel.com
Mon Oct 27 01:15:13 PDT 2014
Yes, I assume that all the gpgpu in the list would complete in order, but it may not true.
I have sent a new version.
> -----Original Message-----
> From: Zhigang Gong [mailto:zhigang.gong at linux.intel.com]
> Sent: Monday, October 27, 2014 14:33
> To: Yang, Rong R
> Cc: beignet at lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH] Refine the intel gpgpu delete.
>
>
> When loop for the gpgpu list, it's better to check each gpgpu structure once
> with non-blocking mode. Please see below diff.
>
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index
> 7f9b8ac..c4a1c9c 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -168,14 +168,17 @@ intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
> struct intel_gpgpu_node *p, *node;
>
> PPTHREAD_MUTEX_LOCK(drv);
> - while(drv->gpgpu_list) {
> + p = drv->gpgpu_list;
> + while(p) {
> p = drv->gpgpu_list;
> if(p->gpgpu->batch && p->gpgpu->batch->buffer &&
> !drm_intel_bo_busy(p->gpgpu->batch->buffer)) {
> drv->gpgpu_list = p->next;
> intel_gpgpu_delete_finished(p->gpgpu);
> cl_free(p);
> - }
> + p = drv->gpgpu_list->next;
> + } else
> + p = p->next;
> }
> if (gpgpu == NULL)
> return;
>
>
> On Mon, Oct 27, 2014 at 02:01:26PM +0800, Yang Rong wrote:
> > The intel gpgpu struct is destroyed when a new gpgpu struct needed.
> > But in that time, the command batch relative with the destroyed gpgpu
> > may have not finish, and the resource in gpgpu still used by gpgpu, can't be
> destroyed.
> > So, when delete a gpgpu, check the batch status, if have not complete,
> > insert to list in intel driver, and delete all finished gpgpu in that list.
> >
> > Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> > ---
> > src/intel/intel_batchbuffer.c | 5 +-
> > src/intel/intel_driver.c | 27 +---------
> > src/intel/intel_driver.h | 28 +++++++++++
> > src/intel/intel_gpgpu.c | 112 +++++++++++++++++++++------------------
> ---
> > src/intel/intel_gpgpu.h | 64 ++++++++++++++++++++++++
> > 5 files changed, 154 insertions(+), 82 deletions(-)
> >
> > diff --git a/src/intel/intel_batchbuffer.c
> > b/src/intel/intel_batchbuffer.c index ba6b05f..b054fc7 100644
> > --- a/src/intel/intel_batchbuffer.c
> > +++ b/src/intel/intel_batchbuffer.c
> > @@ -136,8 +136,9 @@ intel_batchbuffer_flush(intel_batchbuffer_t *batch)
> > if (!is_locked)
> > intel_driver_unlock_hardware(batch->intel);
> >
> > - // Release the buffer
> > - intel_batchbuffer_terminate(batch);
> > + // Can't release buffer here. gpgpu only can be delete only when the
> batch buffer is complete.
> > + // Remain the buffer for gpgpu delete check.
> > + //intel_batchbuffer_terminate(batch);
> > }
> >
> > LOCAL void
> > diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c index
> > 390e965..bb97220 100644
> > --- a/src/intel/intel_driver.c
> > +++ b/src/intel/intel_driver.c
> > @@ -79,31 +79,6 @@
> > #include "cl_device_id.h"
> > #include "cl_platform_id.h"
> >
> > -#define SET_BLOCKED_SIGSET(DRIVER) do { \
> > - sigset_t bl_mask; \
> > - sigfillset(&bl_mask); \
> > - sigdelset(&bl_mask, SIGFPE); \
> > - sigdelset(&bl_mask, SIGILL); \
> > - sigdelset(&bl_mask, SIGSEGV); \
> > - sigdelset(&bl_mask, SIGBUS); \
> > - sigdelset(&bl_mask, SIGKILL); \
> > - pthread_sigmask(SIG_SETMASK, &bl_mask, &(DRIVER)->sa_mask); \ -}
> > while (0)
> > -
> > -#define RESTORE_BLOCKED_SIGSET(DRIVER) do { \
> > - pthread_sigmask(SIG_SETMASK, &(DRIVER)->sa_mask, NULL); \
> > -} while (0)
> > -
> > -#define PPTHREAD_MUTEX_LOCK(DRIVER) do { \
> > - SET_BLOCKED_SIGSET(DRIVER); \
> > - pthread_mutex_lock(&(DRIVER)->ctxmutex); \
> > -} while (0)
> > -
> > -#define PPTHREAD_MUTEX_UNLOCK(DRIVER) do { \
> > - pthread_mutex_unlock(&(DRIVER)->ctxmutex); \
> > - RESTORE_BLOCKED_SIGSET(DRIVER); \
> > -} while (0)
> > -
> > static void
> > intel_driver_delete(intel_driver_t *driver) { @@ -423,11 +398,13 @@
> > intel_get_device_id(void)
> > return intel_device_id;
> > }
> >
> > +extern void intel_gpgpu_delete_all(intel_driver_t *driver);
> > static void
> > cl_intel_driver_delete(intel_driver_t *driver) {
> > if (driver == NULL)
> > return;
> > + intel_gpgpu_delete_all(driver);
> > intel_driver_context_destroy(driver);
> > intel_driver_close(driver);
> > intel_driver_terminate(driver);
> > diff --git a/src/intel/intel_driver.h b/src/intel/intel_driver.h index
> > 107fdfc..8b82738 100644
> > --- a/src/intel/intel_driver.h
> > +++ b/src/intel/intel_driver.h
> > @@ -54,6 +54,7 @@
> > #include <drm.h>
> > #include <i915_drm.h>
> > #include <intel_bufmgr.h>
> > +#include <intel/intel_gpgpu.h>
> >
> > #define CMD_MI (0x0 << 29)
> > #define CMD_2D (0x2 << 29)
> > @@ -73,6 +74,7 @@
> > #define BR13_8888 (0x3 << 24)
> >
> > struct dri_state;
> > +struct intel_gpgpu_node;
> > typedef struct _XDisplay Display;
> >
> > typedef struct intel_driver
> > @@ -88,8 +90,34 @@ typedef struct intel_driver
> > int need_close;
> > Display *x11_display;
> > struct dri_state *dri_ctx;
> > + struct intel_gpgpu_node *gpgpu_list;
> > } intel_driver_t;
> >
> > +#define SET_BLOCKED_SIGSET(DRIVER) do { \
> > + sigset_t bl_mask; \
> > + sigfillset(&bl_mask); \
> > + sigdelset(&bl_mask, SIGFPE); \
> > + sigdelset(&bl_mask, SIGILL); \
> > + sigdelset(&bl_mask, SIGSEGV); \
> > + sigdelset(&bl_mask, SIGBUS); \
> > + sigdelset(&bl_mask, SIGKILL); \
> > + pthread_sigmask(SIG_SETMASK, &bl_mask, &(DRIVER)->sa_mask); \ }
> > +while (0)
> > +
> > +#define RESTORE_BLOCKED_SIGSET(DRIVER) do { \
> > + pthread_sigmask(SIG_SETMASK, &(DRIVER)->sa_mask, NULL); \
> > +} while (0)
> > +
> > +#define PPTHREAD_MUTEX_LOCK(DRIVER) do { \
> > + SET_BLOCKED_SIGSET(DRIVER); \
> > + pthread_mutex_lock(&(DRIVER)->ctxmutex); \
> > +} while (0)
> > +
> > +#define PPTHREAD_MUTEX_UNLOCK(DRIVER) do { \
> > + pthread_mutex_unlock(&(DRIVER)->ctxmutex); \
> > + RESTORE_BLOCKED_SIGSET(DRIVER); \
> > +} while (0)
> > +
> > /* device control */
> > extern void intel_driver_lock_hardware(intel_driver_t*);
> > extern void intel_driver_unlock_hardware(intel_driver_t*);
> > diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index
> > 325d982..7f9b8ac 100644
> > --- a/src/intel/intel_gpgpu.c
> > +++ b/src/intel/intel_gpgpu.c
> > @@ -33,8 +33,6 @@
> > #include "intel/intel_gpgpu.h"
> > #include "intel/intel_defines.h"
> > #include "intel/intel_structs.h"
> > -#include "intel/intel_batchbuffer.h"
> > -#include "intel/intel_driver.h"
> > #include "program.h" // for BTI_RESERVED_NUM
> >
> > #include "cl_alloc.h"
> > @@ -69,58 +67,6 @@ typedef struct intel_event {
> >
> > #define MAX_IF_DESC 32
> >
> > -/* We can bind only a limited number of buffers */ -enum { max_buf_n
> > = 128 };
> > -
> > -enum { max_img_n = 128};
> > -
> > -enum {max_sampler_n = 16 };
> > -
> > -/* Handle GPGPU state */
> > -struct intel_gpgpu
> > -{
> > - void* ker_opaque;
> > - size_t global_wk_sz[3];
> > - void* printf_info;
> > - intel_driver_t *drv;
> > - intel_batchbuffer_t *batch;
> > - cl_gpgpu_kernel *ker;
> > - drm_intel_bo *binded_buf[max_buf_n]; /* all buffers binded for the
> > call */
> > - uint32_t target_buf_offset[max_buf_n];/* internal offset for buffers
> binded for the call */
> > - uint32_t binded_offset[max_buf_n]; /* their offsets in the curbe
> buffer */
> > - uint32_t binded_n; /* number of buffers binded */
> > -
> > - unsigned long img_bitmap; /* image usage bitmap. */
> > - unsigned int img_index_base; /* base index for image surface.*/
> > -
> > - unsigned long sampler_bitmap; /* sampler usage bitmap. */
> > -
> > - struct { drm_intel_bo *bo; } stack_b;
> > - struct { drm_intel_bo *bo; } perf_b;
> > - struct { drm_intel_bo *bo; } scratch_b;
> > - struct { drm_intel_bo *bo; } constant_b;
> > - struct { drm_intel_bo *bo; } time_stamp_b; /* time stamp buffer */
> > - struct { drm_intel_bo *bo;
> > - drm_intel_bo *ibo;} printf_b; /* the printf buf and index buf*/
> > -
> > - struct { drm_intel_bo *bo; } aux_buf;
> > - struct {
> > - uint32_t surface_heap_offset;
> > - uint32_t curbe_offset;
> > - uint32_t idrt_offset;
> > - uint32_t sampler_state_offset;
> > - uint32_t sampler_border_color_state_offset;
> > - } aux_offset;
> > -
> > - uint32_t per_thread_scratch;
> > - struct {
> > - uint32_t num_cs_entries;
> > - uint32_t size_cs_entry; /* size of one entry in 512bit elements */
> > - } curb;
> > -
> > - uint32_t max_threads; /* max threads requested by the user */
> > -};
> > -
> > typedef struct intel_gpgpu intel_gpgpu_t;
> >
> > typedef void (intel_gpgpu_set_L3_t)(intel_gpgpu_t *gpgpu, uint32_t
> > use_slm); @@ -172,7 +118,7 @@ static void
> > intel_gpgpu_unref_batch_buf(void *buf) }
> >
> > static void
> > -intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
> > +intel_gpgpu_delete_finished(intel_gpgpu_t *gpgpu)
> > {
> > if (gpgpu == NULL)
> > return;
> > @@ -198,6 +144,62 @@ intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
> > cl_free(gpgpu);
> > }
> >
> > +/* Destroy the all intel_gpgpu, no matter finish or not, when driver
> > +destroy */ void intel_gpgpu_delete_all(intel_driver_t *drv) {
> > + struct intel_gpgpu_node *p;
> > + if(drv->gpgpu_list == NULL)
> > + return;
> > +
> > + PPTHREAD_MUTEX_LOCK(drv);
> > + while(drv->gpgpu_list) {
> > + p = drv->gpgpu_list;
> > + drv->gpgpu_list = p->next;
> > + intel_gpgpu_delete_finished(p->gpgpu);
> > + cl_free(p);
> > + }
> > + PPTHREAD_MUTEX_UNLOCK(drv);
> > +}
> > +
> > +static void
> > +intel_gpgpu_delete(intel_gpgpu_t *gpgpu) {
> > + intel_driver_t *drv = gpgpu->drv;
> > + struct intel_gpgpu_node *p, *node;
> > +
> > + PPTHREAD_MUTEX_LOCK(drv);
> > + while(drv->gpgpu_list) {
> > + p = drv->gpgpu_list;
> > + if(p->gpgpu->batch && p->gpgpu->batch->buffer &&
> > + !drm_intel_bo_busy(p->gpgpu->batch->buffer)) {
> > + drv->gpgpu_list = p->next;
> > + intel_gpgpu_delete_finished(p->gpgpu);
> > + cl_free(p);
> > + }
> > + }
> > + if (gpgpu == NULL)
> > + return;
> > +
> > + if(gpgpu->batch && gpgpu->batch->buffer &&
> > + !drm_intel_bo_busy(gpgpu->batch->buffer)) {
> > + TRY_ALLOC_NO_ERR (node, CALLOC(struct intel_gpgpu_node));
> > + node->gpgpu = gpgpu;
> > + node->next = NULL;
> > + p = drv->gpgpu_list;
> > + if(p == NULL)
> > + drv->gpgpu_list= node;
> > + else {
> > + while(p->next)
> > + p = p->next;
> > + p->next = node;
> > + }
> > + } else
> > + intel_gpgpu_delete_finished(gpgpu);
> > +
> > +error:
> > + PPTHREAD_MUTEX_UNLOCK(drv);
> > +}
> > +
> > static intel_gpgpu_t*
> > intel_gpgpu_new(intel_driver_t *drv)
> > {
> > diff --git a/src/intel/intel_gpgpu.h b/src/intel/intel_gpgpu.h index
> > d593ac7..7c89ac7 100644
> > --- a/src/intel/intel_gpgpu.h
> > +++ b/src/intel/intel_gpgpu.h
> > @@ -23,10 +23,74 @@
> >
> > #include "cl_utils.h"
> > #include "cl_driver.h"
> > +#include "intel/intel_batchbuffer.h"
> > +#include "intel/intel_driver.h"
> >
> > #include <stdlib.h>
> > #include <stdint.h>
> >
> > +
> > +/* We can bind only a limited number of buffers */ enum { max_buf_n =
> > +128 };
> > +
> > +enum { max_img_n = 128};
> > +
> > +enum {max_sampler_n = 16 };
> > +
> > +struct intel_driver;
> > +struct intel_batchbuffer;
> > +
> > +/* Handle GPGPU state */
> > +struct intel_gpgpu
> > +{
> > + void* ker_opaque;
> > + size_t global_wk_sz[3];
> > + void* printf_info;
> > + struct intel_driver *drv;
> > + struct intel_batchbuffer *batch;
> > + cl_gpgpu_kernel *ker;
> > + drm_intel_bo *binded_buf[max_buf_n]; /* all buffers binded for the
> > +call */
> > + uint32_t target_buf_offset[max_buf_n];/* internal offset for buffers
> binded for the call */
> > + uint32_t binded_offset[max_buf_n]; /* their offsets in the curbe
> buffer */
> > + uint32_t binded_n; /* number of buffers binded */
> > +
> > + unsigned long img_bitmap; /* image usage bitmap. */
> > + unsigned int img_index_base; /* base index for image surface.*/
> > +
> > + unsigned long sampler_bitmap; /* sampler usage bitmap. */
> > +
> > + struct { drm_intel_bo *bo; } stack_b; struct { drm_intel_bo *bo; }
> > + perf_b; struct { drm_intel_bo *bo; } scratch_b; struct {
> > + drm_intel_bo *bo; } constant_b; struct { drm_intel_bo *bo; }
> > + time_stamp_b; /* time stamp buffer */ struct { drm_intel_bo *bo;
> > + drm_intel_bo *ibo;} printf_b; /* the printf buf and index buf*/
> > +
> > + struct { drm_intel_bo *bo; } aux_buf; struct {
> > + uint32_t surface_heap_offset;
> > + uint32_t curbe_offset;
> > + uint32_t idrt_offset;
> > + uint32_t sampler_state_offset;
> > + uint32_t sampler_border_color_state_offset;
> > + } aux_offset;
> > +
> > + uint32_t per_thread_scratch;
> > + struct {
> > + uint32_t num_cs_entries;
> > + uint32_t size_cs_entry; /* size of one entry in 512bit elements
> > + */ } curb;
> > +
> > + uint32_t max_threads; /* max threads requested by the user */
> > +};
> > +
> > +struct intel_gpgpu_node {
> > + struct intel_gpgpu *gpgpu;
> > + struct intel_gpgpu_node *next;
> > +};
> > +
> > +
> > /* Set the gpgpu related call backs */ extern void
> > intel_set_gpgpu_callbacks(int device_id);
> >
> > --
> > 1.8.3.2
> >
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list