[PATCH 8/8] amdgpu: use sync file for shared semaphores (v2)

Daniel Vetter daniel at ffwll.ch
Tue Apr 4 08:10:36 UTC 2017


On Tue, Apr 04, 2017 at 09:40:57AM +0200, Christian König wrote:
> On 04.04.2017 at 06:27, Dave Airlie wrote:
> > From: Dave Airlie <airlied at redhat.com>
> > 
> > This creates a new interface for amdgpu with ioctls to
> > create/destroy/import and export shared semaphores, using
> > a sem object backed by the sync_file code. The semaphores
> > are not installed as an fd (except on export), but are
> > instead tracked like other driver-internal objects in an
> > idr. The idr holds the initial reference on the sync file.
> > 
> > The command submission interface is enhanced with two new
> > chunks, one for semaphore waiting and one for semaphore
> > signalling; each takes just a list of handles.
> > 
> > This is based on work originally done by David Zhou at AMD,
> > with input from Christian König on what things should look like.
> > 
> > NOTE: this interface addition needs a version bump to expose
> > it to userspace.
> > 
> > v1.1: keep file reference on import.
> > v2: move to using syncobjs
> > 
> > Signed-off-by: Dave Airlie <airlied at redhat.com>
> 
> Looks good to me in general, but one note below.
> 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 86 ++++++++++++++++++++++++++++++++-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  2 +-
> >   include/uapi/drm/amdgpu_drm.h           |  6 +++
> >   3 files changed, 92 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index 4671432..4dd210b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -27,6 +27,7 @@
> >   #include <linux/pagemap.h>
> >   #include <drm/drmP.h>
> >   #include <drm/amdgpu_drm.h>
> > +#include <drm/drm_syncobj.h>
> >   #include "amdgpu.h"
> >   #include "amdgpu_trace.h"
> > @@ -217,6 +218,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
> >   			break;
> >   		case AMDGPU_CHUNK_ID_DEPENDENCIES:
> > +		case AMDGPU_CHUNK_ID_SEM_WAIT:
> > +		case AMDGPU_CHUNK_ID_SEM_SIGNAL:
> >   			break;
> >   		default:
> > @@ -1009,6 +1012,44 @@ static int amdgpu_process_fence_dep(struct amdgpu_device *adev,
> >   	return 0;
> >   }
> > +static int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
> > +				      struct drm_file *file_private,
> > +				      struct amdgpu_sync *sync,
> > +				      uint32_t handle)
> > +{
> > +	int r;
> > +	struct dma_fence *old_fence;
> > +	r = drm_syncobj_swap_fences(file_private, handle, NULL, &old_fence);
> 
> What happens when the CS fails later on? Wouldn't the semaphore then
> no longer contain the fence?

Atm the rule is that you're not allowed to publish a dma_fence before
you're committed to executing whatever it represents (i.e. before it is
guaranteed to get signalled).

What counts as publishing depends upon context, but includes: installing
an fd for a fence-type sync_file, swapping the fence into a sema-type
sync_file, or assigning it to the reservation_object for implicit sync.
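For the sema case in this series that publish point is the new
SEM_SIGNAL chunk, processed at CS time. For reference, a userspace
sketch of filling such a chunk (the handle value and helper name are
made up for illustration; only the uapi structs/defines are from the
patch):

#include <stdint.h>
#include <drm/amdgpu_drm.h>

/* Userspace sketch: ask the CS to signal one shared semaphore once the
 * job's fence signals. sem_handle would come from the separate
 * create/import ioctls this series adds.
 */
static void fill_sem_signal_chunk(struct drm_amdgpu_cs_chunk *chunk,
				  struct drm_amdgpu_cs_chunk_sem *sem,
				  uint32_t sem_handle)
{
	sem->handle = sem_handle;
	chunk->chunk_id   = AMDGPU_CHUNK_ID_SEM_SIGNAL;
	chunk->length_dw  = sizeof(*sem) / 4;	/* chunk size in dwords */
	chunk->chunk_data = (uintptr_t)sem;	/* user pointer as __u64 */
}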

I guess we need to have drm_syncobj_lookup exported (or whatever it
was called), keep a temporary pointer to the fence, and then swap it
out only at the end, once the CS can no longer fail (with
sync_file_swap_fence or whatever it was).
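Something like this (the lookup helper name here is made up -- it's
whatever drm_syncobj.c ends up exporting; the rest uses functions from
this series):

/* Sketch: look up the waited fence without clearing the syncobj, so a
 * CS that fails later leaves the semaphore untouched.
 */
static int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
				      struct drm_file *file_private,
				      struct amdgpu_sync *sync,
				      uint32_t handle)
{
	struct dma_fence *fence;
	int r;

	fence = drm_syncobj_lookup_fence(file_private, handle);	/* placeholder name */
	if (!fence)
		return -EINVAL;

	r = amdgpu_sync_fence(adev, sync, fence);
	dma_fence_put(fence);
	return r;
}

/* ... and only after amd_sched_entity_push_job(), once the CS can no
 * longer fail, consume the waited fence:
 *
 *	drm_syncobj_swap_fences(p->filp, handle, NULL, &old_fence);
 *	dma_fence_put(old_fence);
 */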

-Daniel

> 
> Christian.
> 
> > +	if (r)
> > +		return r;
> > +
> > +	r = amdgpu_sync_fence(adev, sync, old_fence);
> > +	dma_fence_put(old_fence);
> > +
> > +	return r;
> > +}
> > +
> > +static int amdgpu_process_sem_wait_dep(struct amdgpu_device *adev,
> > +				       struct amdgpu_cs_parser *p,
> > +				       struct amdgpu_cs_chunk *chunk)
> > +{
> > +	unsigned num_deps;
> > +	int i, r;
> > +	struct drm_amdgpu_cs_chunk_sem *deps;
> > +
> > +	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
> > +	num_deps = chunk->length_dw * 4 /
> > +		sizeof(struct drm_amdgpu_cs_chunk_sem);
> > +
> > +	for (i = 0; i < num_deps; ++i) {
> > +		r = amdgpu_sem_lookup_and_sync(adev, p->filp, &p->job->sync,
> > +					       deps[i].handle);
> > +		if (r)
> > +			return r;
> > +	}
> > +	return 0;
> > +}
> > +
> >   static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
> >   				  struct amdgpu_cs_parser *p)
> >   {
> > @@ -1023,12 +1064,55 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
> >   			r = amdgpu_process_fence_dep(adev, p, chunk);
> >   			if (r)
> >   				return r;
> > +		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) {
> > +			r = amdgpu_process_sem_wait_dep(adev, p, chunk);
> > +			if (r)
> > +				return r;
> >   		}
> >   	}
> >   	return 0;
> >   }
> > +static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p,
> > +					 struct amdgpu_cs_chunk *chunk,
> > +					 struct dma_fence *fence)
> > +{
> > +	unsigned num_deps;
> > +	int i, r;
> > +	struct drm_amdgpu_cs_chunk_sem *deps;
> > +
> > +	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
> > +	num_deps = chunk->length_dw * 4 /
> > +		sizeof(struct drm_amdgpu_cs_chunk_sem);
> > +
> > +	for (i = 0; i < num_deps; ++i) {
> > +		r = drm_syncobj_replace_fence(p->filp, deps[i].handle,
> > +					      fence);
> > +		if (r)
> > +			return r;
> > +	}
> > +	return 0;
> > +}
> > +
> > +static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
> > +{
> > +	int i, r;
> > +
> > +	for (i = 0; i < p->nchunks; ++i) {
> > +		struct amdgpu_cs_chunk *chunk;
> > +
> > +		chunk = &p->chunks[i];
> > +
> > +		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_SIGNAL) {
> > +			r = amdgpu_process_sem_signal_dep(p, chunk, p->fence);
> > +			if (r)
> > +				return r;
> > +		}
> > +	}
> > +	return 0;
> > +}
> > +
> >   static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> >   			    union drm_amdgpu_cs *cs)
> >   {
> > @@ -1056,7 +1140,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> >   	trace_amdgpu_cs_ioctl(job);
> >   	amd_sched_entity_push_job(&job->base);
> > -	return 0;
> > +	return amdgpu_cs_post_dependencies(p);
> >   }
> >   int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index b76cd69..e95951e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > @@ -683,7 +683,7 @@ static struct drm_driver kms_driver = {
> >   	.driver_features =
> >   	    DRIVER_USE_AGP |
> >   	    DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
> > -	    DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET,
> > +	    DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
> >   	.load = amdgpu_driver_load_kms,
> >   	.open = amdgpu_driver_open_kms,
> >   	.preclose = amdgpu_driver_preclose_kms,
> > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > index 5797283..647c520 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -390,6 +390,8 @@ struct drm_amdgpu_gem_va {
> >   #define AMDGPU_CHUNK_ID_IB		0x01
> >   #define AMDGPU_CHUNK_ID_FENCE		0x02
> >   #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
> > +#define AMDGPU_CHUNK_ID_SEM_WAIT        0x04
> > +#define AMDGPU_CHUNK_ID_SEM_SIGNAL      0x05
> >   struct drm_amdgpu_cs_chunk {
> >   	__u32		chunk_id;
> > @@ -454,6 +456,10 @@ struct drm_amdgpu_cs_chunk_fence {
> >   	__u32 offset;
> >   };
> > +struct drm_amdgpu_cs_chunk_sem {
> > +	__u32 handle;
> > +};
> > +
> >   struct drm_amdgpu_cs_chunk_data {
> >   	union {
> >   		struct drm_amdgpu_cs_chunk_ib		ib_data;
> 
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

