[PATCH 8/8] amdgpu: use sync file for shared semaphores (v2)

Christian König deathsimple at vodafone.de
Tue Apr 4 11:05:25 UTC 2017


On 04.04.2017 at 10:10, Daniel Vetter wrote:
> On Tue, Apr 04, 2017 at 09:40:57AM +0200, Christian König wrote:
>> On 04.04.2017 at 06:27, Dave Airlie wrote:
>>> From: Dave Airlie <airlied at redhat.com>
>>>
>>> This creates a new interface for amdgpu with ioctls to
>>> create/destroy/import and export shared semaphores using
>>> sem object backed by the sync_file code. The semaphores
>>> are not installed as fd (except for export), but rather
>>> like other driver internal objects in an idr. The idr
>>> holds the initial reference on the sync file.
>>>
>>> The command submission interface is enhanced with two new
>>> chunks, one for semaphore waiting, one for semaphore signalling
>>> and just takes a list of handles for each.
>>>
>>> This is based on work originally done by David Zhou at AMD,
>>> with input from Christian Konig on what things should look like.
>>>
>>> NOTE: this interface addition needs a version bump to expose
>>> it to userspace.
>>>
>>> v1.1: keep file reference on import.
>>> v2: move to using syncobjs
>>>
>>> Signed-off-by: Dave Airlie <airlied at redhat.com>
>> Looks good to me in general, but one note below.
>>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 86 ++++++++++++++++++++++++++++++++-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  2 +-
>>>    include/uapi/drm/amdgpu_drm.h           |  6 +++
>>>    3 files changed, 92 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> index 4671432..4dd210b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> @@ -27,6 +27,7 @@
>>>    #include <linux/pagemap.h>
>>>    #include <drm/drmP.h>
>>>    #include <drm/amdgpu_drm.h>
>>> +#include <drm/drm_syncobj.h>
>>>    #include "amdgpu.h"
>>>    #include "amdgpu_trace.h"
>>> @@ -217,6 +218,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
>>>    			break;
>>>    		case AMDGPU_CHUNK_ID_DEPENDENCIES:
>>> +		case AMDGPU_CHUNK_ID_SEM_WAIT:
>>> +		case AMDGPU_CHUNK_ID_SEM_SIGNAL:
>>>    			break;
>>>    		default:
>>> @@ -1009,6 +1012,44 @@ static int amdgpu_process_fence_dep(struct amdgpu_device *adev,
>>>    	return 0;
>>>    }
>>> +static int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
>>> +				      struct drm_file *file_private,
>>> +				      struct amdgpu_sync *sync,
>>> +				      uint32_t handle)
>>> +{
>>> +	int r;
>>> +	struct dma_fence *old_fence;
>>> +	r = drm_syncobj_swap_fences(file_private, handle, NULL, &old_fence);
>> What happens when the CS fails later on? Would the semaphore then not
>> contain the fence any more?
> Atm rules are that you're not allowed to publish a dma_fence before you're
> committed to executing whatever it represents (i.e. guaranteed to get
> signalled).

Yeah, I know. But this isn't the signaling case, it's the waiting
case.

If I'm not completely mistaken, what Dave does here is retrieve the
fence from a semaphore object we need to wait on and then set the
semaphore's fence to NULL.

If the command submission fails with -ERESTARTSYS after this is done,
the retried IOCTL will behave incorrectly.
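
To make the ordering concrete, the wait path currently does roughly the
following (a simplified sketch of the patch, not the literal code):

	/* The fence is taken out of the syncobj right away ... */
	r = drm_syncobj_swap_fences(file_private, handle, NULL, &old_fence);
	if (r)
		return r;

	/* ... but the submission can still fail or be interrupted
	 * after this point. */
	r = amdgpu_sync_fence(adev, sync, old_fence);
	dma_fence_put(old_fence);

	/* If the ioctl now returns -ERESTARTSYS, userspace restarts it,
	 * but the syncobj no longer holds a fence, so the retried
	 * submission silently skips the wait. */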

Regards,
Christian.

>
> Publishing depends upon context, but can include: install an fd for a
> fence-type sync_file, swap the sync_file in a sema type, assign it to the
> reservation_object for implicit sync.
>
> I guess we need to have drm_syncobj_lookup exported (or whatever it was),
> keep a temporary pointer, and then swap it only at the end (with
> sync_file_swap_fence or whatever it was).
>
> -Daniel
>
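
For comparison, the deferred variant Daniel describes would look roughly
like this (only a sketch; drm_syncobj_lookup_fence() is a placeholder
name for whichever lookup helper ends up being exported):

	struct dma_fence *fence;

	/* Only look the fence up; the syncobj keeps holding it. */
	fence = drm_syncobj_lookup_fence(file_private, handle);
	if (!fence)
		return -EINVAL;

	r = amdgpu_sync_fence(adev, sync, fence);
	dma_fence_put(fence);
	if (r)
		return r;

	/* The syncobj is only touched (e.g. its fence replaced) after
	 * the job has been pushed and the ioctl can no longer fail, so
	 * a restarted ioctl still finds the original fence. */
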
>> Christian.
>>
>>> +	if (r)
>>> +		return r;
>>> +
>>> +	r = amdgpu_sync_fence(adev, sync, old_fence);
>>> +	dma_fence_put(old_fence);
>>> +
>>> +	return r;
>>> +}
>>> +
>>> +static int amdgpu_process_sem_wait_dep(struct amdgpu_device *adev,
>>> +				       struct amdgpu_cs_parser *p,
>>> +				       struct amdgpu_cs_chunk *chunk)
>>> +{
>>> +	unsigned num_deps;
>>> +	int i, r;
>>> +	struct drm_amdgpu_cs_chunk_sem *deps;
>>> +
>>> +	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
>>> +	num_deps = chunk->length_dw * 4 /
>>> +		sizeof(struct drm_amdgpu_cs_chunk_sem);
>>> +
>>> +	for (i = 0; i < num_deps; ++i) {
>>> +		r = amdgpu_sem_lookup_and_sync(adev, p->filp, &p->job->sync,
>>> +					       deps[i].handle);
>>> +		if (r)
>>> +			return r;
>>> +	}
>>> +	return 0;
>>> +}
>>> +
>>>    static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
>>>    				  struct amdgpu_cs_parser *p)
>>>    {
>>> @@ -1023,12 +1064,55 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
>>>    			r = amdgpu_process_fence_dep(adev, p, chunk);
>>>    			if (r)
>>>    				return r;
>>> +		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) {
>>> +			r = amdgpu_process_sem_wait_dep(adev, p, chunk);
>>> +			if (r)
>>> +				return r;
>>>    		}
>>>    	}
>>>    	return 0;
>>>    }
>>> +static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p,
>>> +					 struct amdgpu_cs_chunk *chunk,
>>> +					 struct dma_fence *fence)
>>> +{
>>> +	unsigned num_deps;
>>> +	int i, r;
>>> +	struct drm_amdgpu_cs_chunk_sem *deps;
>>> +
>>> +	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
>>> +	num_deps = chunk->length_dw * 4 /
>>> +		sizeof(struct drm_amdgpu_cs_chunk_sem);
>>> +
>>> +	for (i = 0; i < num_deps; ++i) {
>>> +		r = drm_syncobj_replace_fence(p->filp, deps[i].handle,
>>> +					      fence);
>>> +		if (r)
>>> +			return r;
>>> +	}
>>> +	return 0;
>>> +}
>>> +
>>> +static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
>>> +{
>>> +	int i, r;
>>> +
>>> +	for (i = 0; i < p->nchunks; ++i) {
>>> +		struct amdgpu_cs_chunk *chunk;
>>> +
>>> +		chunk = &p->chunks[i];
>>> +
>>> +		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_SIGNAL) {
>>> +			r = amdgpu_process_sem_signal_dep(p, chunk, p->fence);
>>> +			if (r)
>>> +				return r;
>>> +		}
>>> +	}
>>> +	return 0;
>>> +}
>>> +
>>>    static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>>    			    union drm_amdgpu_cs *cs)
>>>    {
>>> @@ -1056,7 +1140,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>>    	trace_amdgpu_cs_ioctl(job);
>>>    	amd_sched_entity_push_job(&job->base);
>>> -	return 0;
>>> +	return amdgpu_cs_post_dependencies(p);
>>>    }
>>>    int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> index b76cd69..e95951e 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> @@ -683,7 +683,7 @@ static struct drm_driver kms_driver = {
>>>    	.driver_features =
>>>    	    DRIVER_USE_AGP |
>>>    	    DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
>>> -	    DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET,
>>> +	    DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
>>>    	.load = amdgpu_driver_load_kms,
>>>    	.open = amdgpu_driver_open_kms,
>>>    	.preclose = amdgpu_driver_preclose_kms,
>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>>> index 5797283..647c520 100644
>>> --- a/include/uapi/drm/amdgpu_drm.h
>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>> @@ -390,6 +390,8 @@ struct drm_amdgpu_gem_va {
>>>    #define AMDGPU_CHUNK_ID_IB		0x01
>>>    #define AMDGPU_CHUNK_ID_FENCE		0x02
>>>    #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
>>> +#define AMDGPU_CHUNK_ID_SEM_WAIT        0x04
>>> +#define AMDGPU_CHUNK_ID_SEM_SIGNAL      0x05
>>>    struct drm_amdgpu_cs_chunk {
>>>    	__u32		chunk_id;
>>> @@ -454,6 +456,10 @@ struct drm_amdgpu_cs_chunk_fence {
>>>    	__u32 offset;
>>>    };
>>> +struct drm_amdgpu_cs_chunk_sem {
>>> +	__u32 handle;
>>> +};
>>> +
>>>    struct drm_amdgpu_cs_chunk_data {
>>>    	union {
>>>    		struct drm_amdgpu_cs_chunk_ib		ib_data;