[PATCH 8/8] amdgpu: use sync file for shared semaphores (v2)
Christian König
deathsimple at vodafone.de
Tue Apr 4 11:05:25 UTC 2017
Am 04.04.2017 um 10:10 schrieb Daniel Vetter:
> On Tue, Apr 04, 2017 at 09:40:57AM +0200, Christian König wrote:
>> Am 04.04.2017 um 06:27 schrieb Dave Airlie:
>>> From: Dave Airlie <airlied at redhat.com>
>>>
>>> This creates a new interface for amdgpu with ioctls to
>>> create/destroy/import and export shared semaphores using a
>>> sem object backed by the sync_file code. The semaphores
>>> are not installed as fds (except on export), but rather,
>>> like other driver-internal objects, live in an idr. The
>>> idr holds the initial reference on the sync file.
>>>
>>> The command submission interface is enhanced with two new
>>> chunks, one for semaphore waiting and one for semaphore
>>> signalling; each just takes a list of handles.
>>>
>>> This is based on work originally done by David Zhou at AMD,
>>> with input from Christian König on what things should look like.
>>>
>>> NOTE: this interface addition needs a version bump to expose
>>> it to userspace.
>>>
>>> v1.1: keep file reference on import.
>>> v2: move to using syncobjs
>>>
>>> Signed-off-by: Dave Airlie <airlied at redhat.com>
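For reference, a rough sketch of how I'd expect userspace to feed the
new chunks into the CS ioctl (untested, based only on the uapi
additions below; the chunk array and ioctl plumbing are elided):

    /* untested sketch: wait on one semaphore handle, signal another */
    struct drm_amdgpu_cs_chunk_ib ib_chunk;   /* IB setup elided */
    struct drm_amdgpu_cs_chunk_sem wait_sem = { .handle = wait_handle };
    struct drm_amdgpu_cs_chunk_sem signal_sem = { .handle = signal_handle };
    struct drm_amdgpu_cs_chunk chunks[3];

    chunks[0].chunk_id = AMDGPU_CHUNK_ID_IB;
    chunks[0].length_dw = sizeof(ib_chunk) / 4;
    chunks[0].chunk_data = (__u64)(uintptr_t)&ib_chunk;

    /* one handle per entry, length_dw is in dwords */
    chunks[1].chunk_id = AMDGPU_CHUNK_ID_SEM_WAIT;
    chunks[1].length_dw = sizeof(wait_sem) / 4;
    chunks[1].chunk_data = (__u64)(uintptr_t)&wait_sem;

    chunks[2].chunk_id = AMDGPU_CHUNK_ID_SEM_SIGNAL;
    chunks[2].length_dw = sizeof(signal_sem) / 4;
    chunks[2].chunk_data = (__u64)(uintptr_t)&signal_sem;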
>> Looks good to me in general, but one note below.
>>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 86 ++++++++++++++++++++++++++++++++-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +-
>>> include/uapi/drm/amdgpu_drm.h | 6 +++
>>> 3 files changed, 92 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> index 4671432..4dd210b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> @@ -27,6 +27,7 @@
>>> #include <linux/pagemap.h>
>>> #include <drm/drmP.h>
>>> #include <drm/amdgpu_drm.h>
>>> +#include <drm/drm_syncobj.h>
>>> #include "amdgpu.h"
>>> #include "amdgpu_trace.h"
>>> @@ -217,6 +218,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
>>> break;
>>> case AMDGPU_CHUNK_ID_DEPENDENCIES:
>>> + case AMDGPU_CHUNK_ID_SEM_WAIT:
>>> + case AMDGPU_CHUNK_ID_SEM_SIGNAL:
>>> break;
>>> default:
>>> @@ -1009,6 +1012,44 @@ static int amdgpu_process_fence_dep(struct amdgpu_device *adev,
>>> return 0;
>>> }
>>> +static int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
>>> + struct drm_file *file_private,
>>> + struct amdgpu_sync *sync,
>>> + uint32_t handle)
>>> +{
>>> + int r;
>>> + struct dma_fence *old_fence;
>>> + r = drm_syncobj_swap_fences(file_private, handle, NULL, &old_fence);
>> What happens when the CS fails later on? Would the semaphore then
>> no longer contain the fence?
> Atm the rules are that you're not allowed to publish a dma_fence before
> you're committed to executing whatever it represents (i.e. it is
> guaranteed to get signalled).
Yeah, I know. But this isn't the signaling case; it's the waiting
case.
If I'm not completely mistaken, what Dave does here is retrieve the
fence from a semaphore object we need to wait on and set the object's
fence to NULL.
If the command submission fails with -ERESTARTSYS after this is done,
the retried IOCTL will behave incorrectly, because the semaphore no
longer holds the fence to wait on.
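To spell the problem out against the helper above (my reading of the
code, untested):

    r = drm_syncobj_swap_fences(file_private, handle, NULL, &old_fence);
    /* the syncobj now holds NULL, old_fence is only referenced here */
    r = amdgpu_sync_fence(adev, sync, old_fence);
    dma_fence_put(old_fence);
    /* ... the CS ioctl later bails out with -ERESTARTSYS ... */
    /* the restarted ioctl runs the lookup again, but the syncobj is
     * already empty, so the wait dependency is silently dropped */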
Regards,
Christian.
>
> What counts as publishing depends upon context, but can include:
> installing an fd for a fence-type sync_file, swapping the fence into a
> sema-type object, or assigning it to the reservation_object for
> implicit sync.
>
> I guess we need to have drm_syncobj_lookup exported (or whatever it was),
> keep a temporary pointer, and then swap it only at the end (with
> sync_file_swap_fence or whatever it was).
>
> -Daniel
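Right, something along those lines should work. A rough sketch of what
I'd expect that to look like (the function names are guesses, just like
Daniel says, and none of this is tested):

    static int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
                                          struct drm_file *file_private,
                                          struct amdgpu_sync *sync,
                                          uint32_t handle)
    {
        struct dma_fence *fence;
        int r;

        /* only take a reference, don't touch the syncobj yet */
        fence = drm_syncobj_lookup_fence(file_private, handle);
        if (!fence)
            return -EINVAL;

        r = amdgpu_sync_fence(adev, sync, fence);
        dma_fence_put(fence);
        return r;
    }

    /* and only after the job is pushed to the scheduler, where nothing
     * can fail any more, clear out each waited-on semaphore: */
    drm_syncobj_swap_fences(file_private, handle, NULL, &old_fence);
    dma_fence_put(old_fence);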
>
>> Christian.
>>
>>> + if (r)
>>> + return r;
>>> +
>>> + r = amdgpu_sync_fence(adev, sync, old_fence);
>>> + dma_fence_put(old_fence);
>>> +
>>> + return r;
>>> +}
>>> +
>>> +static int amdgpu_process_sem_wait_dep(struct amdgpu_device *adev,
>>> + struct amdgpu_cs_parser *p,
>>> + struct amdgpu_cs_chunk *chunk)
>>> +{
>>> + unsigned num_deps;
>>> + int i, r;
>>> + struct drm_amdgpu_cs_chunk_sem *deps;
>>> +
>>> + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
>>> + num_deps = chunk->length_dw * 4 /
>>> + sizeof(struct drm_amdgpu_cs_chunk_sem);
>>> +
>>> + for (i = 0; i < num_deps; ++i) {
>>> + r = amdgpu_sem_lookup_and_sync(adev, p->filp, &p->job->sync,
>>> + deps[i].handle);
>>> + if (r)
>>> + return r;
>>> + }
>>> + return 0;
>>> +}
>>> +
>>> static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
>>> struct amdgpu_cs_parser *p)
>>> {
>>> @@ -1023,12 +1064,55 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
>>> r = amdgpu_process_fence_dep(adev, p, chunk);
>>> if (r)
>>> return r;
>>> + } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) {
>>> + r = amdgpu_process_sem_wait_dep(adev, p, chunk);
>>> + if (r)
>>> + return r;
>>> }
>>> }
>>> return 0;
>>> }
>>> +static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p,
>>> + struct amdgpu_cs_chunk *chunk,
>>> + struct dma_fence *fence)
>>> +{
>>> + unsigned num_deps;
>>> + int i, r;
>>> + struct drm_amdgpu_cs_chunk_sem *deps;
>>> +
>>> + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
>>> + num_deps = chunk->length_dw * 4 /
>>> + sizeof(struct drm_amdgpu_cs_chunk_sem);
>>> +
>>> + for (i = 0; i < num_deps; ++i) {
>>> + r = drm_syncobj_replace_fence(p->filp, deps[i].handle,
>>> + fence);
>>> + if (r)
>>> + return r;
>>> + }
>>> + return 0;
>>> +}
>>> +
>>> +static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
>>> +{
>>> + int i, r;
>>> +
>>> + for (i = 0; i < p->nchunks; ++i) {
>>> + struct amdgpu_cs_chunk *chunk;
>>> +
>>> + chunk = &p->chunks[i];
>>> +
>>> + if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_SIGNAL) {
>>> + r = amdgpu_process_sem_signal_dep(p, chunk, p->fence);
>>> + if (r)
>>> + return r;
>>> + }
>>> + }
>>> + return 0;
>>> +}
>>> +
>>> static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>> union drm_amdgpu_cs *cs)
>>> {
>>> @@ -1056,7 +1140,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>> trace_amdgpu_cs_ioctl(job);
>>> amd_sched_entity_push_job(&job->base);
>>> - return 0;
>>> + return amdgpu_cs_post_dependencies(p);
>>> }
>>> int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> index b76cd69..e95951e 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> @@ -683,7 +683,7 @@ static struct drm_driver kms_driver = {
>>> .driver_features =
>>> DRIVER_USE_AGP |
>>> DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
>>> - DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET,
>>> + DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
>>> .load = amdgpu_driver_load_kms,
>>> .open = amdgpu_driver_open_kms,
>>> .preclose = amdgpu_driver_preclose_kms,
>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>>> index 5797283..647c520 100644
>>> --- a/include/uapi/drm/amdgpu_drm.h
>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>> @@ -390,6 +390,8 @@ struct drm_amdgpu_gem_va {
>>> #define AMDGPU_CHUNK_ID_IB 0x01
>>> #define AMDGPU_CHUNK_ID_FENCE 0x02
>>> #define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03
>>> +#define AMDGPU_CHUNK_ID_SEM_WAIT 0x04
>>> +#define AMDGPU_CHUNK_ID_SEM_SIGNAL 0x05
>>> struct drm_amdgpu_cs_chunk {
>>> __u32 chunk_id;
>>> @@ -454,6 +456,10 @@ struct drm_amdgpu_cs_chunk_fence {
>>> __u32 offset;
>>> };
>>> +struct drm_amdgpu_cs_chunk_sem {
>>> + __u32 handle;
>>> +};
>>> +
>>> struct drm_amdgpu_cs_chunk_data {
>>> union {
>>> struct drm_amdgpu_cs_chunk_ib ib_data;
>>