[PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)

Wed Aug 9 18:25:26 UTC 2017

Am 09.08.2017 um 19:57 schrieb Chris Wilson:
> Quoting Jason Ekstrand (2017-08-09 18:00:54)
>> Vulkan VkFence semantics require that the application be able to perform
>> a CPU wait on work which may not yet have been submitted.  This is
>> perfectly safe because the CPU wait has a timeout which will get
>> triggered eventually if no work is ever submitted.  This behavior is
>> advantageous for multi-threaded workloads because, so long as all of the
>> threads agree on what fences to use up-front, you don't have the extra
>> cross-thread synchronization cost of thread A telling thread B that it
>> has submitted its dependent work and thread B is now free to wait.
>>
>> Within a single process, this can be implemented in the userspace driver
>> by doing exactly the same kind of tracking the app would have to do
>> using posix condition variables or similar.  However, in order for this
>> to work cross-process (as is required by VK_KHR_external_fence), we need
>> to handle this in the kernel.
>>
>> This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which
>> instructs the IOCTL to wait for the syncobj to have a non-null fence and
>> then wait on the fence.  Combined with DRM_IOCTL_SYNCOBJ_RESET, you can
>> easily get the Vulkan behavior.
>>
>> v2:
>>   - Fix a bug in the invalid syncobj error path
>>   - Unify the wait-all and wait-any cases
>>
>> Signed-off-by: Jason Ekstrand <jason at jlekstrand.net>
>> Cc: Dave Airlie <airlied at redhat.com>
>> ---
>>
>> I realized today (this is what happens when you sleep) that it takes almost
>> no work to make the wait-any path also handle wait-all.  By unifying the
>> two, I deleted over a hundred lines of code from the implementation.
>>
>>   drivers/gpu/drm/drm_syncobj.c | 243 ++++++++++++++++++++++++++++++++++--------
>>   include/uapi/drm/drm.h        |   1 +
>>   2 files changed, 199 insertions(+), 45 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>> index 510dfc2..5e7f654 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -51,6 +51,7 @@
>>   #include <linux/fs.h>
>>   #include <linux/anon_inodes.h>
>>   #include <linux/sync_file.h>
>> +#include <linux/sched/signal.h>
>>   
>>   #include "drm_internal.h"
>>   #include <drm/drm_syncobj.h>
>> @@ -104,6 +105,27 @@ static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj,
> This is a bit of the puzzle that is missing.
>
>>          list_add_tail(&cb->node, &syncobj->cb_list);
>>   }
>>   
>> +static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
>> +                                                struct dma_fence **fence,
>> +                                                struct drm_syncobj_cb *cb,
>> +                                                drm_syncobj_func_t func)
>> +{
>> +       int ret;
>> +
>> +       spin_lock(&syncobj->lock);
>> +       if (syncobj->fence) {
>> +               *fence = dma_fence_get(syncobj->fence);
>> +               ret = 1;
>> +       } else {
>> +               *fence = NULL;
>> +               drm_syncobj_add_callback_locked(syncobj, cb, func);
>> +               ret = 0;
>> +       }
>> +       spin_unlock(&syncobj->lock);
>> +
>> +       return ret;
>> +}
>> +
>>   /**
>>    * drm_syncobj_add_callback - adds a callback to syncobj::cb_list
>>    * @syncobj: Sync object to which to add the callback
>> @@ -526,6 +548,159 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>                                          &args->handle);
>>   }
>>   
>> +static int drm_syncobj_signaled(struct drm_syncobj *syncobj,
>> +                               uint32_t wait_flags)
>> +{
>> +       struct dma_fence *fence;
>> +       int ret;
>> +
>> +       fence = drm_syncobj_fence_get(syncobj);
>> +       if (!fence) {
>> +               if (wait_flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
>> +                       return 0;
>> +               else
>> +                       return -EINVAL;
>> +       }
>> +
>> +       ret = dma_fence_is_signaled(fence) ? 1 : 0;
>> +
>> +       dma_fence_put(fence);
>> +
>> +       return ret;
>> +}
>> +
>> +struct syncobj_wait_entry {
>> +       struct task_struct *task;
>> +       struct dma_fence *fence;
>> +       struct dma_fence_cb fence_cb;
>> +       struct drm_syncobj_cb syncobj_cb;
>> +};
>> +
>> +static void syncobj_wait_fence_func(struct dma_fence *fence,
>> +                                   struct dma_fence_cb *cb)
>> +{
>> +       struct syncobj_wait_entry *wait =
>> +               container_of(cb, struct syncobj_wait_entry, fence_cb);
>> +
>> +       wake_up_process(wait->task);
>> +}
>> +
>> +static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
>> +                                     struct drm_syncobj_cb *cb)
>> +{
>> +       struct syncobj_wait_entry *wait =
>> +               container_of(cb, struct syncobj_wait_entry, syncobj_cb);
>> +
>> +       /* This happens inside the syncobj lock */
>> +       wait->fence = dma_fence_get(syncobj->fence);
>> +       wake_up_process(wait->task);
>> +}
>> +
>> +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>> +                                                 uint32_t count,
>> +                                                 uint32_t flags,
>> +                                                 signed long timeout,
>> +                                                 uint32_t *idx)
>> +{
>> +       struct syncobj_wait_entry *entries;
>> +       struct dma_fence *fence;
>> +       signed long ret;
>> +       uint32_t signaled_count, i;
>> +
>> +       if (timeout == 0) {
>> +               signaled_count = 0;
>> +               for (i = 0; i < count; ++i) {
>> +                       ret = drm_syncobj_signaled(syncobjs[i], flags);
>> +                       if (ret < 0)
>> +                               return ret;
>> +                       if (ret == 0)
>> +                               continue;
>> +                       if (signaled_count == 0 && idx)
>> +                               *idx = i;
>> +                       signaled_count++;
>> +               }
>> +
>> +               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
>> +                       return signaled_count == count ? 1 : 0;
>> +               else
>> +                       return signaled_count > 0 ? 1 : 0;
>> +       }
>> +
>> +       entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
>> +       if (!entries)
>> +               return -ENOMEM;
>> +
>> +       for (i = 0; i < count; ++i) {
>> +               entries[i].task = current;
>> +
>> +               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>> +                       drm_syncobj_fence_get_or_add_callback(syncobjs[i],
>> +                                                             &entries[i].fence,
>> +                                                             &entries[i].syncobj_cb,
>> +                                                             syncobj_wait_syncobj_func);
>> +               } else {
>> +                       entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
>> +                       if (!entries[i].fence) {
>> +                               ret = -EINVAL;
>> +                               goto err_cleanup_entries;
>> +                       }
> So we are taking a snapshot here. It looks like this could have been
> done using a dma_fence_array + dma_fence_proxy for capturing the future
> fence.
>
>> +       ret = timeout;
>> +       while (ret > 0) {
>> +               signaled_count = 0;
>> +               for (i = 0; i < count; ++i) {
>> +                       fence = entries[i].fence;
>> +                       if (!fence)
>> +                               continue;
>> +
>> +                       if (dma_fence_is_signaled(fence) ||
>> +                           (!entries[i].fence_cb.func &&
>> +                            dma_fence_add_callback(fence,
>> +                                                   &entries[i].fence_cb,
>> +                                                   syncobj_wait_fence_func))) {
>> +                               /* The fence has been signaled */
>> +                               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
>> +                                       signaled_count++;
>> +                               } else {
>> +                                       if (idx)
>> +                                               *idx = i;
>> +                                       goto done_waiting;
>> +                               }
>> +                       }
>> +               }
>> +
>> +               if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) &&
>> +                   signaled_count == count)
>> +                       goto done_waiting;
>> +
>> +               set_current_state(TASK_INTERRUPTIBLE);
> This is the first step in the wait-loop, you set TASK_INTERRUPTIBLE
> before doing any checks. So that if any state changes whilst you are in
> the middle of those checks, the schedule_timeout is a nop and you can
> check again.

Yeah, completely agree.

I would rather drop the approach with the custom wait and try to use 
wait_event_interruptible here.

As far as I can see that should make the whole patch much cleaner in 
general.

Regards,
Christian.

> -Chris
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel