[PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)
Chris Wilson
chris at chris-wilson.co.uk
Wed Aug 9 17:57:44 UTC 2017
Quoting Jason Ekstrand (2017-08-09 18:00:54)
> Vulkan VkFence semantics require that the application be able to perform
> a CPU wait on work which may not yet have been submitted. This is
> perfectly safe because the CPU wait has a timeout which will get
> triggered eventually if no work is ever submitted. This behavior is
> advantageous for multi-threaded workloads because, so long as all of the
> threads agree on what fences to use up-front, you don't have the extra
> cross-thread synchronization cost of thread A telling thread B that it
> has submitted its dependent work and thread B is now free to wait.
>
> Within a single process, this can be implemented in the userspace driver
> by doing exactly the same kind of tracking the app would have to do
> using posix condition variables or similar. However, in order for this
> to work cross-process (as is required by VK_KHR_external_fence), we need
> to handle this in the kernel.
>
> This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which
> instructs the IOCTL to wait for the syncobj to have a non-null fence and
> then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can
> easily get the Vulkan behavior.
>
> v2:
> - Fix a bug in the invalid syncobj error path
> - Unify the wait-all and wait-any cases
>
> Signed-off-by: Jason Ekstrand <jason at jlekstrand.net>
> Cc: Dave Airlie <airlied at redhat.com>
> ---
>
> I realized today (this is what happens when you sleep) that it takes almost
> no work to make the wait-any path also handle wait-all. By unifying the
> two, I deleted over a hundred lines of code from the implementation.
>
> drivers/gpu/drm/drm_syncobj.c | 243 ++++++++++++++++++++++++++++++++++--------
> include/uapi/drm/drm.h | 1 +
> 2 files changed, 199 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 510dfc2..5e7f654 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -51,6 +51,7 @@
> #include <linux/fs.h>
> #include <linux/anon_inodes.h>
> #include <linux/sync_file.h>
> +#include <linux/sched/signal.h>
>
> #include "drm_internal.h"
> #include <drm/drm_syncobj.h>
> @@ -104,6 +105,27 @@ static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj,
This is a bit of the puzzle that is missing.
> list_add_tail(&cb->node, &syncobj->cb_list);
> }
>
> +static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
> + struct dma_fence **fence,
> + struct drm_syncobj_cb *cb,
> + drm_syncobj_func_t func)
> +{
> + int ret;
> +
> + spin_lock(&syncobj->lock);
> + if (syncobj->fence) {
> + *fence = dma_fence_get(syncobj->fence);
> + ret = 1;
> + } else {
> + *fence = NULL;
> + drm_syncobj_add_callback_locked(syncobj, cb, func);
> + ret = 0;
> + }
> + spin_unlock(&syncobj->lock);
> +
> + return ret;
> +}
> +
> /**
> * drm_syncobj_add_callback - adds a callback to syncobj::cb_list
> * @syncobj: Sync object to which to add the callback
> @@ -526,6 +548,159 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
> &args->handle);
> }
>
> +static int drm_syncobj_signaled(struct drm_syncobj *syncobj,
> + uint32_t wait_flags)
> +{
> + struct dma_fence *fence;
> + int ret;
> +
> + fence = drm_syncobj_fence_get(syncobj);
> + if (!fence) {
> + if (wait_flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
> + return 0;
> + else
> + return -EINVAL;
> + }
> +
> + ret = dma_fence_is_signaled(fence) ? 1 : 0;
> +
> + dma_fence_put(fence);
> +
> + return ret;
> +}
> +
> +struct syncobj_wait_entry {
> + struct task_struct *task;
> + struct dma_fence *fence;
> + struct dma_fence_cb fence_cb;
> + struct drm_syncobj_cb syncobj_cb;
> +};
> +
> +static void syncobj_wait_fence_func(struct dma_fence *fence,
> + struct dma_fence_cb *cb)
> +{
> + struct syncobj_wait_entry *wait =
> + container_of(cb, struct syncobj_wait_entry, fence_cb);
> +
> + wake_up_process(wait->task);
> +}
> +
> +static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
> + struct drm_syncobj_cb *cb)
> +{
> + struct syncobj_wait_entry *wait =
> + container_of(cb, struct syncobj_wait_entry, syncobj_cb);
> +
> + /* This happens inside the syncobj lock */
> + wait->fence = dma_fence_get(syncobj->fence);
> + wake_up_process(wait->task);
> +}
> +
> +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
> + uint32_t count,
> + uint32_t flags,
> + signed long timeout,
> + uint32_t *idx)
> +{
> + struct syncobj_wait_entry *entries;
> + struct dma_fence *fence;
> + signed long ret;
> + uint32_t signaled_count, i;
> +
> + if (timeout == 0) {
> + signaled_count = 0;
> + for (i = 0; i < count; ++i) {
> + ret = drm_syncobj_signaled(syncobjs[i], flags);
> + if (ret < 0)
> + return ret;
> + if (ret == 0)
> + continue;
> + if (signaled_count == 0 && idx)
> + *idx = i;
> + signaled_count++;
> + }
> +
> + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
> + return signaled_count == count ? 1 : 0;
> + else
> + return signaled_count > 0 ? 1 : 0;
> + }
> +
> + entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
> + if (!entries)
> + return -ENOMEM;
> +
> + for (i = 0; i < count; ++i) {
> + entries[i].task = current;
> +
> + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
> + drm_syncobj_fence_get_or_add_callback(syncobjs[i],
> + &entries[i].fence,
> + &entries[i].syncobj_cb,
> + syncobj_wait_syncobj_func);
> + } else {
> + entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
> + if (!entries[i].fence) {
> + ret = -EINVAL;
> + goto err_cleanup_entries;
> + }
So we are taking a snapshot here. It looks like this could have been
done using a dma_fence_array + dma_fence_proxy for capturing the future
fence.
> + ret = timeout;
> + while (ret > 0) {
> + signaled_count = 0;
> + for (i = 0; i < count; ++i) {
> + fence = entries[i].fence;
> + if (!fence)
> + continue;
> +
> + if (dma_fence_is_signaled(fence) ||
> + (!entries[i].fence_cb.func &&
> + dma_fence_add_callback(fence,
> + &entries[i].fence_cb,
> + syncobj_wait_fence_func))) {
> + /* The fence has been signaled */
> + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
> + signaled_count++;
> + } else {
> + if (idx)
> + *idx = i;
> + goto done_waiting;
> + }
> + }
> + }
> +
> + if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) &&
> + signaled_count == count)
> + goto done_waiting;
> +
> + set_current_state(TASK_INTERRUPTIBLE);
This is the first step in the wait-loop: you set TASK_INTERRUPTIBLE
before doing any checks, so that if any state changes whilst you are in
the middle of those checks, the schedule_timeout is a nop and you can
check again.
-Chris
More information about the dri-devel
mailing list