[PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)
Jason Ekstrand
jason at jlekstrand.net
Wed Aug 9 21:09:23 UTC 2017
On Wed, Aug 9, 2017 at 11:25 AM, Christian König <deathsimple at vodafone.de>
wrote:
> Am 09.08.2017 um 19:57 schrieb Chris Wilson:
>
>> Quoting Jason Ekstrand (2017-08-09 18:00:54)
>>
>>> Vulkan VkFence semantics require that the application be able to perform
>>> a CPU wait on work which may not yet have been submitted. This is
>>> perfectly safe because the CPU wait has a timeout which will get
>>> triggered eventually if no work is ever submitted. This behavior is
>>> advantageous for multi-threaded workloads because, so long as all of the
>>> threads agree on what fences to use up-front, you don't have the extra
>>> cross-thread synchronization cost of thread A telling thread B that it
>>> has submitted its dependent work and thread B is now free to wait.
>>>
>>> Within a single process, this can be implemented in the userspace driver
>>> by doing exactly the same kind of tracking the app would have to do
>>> using posix condition variables or similar. However, in order for this
>>> to work cross-process (as is required by VK_KHR_external_fence), we need
>>> to handle this in the kernel.
>>>
>>> This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which
>>> instructs the IOCTL to wait for the syncobj to have a non-null fence and
>>> then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can
>>> easily get the Vulkan behavior.
>>>
>>> v2:
>>> - Fix a bug in the invalid syncobj error path
>>> - Unify the wait-all and wait-any cases
>>>
>>> Signed-off-by: Jason Ekstrand <jason at jlekstrand.net>
>>> Cc: Dave Airlie <airlied at redhat.com>
>>> ---
>>>
>>> I realized today (this is what happens when you sleep) that it takes
>>> almost
>>> no work to make the wait-any path also handle wait-all. By unifying the
>>> two, I deleted over a hundred lines of code from the implementation.
>>>
>>> drivers/gpu/drm/drm_syncobj.c | 243 ++++++++++++++++++++++++++++++++--------
>>> include/uapi/drm/drm.h | 1 +
>>> 2 files changed, 199 insertions(+), 45 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>>> index 510dfc2..5e7f654 100644
>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>> @@ -51,6 +51,7 @@
>>> #include <linux/fs.h>
>>> #include <linux/anon_inodes.h>
>>> #include <linux/sync_file.h>
>>> +#include <linux/sched/signal.h>
>>> #include "drm_internal.h"
>>> #include <drm/drm_syncobj.h>
>>> @@ -104,6 +105,27 @@ static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj,
>>>
>> This is a bit of the puzzle that is missing.
>>
>
It's in the previous patch.
>>> list_add_tail(&cb->node, &syncobj->cb_list);
>>> }
>>> +static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
>>> +                                                 struct dma_fence **fence,
>>> +                                                 struct drm_syncobj_cb *cb,
>>> +                                                 drm_syncobj_func_t func)
>>> +{
>>> + int ret;
>>> +
>>> + spin_lock(&syncobj->lock);
>>> + if (syncobj->fence) {
>>> + *fence = dma_fence_get(syncobj->fence);
>>> + ret = 1;
>>> + } else {
>>> + *fence = NULL;
>>> + drm_syncobj_add_callback_locked(syncobj, cb, func);
>>> + ret = 0;
>>> + }
>>> + spin_unlock(&syncobj->lock);
>>> +
>>> + return ret;
>>> +}
>>> +
>>> /**
>>> * drm_syncobj_add_callback - adds a callback to syncobj::cb_list
>>> * @syncobj: Sync object to which to add the callback
>>> @@ -526,6 +548,159 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>> &args->handle);
>>> }
>>> +static int drm_syncobj_signaled(struct drm_syncobj *syncobj,
>>> + uint32_t wait_flags)
>>> +{
>>> + struct dma_fence *fence;
>>> + int ret;
>>> +
>>> + fence = drm_syncobj_fence_get(syncobj);
>>> + if (!fence) {
>>> + if (wait_flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
>>> + return 0;
>>> + else
>>> + return -EINVAL;
>>> + }
>>> +
>>> + ret = dma_fence_is_signaled(fence) ? 1 : 0;
>>> +
>>> + dma_fence_put(fence);
>>> +
>>> + return ret;
>>> +}
>>> +
>>> +struct syncobj_wait_entry {
>>> + struct task_struct *task;
>>> + struct dma_fence *fence;
>>> + struct dma_fence_cb fence_cb;
>>> + struct drm_syncobj_cb syncobj_cb;
>>> +};
>>> +
>>> +static void syncobj_wait_fence_func(struct dma_fence *fence,
>>> + struct dma_fence_cb *cb)
>>> +{
>>> + struct syncobj_wait_entry *wait =
>>> + container_of(cb, struct syncobj_wait_entry, fence_cb);
>>> +
>>> + wake_up_process(wait->task);
>>> +}
>>> +
>>> +static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
>>> + struct drm_syncobj_cb *cb)
>>> +{
>>> + struct syncobj_wait_entry *wait =
>>> + container_of(cb, struct syncobj_wait_entry, syncobj_cb);
>>> +
>>> + /* This happens inside the syncobj lock */
>>> + wait->fence = dma_fence_get(syncobj->fence);
>>> + wake_up_process(wait->task);
>>> +}
>>> +
>>> +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>> + uint32_t count,
>>> + uint32_t flags,
>>> + signed long timeout,
>>> + uint32_t *idx)
>>> +{
>>> + struct syncobj_wait_entry *entries;
>>> + struct dma_fence *fence;
>>> + signed long ret;
>>> + uint32_t signaled_count, i;
>>> +
>>> + if (timeout == 0) {
>>> + signaled_count = 0;
>>> + for (i = 0; i < count; ++i) {
>>> + ret = drm_syncobj_signaled(syncobjs[i], flags);
>>> + if (ret < 0)
>>> + return ret;
>>> + if (ret == 0)
>>> + continue;
>>> + if (signaled_count == 0 && idx)
>>> + *idx = i;
>>> + signaled_count++;
>>> + }
>>> +
>>> + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
>>> + return signaled_count == count ? 1 : 0;
>>> + else
>>> + return signaled_count > 0 ? 1 : 0;
>>> + }
>>> +
>>> + entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
>>> + if (!entries)
>>> + return -ENOMEM;
>>> +
>>> + for (i = 0; i < count; ++i) {
>>> + entries[i].task = current;
>>> +
>>> + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>> +                        drm_syncobj_fence_get_or_add_callback(syncobjs[i],
>>> +                                                              &entries[i].fence,
>>> +                                                              &entries[i].syncobj_cb,
>>> +                                                              syncobj_wait_syncobj_func);
>>> + } else {
>>> +                        entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
>>> + if (!entries[i].fence) {
>>> + ret = -EINVAL;
>>> + goto err_cleanup_entries;
>>> + }
>>>
>> So we are taking a snapshot here. It looks like this could have been
>> done using a dma_fence_array + dma_fence_proxy for capturing the future
>> fence.
>>
>
I'm not sure what you mean. Is that something in the future fence series
that I should be looking at?
>>> + ret = timeout;
>>> + while (ret > 0) {
>>> + signaled_count = 0;
>>> + for (i = 0; i < count; ++i) {
>>> + fence = entries[i].fence;
>>> + if (!fence)
>>> + continue;
>>> +
>>> + if (dma_fence_is_signaled(fence) ||
>>> + (!entries[i].fence_cb.func &&
>>> + dma_fence_add_callback(fence,
>>> + &entries[i].fence_cb,
>>> +                                            syncobj_wait_fence_func))) {
>>> + /* The fence has been signaled */
>>> +                                if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
>>> + signaled_count++;
>>> + } else {
>>> + if (idx)
>>> + *idx = i;
>>> + goto done_waiting;
>>> + }
>>> + }
>>> + }
>>> +
>>> + if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) &&
>>> + signaled_count == count)
>>> + goto done_waiting;
>>> +
>>> + set_current_state(TASK_INTERRUPTIBLE);
>>>
>> This is the first step in the wait-loop, you set TASK_INTERRUPTIBLE
>> before doing any checks. So that if any state changes whilst you are in
>> the middle of those checks, the schedule_timeout is a nop and you can
>> check again.
>>
>
> Yeah, completely agree.
>
> I would rather drop the approach with the custom wait and try to use
> wait_event_interruptible here.
>
> As far as I can see that should make the whole patch much cleaner in
> general.
>
Sounds good to me.
--Jason
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/dri-devel/attachments/20170809/db40313c/attachment-0001.html>
More information about the dri-devel
mailing list