[PATCH 3/9] drm/syncobj: add support for timeline point wait v8
Koenig, Christian
Christian.Koenig at amd.com
Mon Mar 18 17:20:30 UTC 2019
Am 18.03.19 um 17:59 schrieb Lionel Landwerlin:
> On 15/03/2019 12:09, Chunming Zhou wrote:
>> points array is one-to-one match with syncobjs array.
>> v2:
>> add seperate ioctl for timeline point wait, otherwise break uapi.
>> v3:
>> userspace can specify two kinds waits::
>> a. Wait for time point to be completed.
>> b. and wait for time point to become available
>> v4:
>> rebase
>> v5:
>> add comment for xxx_WAIT_AVAILABLE
>> v6: rebase and rework on new container
>> v7: drop _WAIT_COMPLETED, it is the default anyway
>> v8: correctly handle garbage collected fences
>>
>> Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> Cc: Daniel Rakos <Daniel.Rakos at amd.com>
>> Cc: Jason Ekstrand <jason at jlekstrand.net>
>> Cc: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
>> Cc: Dave Airlie <airlied at redhat.com>
>> Cc: Chris Wilson <chris at chris-wilson.co.uk>
>> ---
>> drivers/gpu/drm/drm_internal.h | 2 +
>> drivers/gpu/drm/drm_ioctl.c | 2 +
>> drivers/gpu/drm/drm_syncobj.c | 153 ++++++++++++++++++++++++++-------
>> include/uapi/drm/drm.h | 15 ++++
>> 4 files changed, 143 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_internal.h
>> b/drivers/gpu/drm/drm_internal.h
>> index 251d67e04c2d..331ac6225b58 100644
>> --- a/drivers/gpu/drm/drm_internal.h
>> +++ b/drivers/gpu/drm/drm_internal.h
>> @@ -182,6 +182,8 @@ int drm_syncobj_fd_to_handle_ioctl(struct
>> drm_device *dev, void *data,
>> struct drm_file *file_private);
>> int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>> struct drm_file *file_private);
>> +int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
>> + struct drm_file *file_private);
>> int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
>> struct drm_file *file_private);
>> int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>> index 687943df58e1..c984654646fa 100644
>> --- a/drivers/gpu/drm/drm_ioctl.c
>> +++ b/drivers/gpu/drm/drm_ioctl.c
>> @@ -688,6 +688,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>> DRM_UNLOCKED|DRM_RENDER_ALLOW),
>> DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
>> DRM_UNLOCKED|DRM_RENDER_ALLOW),
>> + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT,
>> drm_syncobj_timeline_wait_ioctl,
>> + DRM_UNLOCKED|DRM_RENDER_ALLOW),
>> DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl,
>> DRM_UNLOCKED|DRM_RENDER_ALLOW),
>> DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>> b/drivers/gpu/drm/drm_syncobj.c
>> index 19a9ce638119..eae51978cda4 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -61,6 +61,7 @@ struct syncobj_wait_entry {
>> struct task_struct *task;
>> struct dma_fence *fence;
>> struct dma_fence_cb fence_cb;
>> + u64 point;
>> };
>> static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
>> @@ -95,6 +96,8 @@ EXPORT_SYMBOL(drm_syncobj_find);
>> static void drm_syncobj_fence_add_wait(struct drm_syncobj *syncobj,
>> struct syncobj_wait_entry *wait)
>> {
>> + struct dma_fence *fence;
>> +
>> if (wait->fence)
>> return;
>> @@ -103,11 +106,15 @@ static void drm_syncobj_fence_add_wait(struct
>> drm_syncobj *syncobj,
>> * have the lock, try one more time just to be sure we don't add a
>> * callback when a fence has already been set.
>> */
>> - if (syncobj->fence)
>> - wait->fence = dma_fence_get(
>> - rcu_dereference_protected(syncobj->fence, 1));
>> - else
>> + fence = dma_fence_get(rcu_dereference_protected(syncobj->fence,
>> 1));
>> + if (!fence || dma_fence_chain_find_seqno(&fence, wait->point)) {
>> + dma_fence_put(fence);
>> list_add_tail(&wait->node, &syncobj->cb_list);
>> + } else if (!fence) {
>> + wait->fence = dma_fence_get_stub();
>> + } else {
>> + wait->fence = fence;
>> + }
>> spin_unlock(&syncobj->lock);
>> }
>> @@ -149,10 +156,8 @@ void drm_syncobj_add_point(struct drm_syncobj
>> *syncobj,
>> dma_fence_chain_init(chain, prev, fence, point);
>> rcu_assign_pointer(syncobj->fence, &chain->base);
>> - list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
>> - list_del_init(&cur->node);
>> + list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node)
>> syncobj_wait_syncobj_func(syncobj, cur);
>> - }
>> spin_unlock(&syncobj->lock);
>> /* Walk the chain once to trigger garbage collection */
>> @@ -184,10 +189,8 @@ void drm_syncobj_replace_fence(struct
>> drm_syncobj *syncobj,
>> rcu_assign_pointer(syncobj->fence, fence);
>> if (fence != old_fence) {
>> - list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
>> - list_del_init(&cur->node);
>> + list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node)
>> syncobj_wait_syncobj_func(syncobj, cur);
>> - }
>> }
>> spin_unlock(&syncobj->lock);
>> @@ -644,13 +647,27 @@ static void syncobj_wait_fence_func(struct
>> dma_fence *fence,
>> static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
>> struct syncobj_wait_entry *wait)
>> {
>> + struct dma_fence *fence;
>> +
>> /* This happens inside the syncobj lock */
>> - wait->fence =
>> dma_fence_get(rcu_dereference_protected(syncobj->fence,
>> - lockdep_is_held(&syncobj->lock)));
>> + fence = rcu_dereference_protected(syncobj->fence,
>> + lockdep_is_held(&syncobj->lock));
>> + dma_fence_get(fence);
>> + if (!fence || dma_fence_chain_find_seqno(&fence, wait->point)) {
>> + dma_fence_put(fence);
>> + return;
>> + } else if (!fence) {
>> + wait->fence = dma_fence_get_stub();
>> + } else {
>> + wait->fence = fence;
>> + }
>> +
>> wake_up_process(wait->task);
>> + list_del_init(&wait->node);
>> }
>> static signed long drm_syncobj_array_wait_timeout(struct
>> drm_syncobj **syncobjs,
>> + void __user *user_points,
>> uint32_t count,
>> uint32_t flags,
>> signed long timeout,
>> @@ -658,12 +675,27 @@ static signed long
>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>> {
>> struct syncobj_wait_entry *entries;
>> struct dma_fence *fence;
>> + uint64_t *points;
>> uint32_t signaled_count, i;
>> - entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
>> - if (!entries)
>> + points = kmalloc_array(count, sizeof(*points), GFP_KERNEL);
>> + if (points == NULL)
>> return -ENOMEM;
>> + if (!user_points) {
>> + memset(points, 0, count * sizeof(uint64_t));
>> +
>> + } else if (copy_from_user(points, user_points,
>> + sizeof(uint64_t) * count)) {
>> + timeout = -EFAULT;
>> + goto err_free_points;
>> + }
>> +
>> + entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
>> + if (!entries) {
>> + timeout = -ENOMEM;
>> + goto err_free_points;
>> + }
>> /* Walk the list of sync objects and initialize entries. We do
>> * this up-front so that we can properly return -EINVAL if
>> there is
>> * a syncobj with a missing fence and then never have the
>> chance of
>> @@ -671,9 +703,13 @@ static signed long
>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>> */
>> signaled_count = 0;
>> for (i = 0; i < count; ++i) {
>> + struct dma_fence *fence;
>> +
>> entries[i].task = current;
>> - entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
>> - if (!entries[i].fence) {
>> + entries[i].point = points[i];
>> + fence = drm_syncobj_fence_get(syncobjs[i]);
>> + if (!fence || dma_fence_chain_find_seqno(&fence, points[i])) {
>> + dma_fence_put(fence);
>> if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>> continue;
>> } else {
>> @@ -682,7 +718,13 @@ static signed long
>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>> }
>> }
>> - if (dma_fence_is_signaled(entries[i].fence)) {
>> + if (fence)
>> + entries[i].fence = fence;
>> + else
>> + entries[i].fence = dma_fence_get_stub();
>> +
>> + if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) ||
>
>
> Hey David,
>
> Could you remind me what DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE is used
> for?
>
> I didn't have a use for it in userspace,
The flag is used to wait for fences for a sequence number to show up.
Christian.
>
> Thanks,
>
> -Lionel
>
>
>> + dma_fence_is_signaled(entries[i].fence)) {
>> if (signaled_count == 0 && idx)
>> *idx = i;
>> signaled_count++;
>> @@ -715,7 +757,8 @@ static signed long
>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>> if (!fence)
>> continue;
>> - if (dma_fence_is_signaled(fence) ||
>> + if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) ||
>> + dma_fence_is_signaled(fence) ||
>> (!entries[i].fence_cb.func &&
>> dma_fence_add_callback(fence,
>> &entries[i].fence_cb,
>> @@ -760,6 +803,9 @@ static signed long
>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>> }
>> kfree(entries);
>> +err_free_points:
>> + kfree(points);
>> +
>> return timeout;
>> }
>> @@ -799,19 +845,33 @@ EXPORT_SYMBOL(drm_timeout_abs_to_jiffies);
>> static int drm_syncobj_array_wait(struct drm_device *dev,
>> struct drm_file *file_private,
>> struct drm_syncobj_wait *wait,
>> - struct drm_syncobj **syncobjs)
>> + struct drm_syncobj_timeline_wait *timeline_wait,
>> + struct drm_syncobj **syncobjs, bool timeline)
>> {
>> - signed long timeout =
>> drm_timeout_abs_to_jiffies(wait->timeout_nsec);
>> + signed long timeout = 0;
>> uint32_t first = ~0;
>> - timeout = drm_syncobj_array_wait_timeout(syncobjs,
>> - wait->count_handles,
>> - wait->flags,
>> - timeout, &first);
>> - if (timeout < 0)
>> - return timeout;
>> -
>> - wait->first_signaled = first;
>> + if (!timeline) {
>> + timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec);
>> + timeout = drm_syncobj_array_wait_timeout(syncobjs,
>> + NULL,
>> + wait->count_handles,
>> + wait->flags,
>> + timeout, &first);
>> + if (timeout < 0)
>> + return timeout;
>> + wait->first_signaled = first;
>> + } else {
>> + timeout =
>> drm_timeout_abs_to_jiffies(timeline_wait->timeout_nsec);
>> + timeout = drm_syncobj_array_wait_timeout(syncobjs,
>> + u64_to_user_ptr(timeline_wait->points),
>> + timeline_wait->count_handles,
>> + timeline_wait->flags,
>> + timeout, &first);
>> + if (timeout < 0)
>> + return timeout;
>> + timeline_wait->first_signaled = first;
>> + }
>> return 0;
>> }
>> @@ -897,13 +957,48 @@ drm_syncobj_wait_ioctl(struct drm_device
>> *dev, void *data,
>> return ret;
>> ret = drm_syncobj_array_wait(dev, file_private,
>> - args, syncobjs);
>> + args, NULL, syncobjs, false);
>> drm_syncobj_array_free(syncobjs, args->count_handles);
>> return ret;
>> }
>> +int
>> +drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
>> + struct drm_file *file_private)
>> +{
>> + struct drm_syncobj_timeline_wait *args = data;
>> + struct drm_syncobj **syncobjs;
>> + int ret = 0;
>> +
>> + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>> + return -ENODEV;
>> +
>> + if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
>> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
>> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE))
>> + return -EINVAL;
>> +
>> + if (args->count_handles == 0)
>> + return -EINVAL;
>> +
>> + ret = drm_syncobj_array_find(file_private,
>> + u64_to_user_ptr(args->handles),
>> + args->count_handles,
>> + &syncobjs);
>> + if (ret < 0)
>> + return ret;
>> +
>> + ret = drm_syncobj_array_wait(dev, file_private,
>> + NULL, args, syncobjs, true);
>> +
>> + drm_syncobj_array_free(syncobjs, args->count_handles);
>> +
>> + return ret;
>> +}
>> +
>> +
>> int
>> drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
>> struct drm_file *file_private)
>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>> index 300f336633f2..0092111d002c 100644
>> --- a/include/uapi/drm/drm.h
>> +++ b/include/uapi/drm/drm.h
>> @@ -737,6 +737,7 @@ struct drm_syncobj_handle {
>> #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>> #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
>> +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2)
>> struct drm_syncobj_wait {
>> __u64 handles;
>> /* absolute timeout */
>> @@ -747,6 +748,19 @@ struct drm_syncobj_wait {
>> __u32 pad;
>> };
>> +struct drm_syncobj_timeline_wait {
>> + __u64 handles;
>> + /* wait on specific timeline point for every handles*/
>> + __u64 points;
>> + /* absolute timeout */
>> + __s64 timeout_nsec;
>> + __u32 count_handles;
>> + __u32 flags;
>> + __u32 first_signaled; /* only valid when not waiting all */
>> + __u32 pad;
>> +};
>> +
>> +
>> struct drm_syncobj_array {
>> __u64 handles;
>> __u32 count_handles;
>> @@ -909,6 +923,7 @@ extern "C" {
>> #define DRM_IOCTL_MODE_GET_LEASE DRM_IOWR(0xC8, struct
>> drm_mode_get_lease)
>> #define DRM_IOCTL_MODE_REVOKE_LEASE DRM_IOWR(0xC9, struct
>> drm_mode_revoke_lease)
>> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct
>> drm_syncobj_timeline_wait)
>> /**
>> * Device specific ioctls should only be in their respective headers
>> * The device specific ioctl range is from 0x40 to 0x9f.
>
>
More information about the amd-gfx
mailing list