[PATCH 1/3] [RFC]drm: add syncobj timeline support v4
zhoucm1
zhoucm1 at amd.com
Wed Sep 12 10:20:51 UTC 2018
On 2018年09月12日 15:22, Christian König wrote:
> Ping? Have you seen my comments here?
Sorry, I didn't see this reply. inline...
>
> Looks like you haven't addressed any of them in your last mail.
>
> Christian.
>
> Am 06.09.2018 um 09:25 schrieb Christian König:
>> Am 06.09.2018 um 08:25 schrieb Chunming Zhou:
>>> VK_KHR_timeline_semaphore:
>>> This extension introduces a new type of semaphore that has an
>>> integer payload
>>> identifying a point in a timeline. Such timeline semaphores support the
>>
>> Drop the term semaphore here, better use syncobj.
This is from VK_KHR_timeline_semaphore extension describe, not my
invention, I just quote it. In kernel side, we call syncobj, in UMD,
they still call semaphore.
>>
>>> following operations:
>>> * CPU query - A host operation that allows querying the payload
>>> of the
>>> timeline semaphore.
>>> * CPU wait - A host operation that allows a blocking wait for a
>>> timeline semaphore to reach a specified value.
>>> * Device wait - A device operation that allows waiting for a
>>> timeline semaphore to reach a specified value.
>>> * Device signal - A device operation that allows advancing the
>>> timeline semaphore to a specified value.
>>>
>>> Since it's a timeline, that means the front time point(PT) always is
>>> signaled before the late PT.
>>> a. signal PT design:
>>> Signal PT fence N depends on PT[N-1] fence and signal opertion
>>> fence, when PT[N] fence is signaled,
>>> the timeline will increase to value of PT[N].
>>> b. wait PT design:
>>> Wait PT fence is signaled by reaching timeline point value, when
>>> timeline is increasing, will compare
>>> wait PTs value with new timeline value, if PT value is lower than
>>> timeline value, then wait PT will be
>>> signaled, otherwise keep in list. semaphore wait operation can wait
>>> on any point of timeline,
>>> so need a RB tree to order them. And wait PT could ahead of signal
>>> PT, we need a sumission fence to
>>> perform that.
>>>
>>> v2:
>>> 1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian)
>>> 2. move unexposed denitions to .c file. (Daniel Vetter)
>>> 3. split up the change to drm_syncobj_find_fence() in a separate
>>> patch. (Christian)
>>> 4. split up the change to drm_syncobj_replace_fence() in a separate
>>> patch.
>>> 5. drop the submission_fence implementation and instead use
>>> wait_event() for that. (Christian)
>>> 6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter)
>>>
>>> v3:
>>> 1. replace normal syncobj with timeline implemenation. (Vetter and
>>> Christian)
>>> a. normal syncobj signal op will create a signal PT to tail of
>>> signal pt list.
>>> b. normal syncobj wait op will create a wait pt with last
>>> signal point, and this wait PT is only signaled by related signal
>>> point PT.
>>> 2. many bug fix and clean up
>>> 3. stub fence moving is moved to other patch.
>>>
>>> v4:
>>> 1. fix RB tree loop with while(node=rb_first(...)). (Christian)
>>> 2. fix syncobj lifecycle. (Christian)
>>> 3. only enable_signaling when there is wait_pt. (Christian)
>>>
>>> Tested by ./deqp-vk -n dEQP-VK*semaphore* for normal syncobj
>>>
>>> Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
>>> Cc: Christian Konig <christian.koenig at amd.com>
>>> Cc: Dave Airlie <airlied at redhat.com>
>>> Cc: Daniel Rakos <Daniel.Rakos at amd.com>
>>> Cc: Daniel Vetter <daniel at ffwll.ch>
>>> ---
>>> drivers/gpu/drm/drm_syncobj.c | 516
>>> +++++++++++++++++----
>>> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +-
>>> include/drm/drm_syncobj.h | 78 ++--
>>> include/uapi/drm/drm.h | 1 +
>>> 4 files changed, 448 insertions(+), 151 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>>> b/drivers/gpu/drm/drm_syncobj.c
>>> index e9ce623d049e..94b31de23858 100644
>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>> @@ -56,6 +56,9 @@
>>> #include "drm_internal.h"
>>> #include <drm/drm_syncobj.h>
>>> +/* merge normal syncobj to timeline syncobj, the point interval
>>> is 1 */
>>> +#define DRM_SYNCOBJ_NORMAL_POINT 1
>>> +
>>> struct drm_syncobj_stub_fence {
>>> struct dma_fence base;
>>> spinlock_t lock;
>>> @@ -82,6 +85,18 @@ static const struct dma_fence_ops
>>> drm_syncobj_stub_fence_ops = {
>>> .release = drm_syncobj_stub_fence_release,
>>> };
>>> +struct drm_syncobj_signal_pt {
>>> + struct dma_fence_array *base;
>>> + u64 value;
>>> + struct list_head list;
>>> +};
>>> +struct drm_syncobj_wait_pt {
>>> + struct drm_syncobj_stub_fence base;
>>> + struct dma_fence *signal_pt_fence;
>>> + struct dma_fence_cb wait_cb;
>>> + struct rb_node node;
>>> + u64 value;
>>> +};
>>> /**
>>> * drm_syncobj_find - lookup and reference a sync object.
>>> @@ -109,61 +124,238 @@ struct drm_syncobj *drm_syncobj_find(struct
>>> drm_file *file_private,
>>> }
>>> EXPORT_SYMBOL(drm_syncobj_find);
>>> -static void drm_syncobj_add_callback_locked(struct drm_syncobj
>>> *syncobj,
>>> - struct drm_syncobj_cb *cb,
>>> - drm_syncobj_func_t func)
>>> +static void drm_syncobj_timeline_init(struct drm_syncobj *syncobj,
>>> + struct drm_syncobj_timeline *syncobj_timeline)
>>
>> Since we merged timeline and singleton syncobj you can drop the extra
>> _timeline_ part in the function name I think.
Will try in v5.
>>
>>> {
>>> - cb->func = func;
>>> - list_add_tail(&cb->node, &syncobj->cb_list);
>>> + spin_lock(&syncobj->lock);
>>> + syncobj_timeline->timeline_context = dma_fence_context_alloc(1);
>>> + syncobj_timeline->timeline = 0;
>>> + syncobj_timeline->signal_point = 0;
>>> + init_waitqueue_head(&syncobj_timeline->wq);
>>> +
>>> + syncobj_timeline->wait_pt_tree = RB_ROOT;
>>> + INIT_LIST_HEAD(&syncobj_timeline->signal_pt_list);
>>> + spin_unlock(&syncobj->lock);
>>> }
>>> -static int drm_syncobj_fence_get_or_add_callback(struct
>>> drm_syncobj *syncobj,
>>> - struct dma_fence **fence,
>>> - struct drm_syncobj_cb *cb,
>>> - drm_syncobj_func_t func)
>>> +static void drm_syncobj_timeline_fini(struct drm_syncobj *syncobj,
>>> + struct drm_syncobj_timeline *syncobj_timeline)
>>> {
>>> - int ret;
>>> -
>>> - *fence = drm_syncobj_fence_get(syncobj);
>>> - if (*fence)
>>> - return 1;
>>> + struct rb_node *node = NULL;
>>> + struct drm_syncobj_wait_pt *wait_pt = NULL;
>>> + struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
>>> spin_lock(&syncobj->lock);
>>> - /* We've already tried once to get a fence and failed. Now that we
>>> - * have the lock, try one more time just to be sure we don't add a
>>> - * callback when a fence has already been set.
>>> - */
>>> - if (syncobj->fence) {
>>> - *fence =
>>> dma_fence_get(rcu_dereference_protected(syncobj->fence,
>>> - lockdep_is_held(&syncobj->lock)));
>>> - ret = 1;
>>> - } else {
>>> - *fence = NULL;
>>> - drm_syncobj_add_callback_locked(syncobj, cb, func);
>>> - ret = 0;
>>> + if (!RB_EMPTY_ROOT(&syncobj_timeline->wait_pt_tree))
>>> + DRM_ERROR("There still are advanced wait_pts! pls check!");
>>> + while ((node = rb_first(&syncobj_timeline->wait_pt_tree))) {
>>> + wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>> + rb_erase(&wait_pt->node,
>>> + &syncobj_timeline->wait_pt_tree);
>>> + RB_CLEAR_NODE(&wait_pt->node);
>>> + if (!wait_pt->signal_pt_fence) {
>>> + /* if wait_pt still is advanced, fake signal it.
>>> + * generally shouldn't happens */
>>> + dma_fence_set_error(&wait_pt->base.base, -ENOENT);
>>> + dma_fence_signal(&wait_pt->base.base);
>>> + }
>>> + /* kfree(wait_pt) is excuted by fence put */
>>> + dma_fence_put(&wait_pt->base.base);
>>> + }
>>> + list_for_each_entry_safe(signal_pt, tmp,
>>> + &syncobj_timeline->signal_pt_list, list) {
>>> + list_del(&signal_pt->list);
>>> + dma_fence_put(&signal_pt->base->base);
>>> + kfree(signal_pt);
>>> }
>>> spin_unlock(&syncobj->lock);
>>> +}
>>> - return ret;
>>> +static void wait_pt_func(struct dma_fence *fence, struct
>>> dma_fence_cb *cb)
>>> +{
>>> + struct drm_syncobj_wait_pt *wait_pt =
>>> + container_of(cb, struct drm_syncobj_wait_pt, wait_cb);
>>> +
>>> + dma_fence_signal(&wait_pt->base.base);
>>> + dma_fence_put(wait_pt->signal_pt_fence);
>>> + wait_pt->signal_pt_fence = NULL;
>>> + dma_fence_put(&wait_pt->base.base);
>>> +}
>>> +
>>> +static void drm_syncobj_find_signal_pt_for_wait_pt(struct
>>> drm_syncobj *syncobj,
>>> + struct drm_syncobj_wait_pt *wait_pt)
>>> +{
>>
>> That whole approach still looks horrible complicated to me.
It's already very close to what you said before.
>>
>> Especially the separation of signal and wait pt is completely
>> unnecessary as far as I can see.
>> When a wait pt is requested we just need to search for the signal
>> point which it will trigger.
Yeah, I tried this, but when I implement cpu wait ioctl on specific
point, we need a advanced wait pt fence, otherwise, we could still need
old syncobj cb.
Thanks,
David Zhou
>>
>> Regards,
>> Christian.
>>
>>> + struct drm_syncobj_timeline *timeline =
>>> &syncobj->syncobj_timeline;
>>> + struct drm_syncobj_signal_pt *signal_pt;
>>> + int ret;
>>> +
>>> + if (wait_pt->signal_pt_fence) {
>>> + return;
>>> + } else if ((syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) &&
>>> + (wait_pt->value <= timeline->timeline)) {
>>> + dma_fence_signal(&wait_pt->base.base);
>>> + rb_erase(&wait_pt->node,
>>> + &timeline->wait_pt_tree);
>>> + RB_CLEAR_NODE(&wait_pt->node);
>>> + dma_fence_put(&wait_pt->base.base);
>>> + return;
>>> + }
>>> +
>>> + list_for_each_entry(signal_pt, &timeline->signal_pt_list, list) {
>>> + if (wait_pt->value < signal_pt->value)
>>> + continue;
>>> + if ((syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) &&
>>> + (wait_pt->value != signal_pt->value))
>>> + continue;
>>> + wait_pt->signal_pt_fence =
>>> dma_fence_get(&signal_pt->base->base);
>>> + ret = dma_fence_add_callback(wait_pt->signal_pt_fence,
>>> + &wait_pt->wait_cb,
>>> + wait_pt_func);
>>> + if (ret == -ENOENT) {
>>> + dma_fence_signal(&wait_pt->base.base);
>>> + dma_fence_put(wait_pt->signal_pt_fence);
>>> + wait_pt->signal_pt_fence = NULL;
>>> + rb_erase(&wait_pt->node,
>>> + &timeline->wait_pt_tree);
>>> + RB_CLEAR_NODE(&wait_pt->node);
>>> + dma_fence_put(&wait_pt->base.base);
>>> + } else if (ret < 0) {
>>> + dma_fence_put(wait_pt->signal_pt_fence);
>>> + DRM_ERROR("add callback error!");
>>> + } else {
>>> + /* after adding callback, remove from rb tree */
>>> + rb_erase(&wait_pt->node,
>>> + &timeline->wait_pt_tree);
>>> + RB_CLEAR_NODE(&wait_pt->node);
>>> + }
>>> + return;
>>> + }
>>> + /* signaled pt was released */
>>> + if (!wait_pt->signal_pt_fence && (wait_pt->value <=
>>> + timeline->signal_point)) {
>>> + dma_fence_signal(&wait_pt->base.base);
>>> + rb_erase(&wait_pt->node,
>>> + &timeline->wait_pt_tree);
>>> + RB_CLEAR_NODE(&wait_pt->node);
>>> + dma_fence_put(&wait_pt->base.base);
>>> + }
>>> }
>>> -void drm_syncobj_add_callback(struct drm_syncobj *syncobj,
>>> - struct drm_syncobj_cb *cb,
>>> - drm_syncobj_func_t func)
>>> +static int drm_syncobj_timeline_create_signal_pt(struct drm_syncobj
>>> *syncobj,
>>> + struct dma_fence *fence,
>>> + u64 point)
>>> {
>>> + struct drm_syncobj_signal_pt *signal_pt =
>>> + kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL);
>>> + struct drm_syncobj_signal_pt *tail_pt;
>>> + struct dma_fence **fences;
>>> + struct rb_node *node;
>>> + struct drm_syncobj_wait_pt *tail_wait_pt = NULL;
>>> + int num_fences = 0;
>>> + int ret = 0, i;
>>> +
>>> + if (!signal_pt)
>>> + return -ENOMEM;
>>> + if (syncobj->syncobj_timeline.signal_point >= point) {
>>> + DRM_WARN("A later signal is ready!");
>>> + goto out;
>>> + }
>>> + if (!fence)
>>> + goto out;
>>> +
>>> + fences = kmalloc_array(sizeof(void *), 2, GFP_KERNEL);
>>> + if (!fences)
>>> + goto out;
>>> + fences[num_fences++] = dma_fence_get(fence);
>>> + /* timeline syncobj must take this dependency */
>>> + if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>> + spin_lock(&syncobj->lock);
>>> + if (!list_empty(&syncobj->syncobj_timeline.signal_pt_list)) {
>>> + tail_pt =
>>> list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
>>> + struct drm_syncobj_signal_pt, list);
>>> + fences[num_fences++] =
>>> dma_fence_get(&tail_pt->base->base);
>>> + }
>>> + spin_unlock(&syncobj->lock);
>>> + }
>>> + signal_pt->base = dma_fence_array_create(num_fences, fences,
>>> + syncobj->syncobj_timeline.timeline_context,
>>> + point, false);
>>> + if (!signal_pt->base)
>>> + goto fail;
>>> +
>>> + signal_pt->value = point;
>>> spin_lock(&syncobj->lock);
>>> - drm_syncobj_add_callback_locked(syncobj, cb, func);
>>> + syncobj->syncobj_timeline.signal_point = point;
>>> + INIT_LIST_HEAD(&signal_pt->list);
>>> + list_add_tail(&signal_pt->list,
>>> &syncobj->syncobj_timeline.signal_pt_list);
>>> +
>>> + /* check if there is advanced wait */
>>> + node = rb_last(&syncobj->syncobj_timeline.wait_pt_tree);
>>> + if (node)
>>> + tail_wait_pt = rb_entry(node, struct drm_syncobj_wait_pt,
>>> node);
>>> + if (tail_wait_pt && !tail_wait_pt->signal_pt_fence) {
>>> + for (node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
>>> + node != NULL; node = rb_next(node)) {
>>> + struct drm_syncobj_wait_pt *wait_pt =
>>> + rb_entry(node, struct drm_syncobj_wait_pt,
>>> + node);
>>> +
>>> + drm_syncobj_find_signal_pt_for_wait_pt(syncobj,
>>> + wait_pt);
>>> + }
>>> + }
>>> +
>>> spin_unlock(&syncobj->lock);
>>> + wake_up_all(&syncobj->syncobj_timeline.wq);
>>> +
>>> + return 0;
>>> +fail:
>>> + for (i = 0; i < num_fences; i++)
>>> + dma_fence_put(fences[i]);
>>> + kfree(fences);
>>> +out:
>>> + kfree(signal_pt);
>>> + return ret;
>>> }
>>> -void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
>>> - struct drm_syncobj_cb *cb)
>>> +static void drm_syncobj_timeline_garbage_collection(struct
>>> drm_syncobj *syncobj)
>>> {
>>> + struct drm_syncobj_timeline *timeline =
>>> &syncobj->syncobj_timeline;
>>> + struct rb_node *node;
>>> + struct drm_syncobj_wait_pt *wait_pt;
>>> + struct drm_syncobj_signal_pt *signal_pt, *tmp;
>>> +
>>> spin_lock(&syncobj->lock);
>>> - list_del_init(&cb->node);
>>> + node = rb_first(&timeline->wait_pt_tree);
>>> + while (node) {
>>> + wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>> + if (!dma_fence_is_signaled(&wait_pt->base.base)) {
>>> + node = rb_next(node);
>>> + continue;
>>> + }
>>> + rb_erase(&wait_pt->node,
>>> + &timeline->wait_pt_tree);
>>> + RB_CLEAR_NODE(&wait_pt->node);
>>> + /* kfree(wait_pt) is excuted by fence put */
>>> + dma_fence_put(&wait_pt->base.base);
>>> + node = rb_first(&timeline->wait_pt_tree);
>>> + }
>>> + list_for_each_entry_safe(signal_pt, tmp,
>>> + &timeline->signal_pt_list, list) {
>>> + if (dma_fence_is_signaled(&signal_pt->base->base)) {
>>> + timeline->timeline = signal_pt->value;
>>> + list_del(&signal_pt->list);
>>> + dma_fence_put(&signal_pt->base->base);
>>> + kfree(signal_pt);
>>> + } else {
>>> + /*signal_pt is in order in list, from small to big, so
>>> + * the later must not be signal either */
>>> + break;
>>> + }
>>> + }
>>> +
>>> spin_unlock(&syncobj->lock);
>>> }
>>> -
>>> /**
>>> * drm_syncobj_replace_fence - replace fence in a sync object.
>>> * @syncobj: Sync object to replace fence in
>>> @@ -176,28 +368,29 @@ void drm_syncobj_replace_fence(struct
>>> drm_syncobj *syncobj,
>>> u64 point,
>>> struct dma_fence *fence)
>>> {
>>> - struct dma_fence *old_fence;
>>> - struct drm_syncobj_cb *cur, *tmp;
>>> -
>>> - if (fence)
>>> - dma_fence_get(fence);
>>> -
>>> - spin_lock(&syncobj->lock);
>>> -
>>> - old_fence = rcu_dereference_protected(syncobj->fence,
>>> - lockdep_is_held(&syncobj->lock));
>>> - rcu_assign_pointer(syncobj->fence, fence);
>>> -
>>> - if (fence != old_fence) {
>>> - list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
>>> - list_del_init(&cur->node);
>>> - cur->func(syncobj, cur);
>>> + drm_syncobj_timeline_garbage_collection(syncobj);
>>> + if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>> + if (fence)
>>> + drm_syncobj_timeline_create_signal_pt(syncobj, fence,
>>> + point);
>>> + return;
>>> + } else if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
>>> + u64 pt_value;
>>> +
>>> + if (!fence) {
>>> + drm_syncobj_timeline_fini(syncobj,
>>> + &syncobj->syncobj_timeline);
>>> + drm_syncobj_timeline_init(syncobj,
>>> + &syncobj->syncobj_timeline);
>>> + return;
>>> }
>>> + pt_value = syncobj->syncobj_timeline.signal_point +
>>> + DRM_SYNCOBJ_NORMAL_POINT;
>>> + drm_syncobj_timeline_create_signal_pt(syncobj, fence,
>>> pt_value);
>>> + return;
>>> + } else {
>>> + DRM_ERROR("the syncobj type isn't support\n");
>>> }
>>> -
>>> - spin_unlock(&syncobj->lock);
>>> -
>>> - dma_fence_put(old_fence);
>>> }
>>> EXPORT_SYMBOL(drm_syncobj_replace_fence);
>>> @@ -220,6 +413,120 @@ static int
>>> drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>> return 0;
>>> }
>>> +static struct drm_syncobj_wait_pt *
>>> +drm_syncobj_timeline_lookup_wait_pt_fence(struct drm_syncobj *syncobj,
>>> + u64 point,
>>> + struct dma_fence **fence)
>>> +{
>>> + struct rb_node *node =
>>> syncobj->syncobj_timeline.wait_pt_tree.rb_node;
>>> + struct drm_syncobj_wait_pt *wait_pt = NULL;
>>> +
>>> + spin_lock(&syncobj->lock);
>>> + while(node) {
>>> + int result;
>>> +
>>> + wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>> + result = point - wait_pt->value;
>>> + if (result < 0) {
>>> + node = node->rb_left;
>>> + } else if (result > 0) {
>>> + node = node->rb_right;
>>> + } else {
>>> + if (fence)
>>> + *fence = dma_fence_get(&wait_pt->base.base);
>>> + break;
>>> + }
>>> + }
>>> + spin_unlock(&syncobj->lock);
>>> +
>>> + return wait_pt;
>>> +}
>>> +
>>> +static struct drm_syncobj_wait_pt *
>>> +drm_syncobj_timeline_create_wait_pt_and_return_fence(struct
>>> drm_syncobj *syncobj,
>>> + u64 point,
>>> + struct dma_fence **fence)
>>> +{
>>> + struct drm_syncobj_wait_pt *wait_pt = kzalloc(sizeof(*wait_pt),
>>> + GFP_KERNEL);
>>> + struct drm_syncobj_stub_fence *base;
>>> + struct rb_node **new =
>>> &(syncobj->syncobj_timeline.wait_pt_tree.rb_node), *parent = NULL;
>>> +
>>> + if (!wait_pt)
>>> + return NULL;
>>> + base = &wait_pt->base;
>>> + spin_lock_init(&base->lock);
>>> + dma_fence_init(&base->base,
>>> + &drm_syncobj_stub_fence_ops,
>>> + &base->lock,
>>> + syncobj->syncobj_timeline.timeline_context, point);
>>> + wait_pt->value = point;
>>> + wait_pt->signal_pt_fence = NULL;
>>> +
>>> + /* wait pt must be in an order, so that we can easily lookup
>>> and signal
>>> + * it */
>>> + spin_lock(&syncobj->lock);
>>> + while(*new) {
>>> + struct drm_syncobj_wait_pt *this =
>>> + rb_entry(*new, struct drm_syncobj_wait_pt, node);
>>> + int result = wait_pt->value - this->value;
>>> +
>>> + parent = *new;
>>> + if (result < 0)
>>> + new = &((*new)->rb_left);
>>> + else if (result > 0)
>>> + new = &((*new)->rb_right);
>>> + else
>>> + goto exist;
>>> + }
>>> + if (fence)
>>> + *fence = dma_fence_get(&wait_pt->base.base);
>>> + rb_link_node(&wait_pt->node, parent, new);
>>> + rb_insert_color(&wait_pt->node,
>>> &syncobj->syncobj_timeline.wait_pt_tree);
>>> + spin_unlock(&syncobj->lock);
>>> + return wait_pt;
>>> +exist:
>>> + spin_unlock(&syncobj->lock);
>>> + dma_fence_put(&wait_pt->base.base);
>>> + wait_pt = drm_syncobj_timeline_lookup_wait_pt_fence(syncobj,
>>> point,
>>> + fence);
>>> + return wait_pt;
>>> +}
>>> +
>>> +static struct dma_fence *
>>> +drm_syncobj_timeline_point_get(struct drm_syncobj *syncobj, u64
>>> point, u64 flags)
>>> +{
>>> + struct drm_syncobj_wait_pt *wait_pt;
>>> + struct dma_fence *fence = NULL;
>>> +
>>> + /* check if the wait pt exists */
>>> + wait_pt = drm_syncobj_timeline_lookup_wait_pt_fence(syncobj,
>>> point,
>>> + &fence);
>>> + if (!fence) {
>>> + /* This is a new wait pt, so create it */
>>> + wait_pt =
>>> drm_syncobj_timeline_create_wait_pt_and_return_fence(syncobj, point,
>>> + &fence);
>>> + if (!fence)
>>> + return NULL;
>>> + }
>>> + if (wait_pt) {
>>> + int ret = 0;
>>> +
>>> + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>> + ret =
>>> wait_event_interruptible_timeout(syncobj->syncobj_timeline.wq,
>>> + wait_pt->value <=
>>> syncobj->syncobj_timeline.signal_point,
>>> + msecs_to_jiffies(10000)); /*
>>> wait 10s */
>>> + if (ret <= 0)
>>> + return NULL;
>>> + }
>>> + spin_lock(&syncobj->lock);
>>> + drm_syncobj_find_signal_pt_for_wait_pt(syncobj, wait_pt);
>>> + spin_unlock(&syncobj->lock);
>>> + return fence;
>>> + }
>>> + return NULL;
>>> +}
>>> +
>>> /**
>>> * drm_syncobj_find_fence - lookup and reference the fence in a
>>> sync object
>>> * @file_private: drm file private pointer
>>> @@ -234,20 +541,46 @@ static int
>>> drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>> * contains a reference to the fence, which must be released by
>>> calling
>>> * dma_fence_put().
>>> */
>>> -int drm_syncobj_find_fence(struct drm_file *file_private,
>>> - u32 handle, u64 point,
>>> - struct dma_fence **fence)
>>> +int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point,
>>> + u64 flags, struct dma_fence **fence)
>>> {
>>> - struct drm_syncobj *syncobj = drm_syncobj_find(file_private,
>>> handle);
>>> int ret = 0;
>>> if (!syncobj)
>>> return -ENOENT;
>>> - *fence = drm_syncobj_fence_get(syncobj);
>>> + drm_syncobj_timeline_garbage_collection(syncobj);
>>> + if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
>>> + /*NORMAL syncobj always wait on last pt */
>>> + u64 tail_pt_value = syncobj->syncobj_timeline.signal_point;
>>> +
>>> + if (tail_pt_value == 0)
>>> + tail_pt_value += DRM_SYNCOBJ_NORMAL_POINT;
>>> + /* NORMAL syncobj doesn't care point value */
>>> + WARN_ON(point != 0);
>>> + *fence = drm_syncobj_timeline_point_get(syncobj,
>>> tail_pt_value,
>>> + flags);
>>> + } else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>> + *fence = drm_syncobj_timeline_point_get(syncobj, point,
>>> + flags);
>>> + } else {
>>> + DRM_ERROR("Don't support this type syncobj\n");
>>> + *fence = NULL;
>>> + }
>>> if (!*fence) {
>>> ret = -EINVAL;
>>> }
>>> + return ret;
>>> +}
>>> +EXPORT_SYMBOL(drm_syncobj_search_fence);
>>> +int drm_syncobj_find_fence(struct drm_file *file_private,
>>> + u32 handle, u64 point,
>>> + struct dma_fence **fence) {
>>> + struct drm_syncobj *syncobj = drm_syncobj_find(file_private,
>>> handle);
>>> +
>>> + int ret = drm_syncobj_search_fence(syncobj, point,
>>> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
>>> + fence);
>>> drm_syncobj_put(syncobj);
>>> return ret;
>>> }
>>> @@ -264,7 +597,7 @@ void drm_syncobj_free(struct kref *kref)
>>> struct drm_syncobj *syncobj = container_of(kref,
>>> struct drm_syncobj,
>>> refcount);
>>> - drm_syncobj_replace_fence(syncobj, 0, NULL);
>>> + drm_syncobj_timeline_fini(syncobj, &syncobj->syncobj_timeline);
>>> kfree(syncobj);
>>> }
>>> EXPORT_SYMBOL(drm_syncobj_free);
>>> @@ -292,8 +625,12 @@ int drm_syncobj_create(struct drm_syncobj
>>> **out_syncobj, uint32_t flags,
>>> return -ENOMEM;
>>> kref_init(&syncobj->refcount);
>>> - INIT_LIST_HEAD(&syncobj->cb_list);
>>> spin_lock_init(&syncobj->lock);
>>> + if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE)
>>> + syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
>>> + else
>>> + syncobj->type = DRM_SYNCOBJ_TYPE_NORMAL;
>>> + drm_syncobj_timeline_init(syncobj, &syncobj->syncobj_timeline);
>>> if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
>>> ret = drm_syncobj_assign_null_handle(syncobj);
>>> @@ -651,7 +988,6 @@ struct syncobj_wait_entry {
>>> struct task_struct *task;
>>> struct dma_fence *fence;
>>> struct dma_fence_cb fence_cb;
>>> - struct drm_syncobj_cb syncobj_cb;
>>> };
>>> static void syncobj_wait_fence_func(struct dma_fence *fence,
>>> @@ -663,18 +999,6 @@ static void syncobj_wait_fence_func(struct
>>> dma_fence *fence,
>>> wake_up_process(wait->task);
>>> }
>>> -static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
>>> - struct drm_syncobj_cb *cb)
>>> -{
>>> - struct syncobj_wait_entry *wait =
>>> - container_of(cb, struct syncobj_wait_entry, syncobj_cb);
>>> -
>>> - /* This happens inside the syncobj lock */
>>> - wait->fence =
>>> dma_fence_get(rcu_dereference_protected(syncobj->fence,
>>> - lockdep_is_held(&syncobj->lock)));
>>> - wake_up_process(wait->task);
>>> -}
>>> -
>>> static signed long drm_syncobj_array_wait_timeout(struct
>>> drm_syncobj **syncobjs,
>>> uint32_t count,
>>> uint32_t flags,
>>> @@ -698,14 +1022,11 @@ static signed long
>>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>> signaled_count = 0;
>>> for (i = 0; i < count; ++i) {
>>> entries[i].task = current;
>>> - entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
>>> + ret = drm_syncobj_search_fence(syncobjs[i], 0, 0,
>>> + &entries[i].fence);
>>> if (!entries[i].fence) {
>>> - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>> - continue;
>>> - } else {
>>> - ret = -EINVAL;
>>> - goto cleanup_entries;
>>> - }
>>> + ret = -EINVAL;
>>> + goto cleanup_entries;
>>> }
>>> if (dma_fence_is_signaled(entries[i].fence)) {
>>> @@ -733,15 +1054,6 @@ static signed long
>>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>> * fallthough and try a 0 timeout wait!
>>> */
>>> - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>> - for (i = 0; i < count; ++i) {
>>> - drm_syncobj_fence_get_or_add_callback(syncobjs[i],
>>> - &entries[i].fence,
>>> - &entries[i].syncobj_cb,
>>> - syncobj_wait_syncobj_func);
>>> - }
>>> - }
>>> -
>>> do {
>>> set_current_state(TASK_INTERRUPTIBLE);
>>> @@ -789,13 +1101,10 @@ static signed long
>>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>> cleanup_entries:
>>> for (i = 0; i < count; ++i) {
>>> - if (entries[i].syncobj_cb.func)
>>> - drm_syncobj_remove_callback(syncobjs[i],
>>> - &entries[i].syncobj_cb);
>>> + dma_fence_put(entries[i].fence);
>>> if (entries[i].fence_cb.func)
>>> dma_fence_remove_callback(entries[i].fence,
>>> &entries[i].fence_cb);
>>> - dma_fence_put(entries[i].fence);
>>> }
>>> kfree(entries);
>>> @@ -970,12 +1279,21 @@ drm_syncobj_reset_ioctl(struct drm_device
>>> *dev, void *data,
>>> if (ret < 0)
>>> return ret;
>>> - for (i = 0; i < args->count_handles; i++)
>>> - drm_syncobj_replace_fence(syncobjs[i], 0, NULL);
>>> -
>>> + for (i = 0; i < args->count_handles; i++) {
>>> + if (syncobjs[i]->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>> + DRM_ERROR("timeline syncobj cannot reset!\n");
>>> + ret = -EINVAL;
>>> + goto out;
>>> + }
>>> + drm_syncobj_timeline_fini(syncobjs[i],
>>> + &syncobjs[i]->syncobj_timeline);
>>> + drm_syncobj_timeline_init(syncobjs[i],
>>> + &syncobjs[i]->syncobj_timeline);
>>> + }
>>> +out:
>>> drm_syncobj_array_free(syncobjs, args->count_handles);
>>> - return 0;
>>> + return ret;
>>> }
>>> int
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> index 0a8d2d64f380..579e91a5858b 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> @@ -2137,7 +2137,9 @@ await_fence_array(struct i915_execbuffer *eb,
>>> if (!(flags & I915_EXEC_FENCE_WAIT))
>>> continue;
>>> - fence = drm_syncobj_fence_get(syncobj);
>>> + drm_syncobj_search_fence(syncobj, 0,
>>> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
>>> + &fence);
>>> if (!fence)
>>> return -EINVAL;
>>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>>> index 425432b85a87..657c97dc25ec 100644
>>> --- a/include/drm/drm_syncobj.h
>>> +++ b/include/drm/drm_syncobj.h
>>> @@ -30,6 +30,25 @@
>>> struct drm_syncobj_cb;
>>> +enum drm_syncobj_type {
>>> + DRM_SYNCOBJ_TYPE_NORMAL,
>>> + DRM_SYNCOBJ_TYPE_TIMELINE
>>> +};
>>> +
>>> +struct drm_syncobj_timeline {
>>> + wait_queue_head_t wq;
>>> + u64 timeline_context;
>>> + /**
>>> + * @timeline: syncobj timeline
>>> + */
>>> + u64 timeline;
>>> + u64 signal_point;
>>> +
>>> +
>>> + struct rb_root wait_pt_tree;
>>> + struct list_head signal_pt_list;
>>> +};
>>> +
>>> /**
>>> * struct drm_syncobj - sync object.
>>> *
>>> @@ -41,19 +60,16 @@ struct drm_syncobj {
>>> */
>>> struct kref refcount;
>>> /**
>>> - * @fence:
>>> - * NULL or a pointer to the fence bound to this object.
>>> - *
>>> - * This field should not be used directly. Use
>>> drm_syncobj_fence_get()
>>> - * and drm_syncobj_replace_fence() instead.
>>> + * @type: indicate syncobj type
>>> */
>>> - struct dma_fence __rcu *fence;
>>> + enum drm_syncobj_type type;
>>> /**
>>> - * @cb_list: List of callbacks to call when the &fence gets
>>> replaced.
>>> + * @syncobj_timeline: timeline
>>> */
>>> - struct list_head cb_list;
>>> + struct drm_syncobj_timeline syncobj_timeline;
>>> +
>>> /**
>>> - * @lock: Protects &cb_list and write-locks &fence.
>>> + * @lock: Protects timeline list and write-locks &fence.
>>> */
>>> spinlock_t lock;
>>> /**
>>> @@ -62,25 +78,6 @@ struct drm_syncobj {
>>> struct file *file;
>>> };
>>> -typedef void (*drm_syncobj_func_t)(struct drm_syncobj *syncobj,
>>> - struct drm_syncobj_cb *cb);
>>> -
>>> -/**
>>> - * struct drm_syncobj_cb - callback for drm_syncobj_add_callback
>>> - * @node: used by drm_syncob_add_callback to append this struct to
>>> - * &drm_syncobj.cb_list
>>> - * @func: drm_syncobj_func_t to call
>>> - *
>>> - * This struct will be initialized by drm_syncobj_add_callback,
>>> additional
>>> - * data can be passed along by embedding drm_syncobj_cb in another
>>> struct.
>>> - * The callback will get called the next time
>>> drm_syncobj_replace_fence is
>>> - * called.
>>> - */
>>> -struct drm_syncobj_cb {
>>> - struct list_head node;
>>> - drm_syncobj_func_t func;
>>> -};
>>> -
>>> void drm_syncobj_free(struct kref *kref);
>>> /**
>>> @@ -106,29 +103,6 @@ drm_syncobj_put(struct drm_syncobj *obj)
>>> kref_put(&obj->refcount, drm_syncobj_free);
>>> }
>>> -/**
>>> - * drm_syncobj_fence_get - get a reference to a fence in a sync object
>>> - * @syncobj: sync object.
>>> - *
>>> - * This acquires additional reference to &drm_syncobj.fence
>>> contained in @obj,
>>> - * if not NULL. It is illegal to call this without already holding
>>> a reference.
>>> - * No locks required.
>>> - *
>>> - * Returns:
>>> - * Either the fence of @obj or NULL if there's none.
>>> - */
>>> -static inline struct dma_fence *
>>> -drm_syncobj_fence_get(struct drm_syncobj *syncobj)
>>> -{
>>> - struct dma_fence *fence;
>>> -
>>> - rcu_read_lock();
>>> - fence = dma_fence_get_rcu_safe(&syncobj->fence);
>>> - rcu_read_unlock();
>>> -
>>> - return fence;
>>> -}
>>> -
>>> struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
>>> u32 handle);
>>> void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, u64
>>> point,
>>> @@ -142,5 +116,7 @@ int drm_syncobj_create(struct drm_syncobj
>>> **out_syncobj, uint32_t flags,
>>> int drm_syncobj_get_handle(struct drm_file *file_private,
>>> struct drm_syncobj *syncobj, u32 *handle);
>>> int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd);
>>> +int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point,
>>> + u64 flags, struct dma_fence **fence);
>>> #endif
>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>> index 300f336633f2..cebdb2541eb7 100644
>>> --- a/include/uapi/drm/drm.h
>>> +++ b/include/uapi/drm/drm.h
>>> @@ -717,6 +717,7 @@ struct drm_prime_handle {
>>> struct drm_syncobj_create {
>>> __u32 handle;
>>> #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
>>> +#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 1)
>>> __u32 flags;
>>> };
>>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx
mailing list