[PATCH 5/5] [RFC]drm: add syncobj timeline support v3

Christian König christian.koenig at amd.com
Thu Aug 30 07:25:27 UTC 2018


Am 30.08.2018 um 05:50 schrieb zhoucm1:
>
>
> On 2018年08月29日 19:42, Christian König wrote:
>> Am 29.08.2018 um 12:44 schrieb Chunming Zhou:
>>> VK_KHR_timeline_semaphore:
>>> This extension introduces a new type of semaphore that has an 
>>> integer payload
>>> identifying a point in a timeline. Such timeline semaphores support the
>>> following operations:
>>>     * CPU query - A host operation that allows querying the payload 
>>> of the
>>>       timeline semaphore.
>>>     * CPU wait - A host operation that allows a blocking wait for a
>>>       timeline semaphore to reach a specified value.
>>>     * Device wait - A device operation that allows waiting for a
>>>       timeline semaphore to reach a specified value.
>>>     * Device signal - A device operation that allows advancing the
>>>       timeline semaphore to a specified value.
>>>
>>> Since it's a timeline, that means the front time point(PT) always is 
>>> signaled before the late PT.
>>> a. signal PT design:
>>> Signal PT fence N depends on PT[N-1] fence and signal opertion 
>>> fence, when PT[N] fence is signaled,
>>> the timeline will increase to value of PT[N].
>>> b. wait PT design:
>>> Wait PT fence is signaled by reaching timeline point value, when 
>>> timeline is increasing, will compare
>>> wait PTs value with new timeline value, if PT value is lower than 
>>> timeline value, then wait PT will be
>>> signaled, otherwise keep in list. semaphore wait operation can wait 
>>> on any point of timeline,
>>> so need a RB tree to order them. And wait PT could ahead of signal 
>>> PT, we need a sumission fence to
>>> perform that.
>>>
>>> v2:
>>> 1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian)
>>> 2. move unexposed denitions to .c file. (Daniel Vetter)
>>> 3. split up the change to drm_syncobj_find_fence() in a separate 
>>> patch. (Christian)
>>> 4. split up the change to drm_syncobj_replace_fence() in a separate 
>>> patch.
>>> 5. drop the submission_fence implementation and instead use 
>>> wait_event() for that. (Christian)
>>> 6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter)
>>>
>>> v3:
>>> 1. replace normal syncobj with timeline implemenation. (Vetter and 
>>> Christian)
>>>      a. normal syncobj signal op will create a signal PT to tail of 
>>> signal pt list.
>>>      b. normal syncobj wait op will create a wait pt with last 
>>> signal point, and this wait PT is only signaled by related signal 
>>> point PT.
>>> 2. some bug fix and clean up
>>> 3. tested by ./deqp-vk -n dEQP-VK*semaphore* for normal syncobj
>>>
>>> TODO:
>>> 1. CPU query and wait on timeline semaphore.
>>>
>>> Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
>>> Cc: Christian Konig <christian.koenig at amd.com>
>>> Cc: Dave Airlie <airlied at redhat.com>
>>> Cc: Daniel Rakos <Daniel.Rakos at amd.com>
>>> Cc: Daniel Vetter <daniel at ffwll.ch>
>>> ---
>>>   drivers/gpu/drm/drm_syncobj.c              | 593 
>>> ++++++++++++++++-----
>>>   drivers/gpu/drm/i915/i915_gem_execbuffer.c |   4 +-
>>>   include/drm/drm_syncobj.h                  |  78 +--
>>>   include/uapi/drm/drm.h                     |   1 +
>>>   4 files changed, 505 insertions(+), 171 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_syncobj.c 
>>> b/drivers/gpu/drm/drm_syncobj.c
>>> index ab43559398d0..f701d9ef1b81 100644
>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>> @@ -56,6 +56,50 @@
>>>   #include "drm_internal.h"
>>>   #include <drm/drm_syncobj.h>
>>>   +/* merge normal syncobj to timeline syncobj, the point interval 
>>> is 1 */
>>> +#define DRM_SYNCOBJ_NORMAL_POINT 1
>>> +
>>> +struct drm_syncobj_stub_fence {
>>> +    struct dma_fence base;
>>> +    spinlock_t lock;
>>> +};
>>> +
>>> +static const char *drm_syncobj_stub_fence_get_name(struct dma_fence 
>>> *fence)
>>> +{
>>> +        return "syncobjstub";
>>> +}
>>> +
>>> +static bool drm_syncobj_stub_fence_enable_signaling(struct 
>>> dma_fence *fence)
>>> +{
>>> +    return !dma_fence_is_signaled(fence);
>>> +}
>>> +static void drm_syncobj_stub_fence_release(struct dma_fence *f)
>>> +{
>>> +    kfree(f);
>>> +}
>>> +static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
>>> +    .get_driver_name = drm_syncobj_stub_fence_get_name,
>>> +    .get_timeline_name = drm_syncobj_stub_fence_get_name,
>>> +    .enable_signaling = drm_syncobj_stub_fence_enable_signaling,
>>> +    .release = drm_syncobj_stub_fence_release,
>>> +};
>>
>> Do we really need to move that around? Could reduce the size of the 
>> patch quite a bit if we don't.
>
> stub fence is used widely in both normal and timeline syncobj, if you 
> think which increase patch size, I can do a separate patch for that.
>
>>
>>> +
>>> +struct drm_syncobj_wait_pt {
>>> +    struct drm_syncobj_stub_fence base;
>>> +    u64    value;
>>> +    struct rb_node   node;
>>> +};
>>> +struct drm_syncobj_signal_pt {
>>> +    struct drm_syncobj_stub_fence base;
>>> +    struct dma_fence *signal_fence;
>>> +    struct dma_fence *pre_pt_base;
>>> +    struct dma_fence_cb signal_cb;
>>> +    struct dma_fence_cb pre_pt_cb;
>>> +    struct drm_syncobj *syncobj;
>>> +    u64    value;
>>> +    struct list_head list;
>>> +};
>>
>> What are those two structures good for
> They are used to record wait ops points and signal ops points.
> For timeline, they are connected by timeline value, works like:
>     a. signal pt increase timeline value to signal_pt value when 
> signal_pt is signaled. signal_pt is depending on previous pt fence and 
> itself signal fence from signal ops.
>     b. wait pt tree checks if timeline value over itself value.
>
> For normal, works like:
>     a. signal pt increase 1 for syncobj_timeline->signal_point every 
> time when signal ops is performed.
>     b. when wait ops is comming, wait pt will fetch above last signal 
> pt value as its wait point. wait pt will be only signaled by equal 
> point value signal_pt.
>
>
>> and why is the stub fence their base?
> Good question, I tried to kzalloc for them as well when I debug them, 
> I encountered a problem:
> I lookup/find wait_pt or signal_pt successfully, but when I tried to 
> use them, they are freed sometimes, and results in NULL point.
> and generally, when lookup them, we often need their stub fence as 
> well, and in the meantime,  their lifecycle are same.
> Above reason, I make stub fence as their base.

That sounds like you only did this because you messed up the lifecycle.

Additional to that I don't think you correctly considered the lifecycle 
of the waits and the sync object itself. E.g. blocking in 
drm_syncobj_timeline_fini() until all waits are done is not a good idea.

What you should do instead is to create a fence_array object with all 
the fence we need to wait for when a wait point is requested.

Otherwise somebody can easily construct a situation where timeline sync 
obj A waits on B and B waits on A.

>
>>
>>> +
>>>   /**
>>>    * drm_syncobj_find - lookup and reference a sync object.
>>>    * @file_private: drm file private pointer
>>> @@ -82,59 +126,247 @@ struct drm_syncobj *drm_syncobj_find(struct 
>>> drm_file *file_private,
>>>   }
>>>   EXPORT_SYMBOL(drm_syncobj_find);
>>>   -static void drm_syncobj_add_callback_locked(struct drm_syncobj 
>>> *syncobj,
>>> -                        struct drm_syncobj_cb *cb,
>>> -                        drm_syncobj_func_t func)
>>> +static void drm_syncobj_timeline_init(struct drm_syncobj_timeline
>>> +                      *syncobj_timeline)
>>>   {
>>> -    cb->func = func;
>>> -    list_add_tail(&cb->node, &syncobj->cb_list);
>>> +    syncobj_timeline->timeline_context = dma_fence_context_alloc(1);
>>> +    syncobj_timeline->timeline = 0;
>>> +    syncobj_timeline->signal_point = 0;
>>> +    init_waitqueue_head(&syncobj_timeline->wq);
>>> +
>>> +    syncobj_timeline->wait_pt_tree = RB_ROOT;
>>> +    INIT_LIST_HEAD(&syncobj_timeline->signal_pt_list);
>>>   }
>>>   -static int drm_syncobj_fence_get_or_add_callback(struct 
>>> drm_syncobj *syncobj,
>>> -                         struct dma_fence **fence,
>>> -                         struct drm_syncobj_cb *cb,
>>> -                         drm_syncobj_func_t func)
>>> +static void drm_syncobj_timeline_fini(struct drm_syncobj *syncobj,
>>> +                      struct drm_syncobj_timeline *syncobj_timeline)
>>>   {
>>> -    int ret;
>>> +    struct rb_node *node = NULL;
>>> +    struct drm_syncobj_wait_pt *wait_pt = NULL;
>>> +    struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
>>> +
>>> +    spin_lock(&syncobj->lock);
>>> +    for(node = rb_first(&syncobj_timeline->wait_pt_tree);
>>> +        node != NULL; ) {
>>
>> Better use rbtree_postorder_for_each_entry_safe() for this.
> From the comments, seems we cannot use it here, we need erase node here.
> Comments:
>  * Note, however, that it cannot handle other modifications that 
> re-order the
>  * rbtree it is iterating over. This includes calling rb_erase() on 
> @pos, as
>  * rb_erase() may rebalance the tree, causing us to miss some nodes.
>  */

Yeah, but your current implementation has the same problem :)

You need to use something like "while (node = rb_first(...))" instead if 
you want to destruct the whole tree.

Regards,
Christian.

>
> Thanks,
> David Zhou
>>
>> Regards,
>> Christian.
>>
>>> +        wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>> +        node = rb_next(node);
>>> +        rb_erase(&wait_pt->node,
>>> +             &syncobj_timeline->wait_pt_tree);
>>> +        RB_CLEAR_NODE(&wait_pt->node);
>>> +        spin_unlock(&syncobj->lock);
>>> +        dma_fence_wait(&wait_pt->base.base, true);
>>> +        spin_lock(&syncobj->lock);
>>> +        /* kfree(wait_pt) is excuted by fence put */
>>> +        dma_fence_put(&wait_pt->base.base);
>>> +    }
>>> +    list_for_each_entry_safe(signal_pt, tmp,
>>> +                 &syncobj_timeline->signal_pt_list, list) {
>>> +        list_del(&signal_pt->list);
>>> +        if (signal_pt->signal_fence) {
>>> + dma_fence_remove_callback(signal_pt->signal_fence,
>>> +                          &signal_pt->signal_cb);
>>> +            dma_fence_put(signal_pt->signal_fence);
>>> +        }
>>> +        if (signal_pt->pre_pt_base) {
>>> + dma_fence_remove_callback(signal_pt->pre_pt_base,
>>> +                          &signal_pt->pre_pt_cb);
>>> +            dma_fence_put(signal_pt->pre_pt_base);
>>> +        }
>>> +        dma_fence_put(&signal_pt->base.base);
>>> +    }
>>> +    spin_unlock(&syncobj->lock);
>>> +}
>>> +
>>>   -    *fence = drm_syncobj_fence_get(syncobj);
>>> -    if (*fence)
>>> -        return 1;
>>> +static bool drm_syncobj_normal_signal_wait_pts(struct drm_syncobj 
>>> *syncobj,
>>> +                           u64 signal_pt)
>>> +{
>>> +    struct rb_node *node = NULL;
>>> +    struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>         spin_lock(&syncobj->lock);
>>> -    /* We've already tried once to get a fence and failed. Now that we
>>> -     * have the lock, try one more time just to be sure we don't add a
>>> -     * callback when a fence has already been set.
>>> -     */
>>> -    if (syncobj->fence) {
>>> -        *fence = 
>>> dma_fence_get(rcu_dereference_protected(syncobj->fence,
>>> - lockdep_is_held(&syncobj->lock)));
>>> -        ret = 1;
>>> -    } else {
>>> -        *fence = NULL;
>>> -        drm_syncobj_add_callback_locked(syncobj, cb, func);
>>> -        ret = 0;
>>> +    for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
>>> +        node != NULL; ) {
>>> +        wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>> +        node = rb_next(node);
>>> +        /* for normal syncobj */
>>> +        if (wait_pt->value == signal_pt) {
>>> +            dma_fence_signal(&wait_pt->base.base);
>>> +            rb_erase(&wait_pt->node,
>>> + &syncobj->syncobj_timeline.wait_pt_tree);
>>> +            RB_CLEAR_NODE(&wait_pt->node);
>>> +            /* kfree(wait_pt) is excuted by fence put */
>>> +            dma_fence_put(&wait_pt->base.base);
>>> +            spin_unlock(&syncobj->lock);
>>> +            return true;
>>> +        }
>>>       }
>>>       spin_unlock(&syncobj->lock);
>>> +    return false;
>>> +}
>>>   -    return ret;
>>> +static void drm_syncobj_timeline_signal_wait_pts(struct drm_syncobj 
>>> *syncobj)
>>> +{
>>> +    struct rb_node *node = NULL;
>>> +    struct drm_syncobj_wait_pt *wait_pt = NULL;
>>> +
>>> +    spin_lock(&syncobj->lock);
>>> +    for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
>>> +        node != NULL; ) {
>>> +        wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>> +        node = rb_next(node);
>>> +        if (wait_pt->value <= syncobj->syncobj_timeline.timeline) {
>>> +            dma_fence_signal(&wait_pt->base.base);
>>> +            rb_erase(&wait_pt->node,
>>> + &syncobj->syncobj_timeline.wait_pt_tree);
>>> +            RB_CLEAR_NODE(&wait_pt->node);
>>> +            /* kfree(wait_pt) is excuted by fence put */
>>> +            dma_fence_put(&wait_pt->base.base);
>>> +        } else {
>>> +            /* the loop is from left to right, the later entry 
>>> value is
>>> +             * bigger, so don't need to check any more */
>>> +            break;
>>> +        }
>>> +    }
>>> +    spin_unlock(&syncobj->lock);
>>>   }
>>>   -void drm_syncobj_add_callback(struct drm_syncobj *syncobj,
>>> -                  struct drm_syncobj_cb *cb,
>>> -                  drm_syncobj_func_t func)
>>> +
>>> +
>>> +static void pt_fence_cb(struct drm_syncobj_signal_pt *signal_pt)
>>>   {
>>> +    struct dma_fence *fence = NULL;
>>> +    struct drm_syncobj *syncobj;
>>> +    struct drm_syncobj_signal_pt *tail_pt;
>>> +    u64 pt_value = signal_pt->value;
>>> +
>>> +    dma_fence_signal(&signal_pt->base.base);
>>> +    fence = signal_pt->signal_fence;
>>> +    signal_pt->signal_fence = NULL;
>>> +    dma_fence_put(fence);
>>> +    fence = signal_pt->pre_pt_base;
>>> +    signal_pt->pre_pt_base = NULL;
>>> +    dma_fence_put(fence);
>>> +
>>> +    syncobj = signal_pt->syncobj;
>>>       spin_lock(&syncobj->lock);
>>> -    drm_syncobj_add_callback_locked(syncobj, cb, func);
>>> +    syncobj->syncobj_timeline.timeline = pt_value;
>>> +    tail_pt = 
>>> list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
>>> +                  struct drm_syncobj_signal_pt, list);
>>> +    if ((syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL && signal_pt != 
>>> tail_pt)
>>> +        || syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>> +        list_del(&signal_pt->list);
>>> +        /* kfree(signal_pt) will be  executed by below fence put */
>>> +        dma_fence_put(&signal_pt->base.base);
>>> +    }
>>>       spin_unlock(&syncobj->lock);
>>> +    if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL)
>>> +        drm_syncobj_normal_signal_wait_pts(syncobj, pt_value);
>>> +    else
>>> +        drm_syncobj_timeline_signal_wait_pts(syncobj);
>>>   }
>>> +static void pt_signal_fence_func(struct dma_fence *fence,
>>> +                 struct dma_fence_cb *cb)
>>> +{
>>> +    struct drm_syncobj_signal_pt *signal_pt =
>>> +        container_of(cb, struct drm_syncobj_signal_pt, signal_cb);
>>>   -void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
>>> -                 struct drm_syncobj_cb *cb)
>>> +    if (signal_pt->pre_pt_base &&
>>> +        !dma_fence_is_signaled(signal_pt->pre_pt_base))
>>> +        return;
>>> +
>>> +    pt_fence_cb(signal_pt);
>>> +}
>>> +static void pt_pre_fence_func(struct dma_fence *fence,
>>> +                 struct dma_fence_cb *cb)
>>> +{
>>> +    struct drm_syncobj_signal_pt *signal_pt =
>>> +        container_of(cb, struct drm_syncobj_signal_pt, pre_pt_cb);
>>> +
>>> +    if (signal_pt->signal_fence &&
>>> +        !dma_fence_is_signaled(signal_pt->pre_pt_base))
>>> +        return;
>>> +
>>> +    pt_fence_cb(signal_pt);
>>> +}
>>> +
>>> +static int drm_syncobj_timeline_create_signal_pt(struct drm_syncobj 
>>> *syncobj,
>>> +                         struct dma_fence *fence,
>>> +                         u64 point)
>>>   {
>>> +    struct drm_syncobj_signal_pt *signal_pt =
>>> +        kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL);
>>> +    struct drm_syncobj_stub_fence *base;
>>> +    struct drm_syncobj_signal_pt *tail_pt;
>>> +    struct dma_fence *tail_pt_fence = NULL;
>>> +    int ret = 0;
>>> +
>>> +    if (!signal_pt)
>>> +        return -ENOMEM;
>>> +    if (syncobj->syncobj_timeline.signal_point >= point) {
>>> +        DRM_WARN("A later signal is ready!");
>>> +        goto out;
>>> +    }
>>> +    if (!fence)
>>> +        goto out;
>>> +    dma_fence_get(fence);
>>>       spin_lock(&syncobj->lock);
>>> -    list_del_init(&cb->node);
>>> +    base = &signal_pt->base;
>>> +    spin_lock_init(&base->lock);
>>> +    dma_fence_init(&base->base,
>>> +               &drm_syncobj_stub_fence_ops,
>>> +               &base->lock,
>>> +               syncobj->syncobj_timeline.timeline_context, point);
>>> +    signal_pt->signal_fence = fence;
>>> +    /* timeline syncobj must take this dependency */
>>> +    if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>> +        if (!list_empty(&syncobj->syncobj_timeline.signal_pt_list)) {
>>> +            tail_pt = 
>>> list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
>>> +                          struct drm_syncobj_signal_pt, list);
>>> +            tail_pt_fence = &tail_pt->base.base;
>>> +            if (dma_fence_is_signaled(tail_pt_fence))
>>> +                tail_pt_fence = NULL;
>>> +            else
>>> +                signal_pt->pre_pt_base =
>>> +                    dma_fence_get(tail_pt_fence);
>>> +        }
>>> +    }
>>> +
>>> +    signal_pt->value = point;
>>> +    syncobj->syncobj_timeline.signal_point = point;
>>> +    signal_pt->syncobj = syncobj;
>>> +    INIT_LIST_HEAD(&signal_pt->list);
>>> +    list_add_tail(&signal_pt->list, 
>>> &syncobj->syncobj_timeline.signal_pt_list);
>>>       spin_unlock(&syncobj->lock);
>>> +    wake_up_all(&syncobj->syncobj_timeline.wq);
>>> +    /**
>>> +     * Every pt is depending on signal fence and previous pt fence, 
>>> add
>>> +     * callbacks to them
>>> +     */
>>> +    ret = dma_fence_add_callback(signal_pt->signal_fence,
>>> +                     &signal_pt->signal_cb,
>>> +                     pt_signal_fence_func);
>>> +
>>> +    if (signal_pt->pre_pt_base) {
>>> +        ret = dma_fence_add_callback(signal_pt->pre_pt_base,
>>> +                         &signal_pt->pre_pt_cb,
>>> +                         pt_pre_fence_func);
>>> +        if (ret == -ENOENT)
>>> +            pt_pre_fence_func(signal_pt->pre_pt_base,
>>> +                      &signal_pt->pre_pt_cb);
>>> +        else if (ret)
>>> +            goto out;
>>> +    } else if (ret == -ENOENT) {
>>> +        pt_signal_fence_func(signal_pt->signal_fence,
>>> +                     &signal_pt->signal_cb);
>>> +    } else if (ret) {
>>> +            goto out;
>>> +    }
>>> +
>>> +    return 0;
>>> +out:
>>> +    dma_fence_put(&signal_pt->base.base);
>>> +    return ret;
>>>   }
>>>     /**
>>> @@ -149,53 +381,30 @@ void drm_syncobj_replace_fence(struct 
>>> drm_syncobj *syncobj,
>>>                      u64 point,
>>>                      struct dma_fence *fence)
>>>   {
>>> -    struct dma_fence *old_fence;
>>> -    struct drm_syncobj_cb *cur, *tmp;
>>> -
>>> -    if (fence)
>>> -        dma_fence_get(fence);
>>> -
>>> -    spin_lock(&syncobj->lock);
>>> -
>>> -    old_fence = rcu_dereference_protected(syncobj->fence,
>>> - lockdep_is_held(&syncobj->lock));
>>> -    rcu_assign_pointer(syncobj->fence, fence);
>>> -
>>> -    if (fence != old_fence) {
>>> -        list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
>>> -            list_del_init(&cur->node);
>>> -            cur->func(syncobj, cur);
>>> +    if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>> +        if (fence)
>>> +            drm_syncobj_timeline_create_signal_pt(syncobj, fence,
>>> +                                  point);
>>> +        return;
>>> +    } else if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
>>> +        u64 pt_value;
>>> +
>>> +        if (!fence) {
>>> +            drm_syncobj_timeline_fini(syncobj,
>>> +                          &syncobj->syncobj_timeline);
>>> + drm_syncobj_timeline_init(&syncobj->syncobj_timeline);
>>> +            return;
>>>           }
>>> +        pt_value = syncobj->syncobj_timeline.signal_point +
>>> +            DRM_SYNCOBJ_NORMAL_POINT;
>>> +        drm_syncobj_timeline_create_signal_pt(syncobj, fence, 
>>> pt_value);
>>> +        return;
>>> +    } else {
>>> +        DRM_ERROR("the syncobj type isn't support\n");
>>>       }
>>> -
>>> -    spin_unlock(&syncobj->lock);
>>> -
>>> -    dma_fence_put(old_fence);
>>>   }
>>>   EXPORT_SYMBOL(drm_syncobj_replace_fence);
>>>   -struct drm_syncobj_stub_fence {
>>> -    struct dma_fence base;
>>> -    spinlock_t lock;
>>> -};
>>> -
>>> -static const char *drm_syncobj_stub_fence_get_name(struct dma_fence 
>>> *fence)
>>> -{
>>> -        return "syncobjstub";
>>> -}
>>> -
>>> -static bool drm_syncobj_stub_fence_enable_signaling(struct 
>>> dma_fence *fence)
>>> -{
>>> -    return !dma_fence_is_signaled(fence);
>>> -}
>>> -
>>> -static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
>>> -    .get_driver_name = drm_syncobj_stub_fence_get_name,
>>> -    .get_timeline_name = drm_syncobj_stub_fence_get_name,
>>> -    .enable_signaling = drm_syncobj_stub_fence_enable_signaling,
>>> -    .release = NULL,
>>> -};
>>> -
>>>   static int drm_syncobj_assign_null_handle(struct drm_syncobj 
>>> *syncobj)
>>>   {
>>>       struct drm_syncobj_stub_fence *fence;
>>> @@ -215,6 +424,143 @@ static int 
>>> drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>>       return 0;
>>>   }
>>>   +static struct drm_syncobj_wait_pt *
>>> +drm_syncobj_timeline_lookup_wait_pt_fence(struct drm_syncobj *syncobj,
>>> +                      u64 point,
>>> +                      struct dma_fence **fence)
>>> +{
>>> +    struct rb_node *node = 
>>> syncobj->syncobj_timeline.wait_pt_tree.rb_node;
>>> +    struct drm_syncobj_wait_pt *wait_pt = NULL;
>>> +
>>> +
>>> +    spin_lock(&syncobj->lock);
>>> +    while(node) {
>>> +        int result;
>>> +
>>> +        wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>> +        result = point - wait_pt->value;
>>> +        if (result < 0) {
>>> +            node = node->rb_left;
>>> +        } else if (result > 0) {
>>> +            node = node->rb_right;
>>> +        } else {
>>> +            if (fence)
>>> +            *fence = dma_fence_get(&wait_pt->base.base);
>>> +            break;
>>> +        }
>>> +    }
>>> +    spin_unlock(&syncobj->lock);
>>> +
>>> +    return wait_pt;
>>> +}
>>> +
>>> +static struct drm_syncobj_wait_pt *
>>> +drm_syncobj_timeline_create_wait_pt_and_return_fence(struct 
>>> drm_syncobj *syncobj,
>>> +                             u64 point,
>>> +                             struct dma_fence **fence)
>>> +{
>>> +    struct drm_syncobj_wait_pt *wait_pt = kzalloc(sizeof(*wait_pt),
>>> +                              GFP_KERNEL);
>>> +    struct drm_syncobj_stub_fence *base;
>>> +    struct rb_node **new = 
>>> &(syncobj->syncobj_timeline.wait_pt_tree.rb_node), *parent = NULL;
>>> +    struct drm_syncobj_signal_pt *tail_signal_pt =
>>> + list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
>>> +                struct drm_syncobj_signal_pt, list);
>>> +
>>> +    if (!wait_pt)
>>> +        return NULL;
>>> +    base = &wait_pt->base;
>>> +    spin_lock_init(&base->lock);
>>> +    dma_fence_init(&base->base,
>>> +               &drm_syncobj_stub_fence_ops,
>>> +               &base->lock,
>>> +               syncobj->syncobj_timeline.timeline_context, point);
>>> +    wait_pt->value = point;
>>> +
>>> +    /* wait pt must be in an order, so that we can easily lookup 
>>> and signal
>>> +     * it */
>>> +    spin_lock(&syncobj->lock);
>>> +    if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE &&
>>> +        point <= syncobj->syncobj_timeline.timeline)
>>> +        dma_fence_signal(&wait_pt->base.base);
>>> +    if ((syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) &&
>>> +        (point == tail_signal_pt->value) &&
>>> + dma_fence_is_signaled(&tail_signal_pt->base.base))
>>> +        dma_fence_signal(&wait_pt->base.base);
>>> +    while(*new) {
>>> +        struct drm_syncobj_wait_pt *this =
>>> +            rb_entry(*new, struct drm_syncobj_wait_pt, node);
>>> +        int result = wait_pt->value - this->value;
>>> +
>>> +        parent = *new;
>>> +        if (result < 0)
>>> +            new = &((*new)->rb_left);
>>> +        else if (result > 0)
>>> +            new = &((*new)->rb_right);
>>> +        else
>>> +            goto exist;
>>> +    }
>>> +    if (fence)
>>> +        *fence = dma_fence_get(&wait_pt->base.base);
>>> +    rb_link_node(&wait_pt->node, parent, new);
>>> +    rb_insert_color(&wait_pt->node, 
>>> &syncobj->syncobj_timeline.wait_pt_tree);
>>> +    spin_unlock(&syncobj->lock);
>>> +    return wait_pt;
>>> +exist:
>>> +    spin_unlock(&syncobj->lock);
>>> +    dma_fence_put(&wait_pt->base.base);
>>> +    wait_pt = drm_syncobj_timeline_lookup_wait_pt_fence(syncobj, 
>>> point,
>>> +                                fence);
>>> +    return wait_pt;
>>> +}
>>> +
>>> +static struct dma_fence *
>>> +drm_syncobj_timeline_point_get(struct drm_syncobj *syncobj, u64 
>>> point, u64 flags)
>>> +{
>>> +    struct drm_syncobj_wait_pt *wait_pt;
>>> +    struct dma_fence *fence = NULL;
>>> +
>>> +    /* already signaled, simply return a signaled stub fence */
>>> +    if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE &&
>>> +        point <= syncobj->syncobj_timeline.timeline) {
>>> +        struct drm_syncobj_stub_fence *fence;
>>> +
>>> +        fence = kzalloc(sizeof(*fence), GFP_KERNEL);
>>> +        if (fence == NULL)
>>> +            return NULL;
>>> +
>>> +        spin_lock_init(&fence->lock);
>>> +        dma_fence_init(&fence->base, &drm_syncobj_stub_fence_ops,
>>> +                   &fence->lock, 0, 0);
>>> +        dma_fence_signal(&fence->base);
>>> +        return &fence->base;
>>> +    }
>>> +
>>> +    /* check if the wait pt exists */
>>> +    wait_pt = drm_syncobj_timeline_lookup_wait_pt_fence(syncobj, 
>>> point,
>>> +                                &fence);
>>> +    if (!fence) {
>>> +        /* This is a new wait pt, so create it */
>>> +        wait_pt = 
>>> drm_syncobj_timeline_create_wait_pt_and_return_fence(syncobj, point,
>>> +                                  &fence);
>>> +        if (!fence)
>>> +            return NULL;
>>> +    }
>>> +    if (wait_pt) {
>>> +        int ret = 0;
>>> +
>>> +        if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>> +            ret = 
>>> wait_event_interruptible_timeout(syncobj->syncobj_timeline.wq,
>>> +                                   wait_pt->value <= 
>>> syncobj->syncobj_timeline.signal_point,
>>> +                                   msecs_to_jiffies(10000)); /* 
>>> wait 10s */
>>> +            if (ret <= 0)
>>> +                return NULL;
>>> +        }
>>> +        return fence;
>>> +    }
>>> +    return NULL;
>>> +}
>>> +
>>>   /**
>>>    * drm_syncobj_find_fence - lookup and reference the fence in a 
>>> sync object
>>>    * @file_private: drm file private pointer
>>> @@ -229,20 +575,45 @@ static int 
>>> drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>>    * contains a reference to the fence, which must be released by 
>>> calling
>>>    * dma_fence_put().
>>>    */
>>> -int drm_syncobj_find_fence(struct drm_file *file_private,
>>> -               u32 handle, u64 point,
>>> -               struct dma_fence **fence)
>>> +int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point,
>>> +                 u64 flags, struct dma_fence **fence)
>>>   {
>>> -    struct drm_syncobj *syncobj = drm_syncobj_find(file_private, 
>>> handle);
>>>       int ret = 0;
>>>         if (!syncobj)
>>>           return -ENOENT;
>>>   -    *fence = drm_syncobj_fence_get(syncobj);
>>> +    if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
>>> +        /*NORMAL syncobj always wait on last pt */
>>> +        u64 tail_pt_value = syncobj->syncobj_timeline.signal_point;
>>> +
>>> +        if (tail_pt_value == 0)
>>> +            tail_pt_value += DRM_SYNCOBJ_NORMAL_POINT;
>>> +        /* NORMAL syncobj doesn't care point value */
>>> +        WARN_ON(point != 0);
>>> +        *fence = drm_syncobj_timeline_point_get(syncobj, 
>>> tail_pt_value,
>>> +                            flags);
>>> +    } else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>> +        *fence = drm_syncobj_timeline_point_get(syncobj, point,
>>> +                            flags);
>>> +    } else {
>>> +        DRM_ERROR("Don't support this type syncobj\n");
>>> +        *fence = NULL;
>>> +    }
>>>       if (!*fence) {
>>>           ret = -EINVAL;
>>>       }
>>> +    return ret;
>>> +}
>>> +EXPORT_SYMBOL(drm_syncobj_search_fence);
>>> +int drm_syncobj_find_fence(struct drm_file *file_private,
>>> +               u32 handle, u64 point,
>>> +               struct dma_fence **fence) {
>>> +    struct drm_syncobj *syncobj = drm_syncobj_find(file_private, 
>>> handle);
>>> +
>>> +    int ret = drm_syncobj_search_fence(syncobj, point,
>>> +                    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
>>> +                    fence);
>>>       drm_syncobj_put(syncobj);
>>>       return ret;
>>>   }
>>> @@ -259,7 +630,7 @@ void drm_syncobj_free(struct kref *kref)
>>>       struct drm_syncobj *syncobj = container_of(kref,
>>>                              struct drm_syncobj,
>>>                              refcount);
>>> -    drm_syncobj_replace_fence(syncobj, 0, NULL);
>>> +    drm_syncobj_timeline_fini(syncobj, &syncobj->syncobj_timeline);
>>>       kfree(syncobj);
>>>   }
>>>   EXPORT_SYMBOL(drm_syncobj_free);
>>> @@ -287,8 +658,12 @@ int drm_syncobj_create(struct drm_syncobj 
>>> **out_syncobj, uint32_t flags,
>>>           return -ENOMEM;
>>>         kref_init(&syncobj->refcount);
>>> -    INIT_LIST_HEAD(&syncobj->cb_list);
>>>       spin_lock_init(&syncobj->lock);
>>> +    if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE)
>>> +        syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
>>> +    else
>>> +        syncobj->type = DRM_SYNCOBJ_TYPE_NORMAL;
>>> + drm_syncobj_timeline_init(&syncobj->syncobj_timeline);
>>>         if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
>>>           ret = drm_syncobj_assign_null_handle(syncobj);
>>> @@ -646,7 +1021,6 @@ struct syncobj_wait_entry {
>>>       struct task_struct *task;
>>>       struct dma_fence *fence;
>>>       struct dma_fence_cb fence_cb;
>>> -    struct drm_syncobj_cb syncobj_cb;
>>>   };
>>>     static void syncobj_wait_fence_func(struct dma_fence *fence,
>>> @@ -658,18 +1032,6 @@ static void syncobj_wait_fence_func(struct 
>>> dma_fence *fence,
>>>       wake_up_process(wait->task);
>>>   }
>>>   -static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
>>> -                      struct drm_syncobj_cb *cb)
>>> -{
>>> -    struct syncobj_wait_entry *wait =
>>> -        container_of(cb, struct syncobj_wait_entry, syncobj_cb);
>>> -
>>> -    /* This happens inside the syncobj lock */
>>> -    wait->fence = 
>>> dma_fence_get(rcu_dereference_protected(syncobj->fence,
>>> - lockdep_is_held(&syncobj->lock)));
>>> -    wake_up_process(wait->task);
>>> -}
>>> -
>>>   static signed long drm_syncobj_array_wait_timeout(struct 
>>> drm_syncobj **syncobjs,
>>>                             uint32_t count,
>>>                             uint32_t flags,
>>> @@ -693,14 +1055,11 @@ static signed long 
>>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>>       signaled_count = 0;
>>>       for (i = 0; i < count; ++i) {
>>>           entries[i].task = current;
>>> -        entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
>>> +        ret = drm_syncobj_search_fence(syncobjs[i], 0, 0,
>>> +                           &entries[i].fence);
>>>           if (!entries[i].fence) {
>>> -            if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>> -                continue;
>>> -            } else {
>>> -                ret = -EINVAL;
>>> -                goto cleanup_entries;
>>> -            }
>>> +            ret = -EINVAL;
>>> +            goto cleanup_entries;
>>>           }
>>>             if (dma_fence_is_signaled(entries[i].fence)) {
>>> @@ -728,15 +1087,6 @@ static signed long 
>>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>>        * fallthough and try a 0 timeout wait!
>>>        */
>>>   -    if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>> -        for (i = 0; i < count; ++i) {
>>> - drm_syncobj_fence_get_or_add_callback(syncobjs[i],
>>> -                                  &entries[i].fence,
>>> -                                  &entries[i].syncobj_cb,
>>> -                                  syncobj_wait_syncobj_func);
>>> -        }
>>> -    }
>>> -
>>>       do {
>>>           set_current_state(TASK_INTERRUPTIBLE);
>>>   @@ -784,13 +1134,10 @@ static signed long 
>>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>>     cleanup_entries:
>>>       for (i = 0; i < count; ++i) {
>>> -        if (entries[i].syncobj_cb.func)
>>> -            drm_syncobj_remove_callback(syncobjs[i],
>>> -                            &entries[i].syncobj_cb);
>>> +        dma_fence_put(entries[i].fence);
>>>           if (entries[i].fence_cb.func)
>>>               dma_fence_remove_callback(entries[i].fence,
>>>                             &entries[i].fence_cb);
>>> -        dma_fence_put(entries[i].fence);
>>>       }
>>>       kfree(entries);
>>>   @@ -965,12 +1312,20 @@ drm_syncobj_reset_ioctl(struct drm_device 
>>> *dev, void *data,
>>>       if (ret < 0)
>>>           return ret;
>>>   -    for (i = 0; i < args->count_handles; i++)
>>> -        drm_syncobj_replace_fence(syncobjs[i], 0, NULL);
>>> -
>>> +    for (i = 0; i < args->count_handles; i++) {
>>> +        if (syncobjs[i]->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>> +            DRM_ERROR("timeline syncobj cannot reset!\n");
>>> +            ret = -EINVAL;
>>> +            goto out;
>>> +        }
>>> +        drm_syncobj_timeline_fini(syncobjs[i],
>>> +                      &syncobjs[i]->syncobj_timeline);
>>> + drm_syncobj_timeline_init(&syncobjs[i]->syncobj_timeline);
>>> +    }
>>> +out:
>>>       drm_syncobj_array_free(syncobjs, args->count_handles);
>>>   -    return 0;
>>> +    return ret;
>>>   }
>>>     int
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
>>> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> index 7209dd832d39..bb20d318c9d6 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> @@ -2182,7 +2182,9 @@ await_fence_array(struct i915_execbuffer *eb,
>>>           if (!(flags & I915_EXEC_FENCE_WAIT))
>>>               continue;
>>>   -        fence = drm_syncobj_fence_get(syncobj);
>>> +        drm_syncobj_search_fence(syncobj, 0,
>>> +                     DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
>>> +                     &fence);
>>>           if (!fence)
>>>               return -EINVAL;
>>>   diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>>> index 425432b85a87..657c97dc25ec 100644
>>> --- a/include/drm/drm_syncobj.h
>>> +++ b/include/drm/drm_syncobj.h
>>> @@ -30,6 +30,25 @@
>>>     struct drm_syncobj_cb;
>>>   +enum drm_syncobj_type {
>>> +    DRM_SYNCOBJ_TYPE_NORMAL,
>>> +    DRM_SYNCOBJ_TYPE_TIMELINE
>>> +};
>>> +
>>> +struct drm_syncobj_timeline {
>>> +    wait_queue_head_t    wq;
>>> +    u64 timeline_context;
>>> +    /**
>>> +     * @timeline: syncobj timeline
>>> +     */
>>> +    u64 timeline;
>>> +    u64 signal_point;
>>> +
>>> +
>>> +    struct rb_root wait_pt_tree;
>>> +    struct list_head signal_pt_list;
>>> +};
>>> +
>>>   /**
>>>    * struct drm_syncobj - sync object.
>>>    *
>>> @@ -41,19 +60,16 @@ struct drm_syncobj {
>>>        */
>>>       struct kref refcount;
>>>       /**
>>> -     * @fence:
>>> -     * NULL or a pointer to the fence bound to this object.
>>> -     *
>>> -     * This field should not be used directly. Use 
>>> drm_syncobj_fence_get()
>>> -     * and drm_syncobj_replace_fence() instead.
>>> +     * @type: indicate syncobj type
>>>        */
>>> -    struct dma_fence __rcu *fence;
>>> +    enum drm_syncobj_type type;
>>>       /**
>>> -     * @cb_list: List of callbacks to call when the &fence gets 
>>> replaced.
>>> +     * @syncobj_timeline: timeline
>>>        */
>>> -    struct list_head cb_list;
>>> +    struct drm_syncobj_timeline syncobj_timeline;
>>> +
>>>       /**
>>> -     * @lock: Protects &cb_list and write-locks &fence.
>>> +     * @lock: Protects timeline list and write-locks &fence.
>>>        */
>>>       spinlock_t lock;
>>>       /**
>>> @@ -62,25 +78,6 @@ struct drm_syncobj {
>>>       struct file *file;
>>>   };
>>>   -typedef void (*drm_syncobj_func_t)(struct drm_syncobj *syncobj,
>>> -                   struct drm_syncobj_cb *cb);
>>> -
>>> -/**
>>> - * struct drm_syncobj_cb - callback for drm_syncobj_add_callback
>>> - * @node: used by drm_syncob_add_callback to append this struct to
>>> - *      &drm_syncobj.cb_list
>>> - * @func: drm_syncobj_func_t to call
>>> - *
>>> - * This struct will be initialized by drm_syncobj_add_callback, 
>>> additional
>>> - * data can be passed along by embedding drm_syncobj_cb in another 
>>> struct.
>>> - * The callback will get called the next time 
>>> drm_syncobj_replace_fence is
>>> - * called.
>>> - */
>>> -struct drm_syncobj_cb {
>>> -    struct list_head node;
>>> -    drm_syncobj_func_t func;
>>> -};
>>> -
>>>   void drm_syncobj_free(struct kref *kref);
>>>     /**
>>> @@ -106,29 +103,6 @@ drm_syncobj_put(struct drm_syncobj *obj)
>>>       kref_put(&obj->refcount, drm_syncobj_free);
>>>   }
>>>   -/**
>>> - * drm_syncobj_fence_get - get a reference to a fence in a sync object
>>> - * @syncobj: sync object.
>>> - *
>>> - * This acquires additional reference to &drm_syncobj.fence 
>>> contained in @obj,
>>> - * if not NULL. It is illegal to call this without already holding 
>>> a reference.
>>> - * No locks required.
>>> - *
>>> - * Returns:
>>> - * Either the fence of @obj or NULL if there's none.
>>> - */
>>> -static inline struct dma_fence *
>>> -drm_syncobj_fence_get(struct drm_syncobj *syncobj)
>>> -{
>>> -    struct dma_fence *fence;
>>> -
>>> -    rcu_read_lock();
>>> -    fence = dma_fence_get_rcu_safe(&syncobj->fence);
>>> -    rcu_read_unlock();
>>> -
>>> -    return fence;
>>> -}
>>> -
>>>   struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
>>>                        u32 handle);
>>>   void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, u64 
>>> point,
>>> @@ -142,5 +116,7 @@ int drm_syncobj_create(struct drm_syncobj 
>>> **out_syncobj, uint32_t flags,
>>>   int drm_syncobj_get_handle(struct drm_file *file_private,
>>>                  struct drm_syncobj *syncobj, u32 *handle);
>>>   int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd);
>>> +int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point,
>>> +                 u64 flags, struct dma_fence **fence);
>>>     #endif
>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>> index 300f336633f2..cebdb2541eb7 100644
>>> --- a/include/uapi/drm/drm.h
>>> +++ b/include/uapi/drm/drm.h
>>> @@ -717,6 +717,7 @@ struct drm_prime_handle {
>>>   struct drm_syncobj_create {
>>>       __u32 handle;
>>>   #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
>>> +#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 1)
>>>       __u32 flags;
>>>   };
>>
>



More information about the amd-gfx mailing list