[PATCH 5/5] [RFC]drm: add syncobj timeline support v3
zhoucm1
zhoucm1 at amd.com
Thu Aug 30 10:40:13 UTC 2018
On 2018年08月30日 15:25, Christian König wrote:
> Am 30.08.2018 um 05:50 schrieb zhoucm1:
>>
>>
>> On 2018年08月29日 19:42, Christian König wrote:
>>> Am 29.08.2018 um 12:44 schrieb Chunming Zhou:
>>>> VK_KHR_timeline_semaphore:
>>>> This extension introduces a new type of semaphore that has an
>>>> integer payload
>>>> identifying a point in a timeline. Such timeline semaphores support
>>>> the
>>>> following operations:
>>>> * CPU query - A host operation that allows querying the payload
>>>> of the
>>>> timeline semaphore.
>>>> * CPU wait - A host operation that allows a blocking wait for a
>>>> timeline semaphore to reach a specified value.
>>>> * Device wait - A device operation that allows waiting for a
>>>> timeline semaphore to reach a specified value.
>>>> * Device signal - A device operation that allows advancing the
>>>> timeline semaphore to a specified value.
>>>>
>>>> Since it's a timeline, that means the front time point(PT) always
>>>> is signaled before the late PT.
>>>> a. signal PT design:
>>>> Signal PT fence N depends on PT[N-1] fence and signal opertion
>>>> fence, when PT[N] fence is signaled,
>>>> the timeline will increase to value of PT[N].
>>>> b. wait PT design:
>>>> Wait PT fence is signaled by reaching timeline point value, when
>>>> timeline is increasing, will compare
>>>> wait PTs value with new timeline value, if PT value is lower than
>>>> timeline value, then wait PT will be
>>>> signaled, otherwise keep in list. semaphore wait operation can wait
>>>> on any point of timeline,
>>>> so need a RB tree to order them. And wait PT could ahead of signal
>>>> PT, we need a sumission fence to
>>>> perform that.
>>>>
>>>> v2:
>>>> 1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian)
>>>> 2. move unexposed denitions to .c file. (Daniel Vetter)
>>>> 3. split up the change to drm_syncobj_find_fence() in a separate
>>>> patch. (Christian)
>>>> 4. split up the change to drm_syncobj_replace_fence() in a separate
>>>> patch.
>>>> 5. drop the submission_fence implementation and instead use
>>>> wait_event() for that. (Christian)
>>>> 6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter)
>>>>
>>>> v3:
>>>> 1. replace normal syncobj with timeline implemenation. (Vetter and
>>>> Christian)
>>>> a. normal syncobj signal op will create a signal PT to tail of
>>>> signal pt list.
>>>> b. normal syncobj wait op will create a wait pt with last
>>>> signal point, and this wait PT is only signaled by related signal
>>>> point PT.
>>>> 2. some bug fix and clean up
>>>> 3. tested by ./deqp-vk -n dEQP-VK*semaphore* for normal syncobj
>>>>
>>>> TODO:
>>>> 1. CPU query and wait on timeline semaphore.
>>>>
>>>> Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
>>>> Cc: Christian Konig <christian.koenig at amd.com>
>>>> Cc: Dave Airlie <airlied at redhat.com>
>>>> Cc: Daniel Rakos <Daniel.Rakos at amd.com>
>>>> Cc: Daniel Vetter <daniel at ffwll.ch>
>>>> ---
>>>> drivers/gpu/drm/drm_syncobj.c | 593
>>>> ++++++++++++++++-----
>>>> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +-
>>>> include/drm/drm_syncobj.h | 78 +--
>>>> include/uapi/drm/drm.h | 1 +
>>>> 4 files changed, 505 insertions(+), 171 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>>>> b/drivers/gpu/drm/drm_syncobj.c
>>>> index ab43559398d0..f701d9ef1b81 100644
>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>> @@ -56,6 +56,50 @@
>>>> #include "drm_internal.h"
>>>> #include <drm/drm_syncobj.h>
>>>> +/* merge normal syncobj to timeline syncobj, the point interval
>>>> is 1 */
>>>> +#define DRM_SYNCOBJ_NORMAL_POINT 1
>>>> +
>>>> +struct drm_syncobj_stub_fence {
>>>> + struct dma_fence base;
>>>> + spinlock_t lock;
>>>> +};
>>>> +
>>>> +static const char *drm_syncobj_stub_fence_get_name(struct
>>>> dma_fence *fence)
>>>> +{
>>>> + return "syncobjstub";
>>>> +}
>>>> +
>>>> +static bool drm_syncobj_stub_fence_enable_signaling(struct
>>>> dma_fence *fence)
>>>> +{
>>>> + return !dma_fence_is_signaled(fence);
>>>> +}
>>>> +static void drm_syncobj_stub_fence_release(struct dma_fence *f)
>>>> +{
>>>> + kfree(f);
>>>> +}
>>>> +static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
>>>> + .get_driver_name = drm_syncobj_stub_fence_get_name,
>>>> + .get_timeline_name = drm_syncobj_stub_fence_get_name,
>>>> + .enable_signaling = drm_syncobj_stub_fence_enable_signaling,
>>>> + .release = drm_syncobj_stub_fence_release,
>>>> +};
>>>
>>> Do we really need to move that around? Could reduce the size of the
>>> patch quite a bit if we don't.
>>
>> stub fence is used widely in both normal and timeline syncobj, if you
>> think which increase patch size, I can do a separate patch for that.
>>
>>>
>>>> +
>>>> +struct drm_syncobj_wait_pt {
>>>> + struct drm_syncobj_stub_fence base;
>>>> + u64 value;
>>>> + struct rb_node node;
>>>> +};
>>>> +struct drm_syncobj_signal_pt {
>>>> + struct drm_syncobj_stub_fence base;
>>>> + struct dma_fence *signal_fence;
>>>> + struct dma_fence *pre_pt_base;
>>>> + struct dma_fence_cb signal_cb;
>>>> + struct dma_fence_cb pre_pt_cb;
>>>> + struct drm_syncobj *syncobj;
>>>> + u64 value;
>>>> + struct list_head list;
>>>> +};
>>>
>>> What are those two structures good for
>> They are used to record wait ops points and signal ops points.
>> For timeline, they are connected by timeline value, works like:
>> a. signal pt increase timeline value to signal_pt value when
>> signal_pt is signaled. signal_pt is depending on previous pt fence
>> and itself signal fence from signal ops.
>> b. wait pt tree checks if timeline value over itself value.
>>
>> For normal, works like:
>> a. signal pt increase 1 for syncobj_timeline->signal_point every
>> time when signal ops is performed.
>> b. when wait ops is comming, wait pt will fetch above last signal
>> pt value as its wait point. wait pt will be only signaled by equal
>> point value signal_pt.
>>
>>
>>> and why is the stub fence their base?
>> Good question, I tried to kzalloc for them as well when I debug them,
>> I encountered a problem:
>> I lookup/find wait_pt or signal_pt successfully, but when I tried to
>> use them, they are freed sometimes, and results in NULL point.
>> and generally, when lookup them, we often need their stub fence as
>> well, and in the meantime, their lifecycle are same.
>> Above reason, I make stub fence as their base.
>
> That sounds like you only did this because you messed up the lifecycle.
>
> Additional to that I don't think you correctly considered the
> lifecycle of the waits and the sync object itself. E.g. blocking in
> drm_syncobj_timeline_fini() until all waits are done is not a good idea.
>
> What you should do instead is to create a fence_array object with all
> the fence we need to wait for when a wait point is requested.
Yeah, this is our initial discussion result, but when I tried to do
that, I found that cannot meet the advance signal requirement:
a. We need to consider the wait and signal pt value are not
one-to-one match, it's difficult to find dependent point, at least,
there is some overhead.
b. because we allowed "wait-before-signal", if "wait-before-signal"
happens, there isn't signal fence which can be used to create fence array.
So timeline value is good to resolve that.
>
> Otherwise somebody can easily construct a situation where timeline
> sync obj A waits on B and B waits on A.
Same situation also can happens with fence-array, we only can see this
is a programming bug with incorrect use.
>
>>
>>>
>>>> +
>>>> /**
>>>> * drm_syncobj_find - lookup and reference a sync object.
>>>> * @file_private: drm file private pointer
>>>> @@ -82,59 +126,247 @@ struct drm_syncobj *drm_syncobj_find(struct
>>>> drm_file *file_private,
>>>> }
>>>> EXPORT_SYMBOL(drm_syncobj_find);
>>>> -static void drm_syncobj_add_callback_locked(struct drm_syncobj
>>>> *syncobj,
>>>> - struct drm_syncobj_cb *cb,
>>>> - drm_syncobj_func_t func)
>>>> +static void drm_syncobj_timeline_init(struct drm_syncobj_timeline
>>>> + *syncobj_timeline)
>>>> {
>>>> - cb->func = func;
>>>> - list_add_tail(&cb->node, &syncobj->cb_list);
>>>> + syncobj_timeline->timeline_context = dma_fence_context_alloc(1);
>>>> + syncobj_timeline->timeline = 0;
>>>> + syncobj_timeline->signal_point = 0;
>>>> + init_waitqueue_head(&syncobj_timeline->wq);
>>>> +
>>>> + syncobj_timeline->wait_pt_tree = RB_ROOT;
>>>> + INIT_LIST_HEAD(&syncobj_timeline->signal_pt_list);
>>>> }
>>>> -static int drm_syncobj_fence_get_or_add_callback(struct
>>>> drm_syncobj *syncobj,
>>>> - struct dma_fence **fence,
>>>> - struct drm_syncobj_cb *cb,
>>>> - drm_syncobj_func_t func)
>>>> +static void drm_syncobj_timeline_fini(struct drm_syncobj *syncobj,
>>>> + struct drm_syncobj_timeline *syncobj_timeline)
>>>> {
>>>> - int ret;
>>>> + struct rb_node *node = NULL;
>>>> + struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>> + struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
>>>> +
>>>> + spin_lock(&syncobj->lock);
>>>> + for(node = rb_first(&syncobj_timeline->wait_pt_tree);
>>>> + node != NULL; ) {
>>>
>>> Better use rbtree_postorder_for_each_entry_safe() for this.
>> From the comments, seems we cannot use it here, we need erase node here.
>> Comments:
>> * Note, however, that it cannot handle other modifications that
>> re-order the
>> * rbtree it is iterating over. This includes calling rb_erase() on
>> @pos, as
>> * rb_erase() may rebalance the tree, causing us to miss some nodes.
>> */
>
> Yeah, but your current implementation has the same problem :)
>
> You need to use something like "while (node = rb_first(...))" instead
> if you want to destruct the whole tree.
>
> Regards,
> Christian.
>
>>
>> Thanks,
>> David Zhou
>>>
>>> Regards,
>>> Christian.
>>>
>>>> + wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>>> + node = rb_next(node);
I already get the next node before erasing this node, so the "for (..."
sentence is equal with "while (...)"
Thanks,
David Zhou
>>>> + rb_erase(&wait_pt->node,
>>>> + &syncobj_timeline->wait_pt_tree);
>>>> + RB_CLEAR_NODE(&wait_pt->node);
>>>> + spin_unlock(&syncobj->lock);
>>>> + dma_fence_wait(&wait_pt->base.base, true);
>>>> + spin_lock(&syncobj->lock);
>>>> + /* kfree(wait_pt) is excuted by fence put */
>>>> + dma_fence_put(&wait_pt->base.base);
>>>> + }
>>>> + list_for_each_entry_safe(signal_pt, tmp,
>>>> + &syncobj_timeline->signal_pt_list, list) {
>>>> + list_del(&signal_pt->list);
>>>> + if (signal_pt->signal_fence) {
>>>> + dma_fence_remove_callback(signal_pt->signal_fence,
>>>> + &signal_pt->signal_cb);
>>>> + dma_fence_put(signal_pt->signal_fence);
>>>> + }
>>>> + if (signal_pt->pre_pt_base) {
>>>> + dma_fence_remove_callback(signal_pt->pre_pt_base,
>>>> + &signal_pt->pre_pt_cb);
>>>> + dma_fence_put(signal_pt->pre_pt_base);
>>>> + }
>>>> + dma_fence_put(&signal_pt->base.base);
>>>> + }
>>>> + spin_unlock(&syncobj->lock);
>>>> +}
>>>> +
>>>> - *fence = drm_syncobj_fence_get(syncobj);
>>>> - if (*fence)
>>>> - return 1;
>>>> +static bool drm_syncobj_normal_signal_wait_pts(struct drm_syncobj
>>>> *syncobj,
>>>> + u64 signal_pt)
>>>> +{
>>>> + struct rb_node *node = NULL;
>>>> + struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>> spin_lock(&syncobj->lock);
>>>> - /* We've already tried once to get a fence and failed. Now
>>>> that we
>>>> - * have the lock, try one more time just to be sure we don't
>>>> add a
>>>> - * callback when a fence has already been set.
>>>> - */
>>>> - if (syncobj->fence) {
>>>> - *fence =
>>>> dma_fence_get(rcu_dereference_protected(syncobj->fence,
>>>> - lockdep_is_held(&syncobj->lock)));
>>>> - ret = 1;
>>>> - } else {
>>>> - *fence = NULL;
>>>> - drm_syncobj_add_callback_locked(syncobj, cb, func);
>>>> - ret = 0;
>>>> + for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
>>>> + node != NULL; ) {
>>>> + wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>>> + node = rb_next(node);
>>>> + /* for normal syncobj */
>>>> + if (wait_pt->value == signal_pt) {
>>>> + dma_fence_signal(&wait_pt->base.base);
>>>> + rb_erase(&wait_pt->node,
>>>> + &syncobj->syncobj_timeline.wait_pt_tree);
>>>> + RB_CLEAR_NODE(&wait_pt->node);
>>>> + /* kfree(wait_pt) is excuted by fence put */
>>>> + dma_fence_put(&wait_pt->base.base);
>>>> + spin_unlock(&syncobj->lock);
>>>> + return true;
>>>> + }
>>>> }
>>>> spin_unlock(&syncobj->lock);
>>>> + return false;
>>>> +}
>>>> - return ret;
>>>> +static void drm_syncobj_timeline_signal_wait_pts(struct
>>>> drm_syncobj *syncobj)
>>>> +{
>>>> + struct rb_node *node = NULL;
>>>> + struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>> +
>>>> + spin_lock(&syncobj->lock);
>>>> + for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
>>>> + node != NULL; ) {
>>>> + wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>>> + node = rb_next(node);
>>>> + if (wait_pt->value <= syncobj->syncobj_timeline.timeline) {
>>>> + dma_fence_signal(&wait_pt->base.base);
>>>> + rb_erase(&wait_pt->node,
>>>> + &syncobj->syncobj_timeline.wait_pt_tree);
>>>> + RB_CLEAR_NODE(&wait_pt->node);
>>>> + /* kfree(wait_pt) is excuted by fence put */
>>>> + dma_fence_put(&wait_pt->base.base);
>>>> + } else {
>>>> + /* the loop is from left to right, the later entry
>>>> value is
>>>> + * bigger, so don't need to check any more */
>>>> + break;
>>>> + }
>>>> + }
>>>> + spin_unlock(&syncobj->lock);
>>>> }
>>>> -void drm_syncobj_add_callback(struct drm_syncobj *syncobj,
>>>> - struct drm_syncobj_cb *cb,
>>>> - drm_syncobj_func_t func)
>>>> +
>>>> +
>>>> +static void pt_fence_cb(struct drm_syncobj_signal_pt *signal_pt)
>>>> {
>>>> + struct dma_fence *fence = NULL;
>>>> + struct drm_syncobj *syncobj;
>>>> + struct drm_syncobj_signal_pt *tail_pt;
>>>> + u64 pt_value = signal_pt->value;
>>>> +
>>>> + dma_fence_signal(&signal_pt->base.base);
>>>> + fence = signal_pt->signal_fence;
>>>> + signal_pt->signal_fence = NULL;
>>>> + dma_fence_put(fence);
>>>> + fence = signal_pt->pre_pt_base;
>>>> + signal_pt->pre_pt_base = NULL;
>>>> + dma_fence_put(fence);
>>>> +
>>>> + syncobj = signal_pt->syncobj;
>>>> spin_lock(&syncobj->lock);
>>>> - drm_syncobj_add_callback_locked(syncobj, cb, func);
>>>> + syncobj->syncobj_timeline.timeline = pt_value;
>>>> + tail_pt =
>>>> list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
>>>> + struct drm_syncobj_signal_pt, list);
>>>> + if ((syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL && signal_pt !=
>>>> tail_pt)
>>>> + || syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>>> + list_del(&signal_pt->list);
>>>> + /* kfree(signal_pt) will be executed by below fence put */
>>>> + dma_fence_put(&signal_pt->base.base);
>>>> + }
>>>> spin_unlock(&syncobj->lock);
>>>> + if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL)
>>>> + drm_syncobj_normal_signal_wait_pts(syncobj, pt_value);
>>>> + else
>>>> + drm_syncobj_timeline_signal_wait_pts(syncobj);
>>>> }
>>>> +static void pt_signal_fence_func(struct dma_fence *fence,
>>>> + struct dma_fence_cb *cb)
>>>> +{
>>>> + struct drm_syncobj_signal_pt *signal_pt =
>>>> + container_of(cb, struct drm_syncobj_signal_pt, signal_cb);
>>>> -void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
>>>> - struct drm_syncobj_cb *cb)
>>>> + if (signal_pt->pre_pt_base &&
>>>> + !dma_fence_is_signaled(signal_pt->pre_pt_base))
>>>> + return;
>>>> +
>>>> + pt_fence_cb(signal_pt);
>>>> +}
>>>> +static void pt_pre_fence_func(struct dma_fence *fence,
>>>> + struct dma_fence_cb *cb)
>>>> +{
>>>> + struct drm_syncobj_signal_pt *signal_pt =
>>>> + container_of(cb, struct drm_syncobj_signal_pt, pre_pt_cb);
>>>> +
>>>> + if (signal_pt->signal_fence &&
>>>> + !dma_fence_is_signaled(signal_pt->pre_pt_base))
>>>> + return;
>>>> +
>>>> + pt_fence_cb(signal_pt);
>>>> +}
>>>> +
>>>> +static int drm_syncobj_timeline_create_signal_pt(struct
>>>> drm_syncobj *syncobj,
>>>> + struct dma_fence *fence,
>>>> + u64 point)
>>>> {
>>>> + struct drm_syncobj_signal_pt *signal_pt =
>>>> + kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL);
>>>> + struct drm_syncobj_stub_fence *base;
>>>> + struct drm_syncobj_signal_pt *tail_pt;
>>>> + struct dma_fence *tail_pt_fence = NULL;
>>>> + int ret = 0;
>>>> +
>>>> + if (!signal_pt)
>>>> + return -ENOMEM;
>>>> + if (syncobj->syncobj_timeline.signal_point >= point) {
>>>> + DRM_WARN("A later signal is ready!");
>>>> + goto out;
>>>> + }
>>>> + if (!fence)
>>>> + goto out;
>>>> + dma_fence_get(fence);
>>>> spin_lock(&syncobj->lock);
>>>> - list_del_init(&cb->node);
>>>> + base = &signal_pt->base;
>>>> + spin_lock_init(&base->lock);
>>>> + dma_fence_init(&base->base,
>>>> + &drm_syncobj_stub_fence_ops,
>>>> + &base->lock,
>>>> + syncobj->syncobj_timeline.timeline_context, point);
>>>> + signal_pt->signal_fence = fence;
>>>> + /* timeline syncobj must take this dependency */
>>>> + if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>>> + if (!list_empty(&syncobj->syncobj_timeline.signal_pt_list)) {
>>>> + tail_pt =
>>>> list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
>>>> + struct drm_syncobj_signal_pt, list);
>>>> + tail_pt_fence = &tail_pt->base.base;
>>>> + if (dma_fence_is_signaled(tail_pt_fence))
>>>> + tail_pt_fence = NULL;
>>>> + else
>>>> + signal_pt->pre_pt_base =
>>>> + dma_fence_get(tail_pt_fence);
>>>> + }
>>>> + }
>>>> +
>>>> + signal_pt->value = point;
>>>> + syncobj->syncobj_timeline.signal_point = point;
>>>> + signal_pt->syncobj = syncobj;
>>>> + INIT_LIST_HEAD(&signal_pt->list);
>>>> + list_add_tail(&signal_pt->list,
>>>> &syncobj->syncobj_timeline.signal_pt_list);
>>>> spin_unlock(&syncobj->lock);
>>>> + wake_up_all(&syncobj->syncobj_timeline.wq);
>>>> + /**
>>>> + * Every pt is depending on signal fence and previous pt
>>>> fence, add
>>>> + * callbacks to them
>>>> + */
>>>> + ret = dma_fence_add_callback(signal_pt->signal_fence,
>>>> + &signal_pt->signal_cb,
>>>> + pt_signal_fence_func);
>>>> +
>>>> + if (signal_pt->pre_pt_base) {
>>>> + ret = dma_fence_add_callback(signal_pt->pre_pt_base,
>>>> + &signal_pt->pre_pt_cb,
>>>> + pt_pre_fence_func);
>>>> + if (ret == -ENOENT)
>>>> + pt_pre_fence_func(signal_pt->pre_pt_base,
>>>> + &signal_pt->pre_pt_cb);
>>>> + else if (ret)
>>>> + goto out;
>>>> + } else if (ret == -ENOENT) {
>>>> + pt_signal_fence_func(signal_pt->signal_fence,
>>>> + &signal_pt->signal_cb);
>>>> + } else if (ret) {
>>>> + goto out;
>>>> + }
>>>> +
>>>> + return 0;
>>>> +out:
>>>> + dma_fence_put(&signal_pt->base.base);
>>>> + return ret;
>>>> }
>>>> /**
>>>> @@ -149,53 +381,30 @@ void drm_syncobj_replace_fence(struct
>>>> drm_syncobj *syncobj,
>>>> u64 point,
>>>> struct dma_fence *fence)
>>>> {
>>>> - struct dma_fence *old_fence;
>>>> - struct drm_syncobj_cb *cur, *tmp;
>>>> -
>>>> - if (fence)
>>>> - dma_fence_get(fence);
>>>> -
>>>> - spin_lock(&syncobj->lock);
>>>> -
>>>> - old_fence = rcu_dereference_protected(syncobj->fence,
>>>> - lockdep_is_held(&syncobj->lock));
>>>> - rcu_assign_pointer(syncobj->fence, fence);
>>>> -
>>>> - if (fence != old_fence) {
>>>> - list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
>>>> - list_del_init(&cur->node);
>>>> - cur->func(syncobj, cur);
>>>> + if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>>> + if (fence)
>>>> + drm_syncobj_timeline_create_signal_pt(syncobj, fence,
>>>> + point);
>>>> + return;
>>>> + } else if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
>>>> + u64 pt_value;
>>>> +
>>>> + if (!fence) {
>>>> + drm_syncobj_timeline_fini(syncobj,
>>>> + &syncobj->syncobj_timeline);
>>>> + drm_syncobj_timeline_init(&syncobj->syncobj_timeline);
>>>> + return;
>>>> }
>>>> + pt_value = syncobj->syncobj_timeline.signal_point +
>>>> + DRM_SYNCOBJ_NORMAL_POINT;
>>>> + drm_syncobj_timeline_create_signal_pt(syncobj, fence,
>>>> pt_value);
>>>> + return;
>>>> + } else {
>>>> + DRM_ERROR("the syncobj type isn't support\n");
>>>> }
>>>> -
>>>> - spin_unlock(&syncobj->lock);
>>>> -
>>>> - dma_fence_put(old_fence);
>>>> }
>>>> EXPORT_SYMBOL(drm_syncobj_replace_fence);
>>>> -struct drm_syncobj_stub_fence {
>>>> - struct dma_fence base;
>>>> - spinlock_t lock;
>>>> -};
>>>> -
>>>> -static const char *drm_syncobj_stub_fence_get_name(struct
>>>> dma_fence *fence)
>>>> -{
>>>> - return "syncobjstub";
>>>> -}
>>>> -
>>>> -static bool drm_syncobj_stub_fence_enable_signaling(struct
>>>> dma_fence *fence)
>>>> -{
>>>> - return !dma_fence_is_signaled(fence);
>>>> -}
>>>> -
>>>> -static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
>>>> - .get_driver_name = drm_syncobj_stub_fence_get_name,
>>>> - .get_timeline_name = drm_syncobj_stub_fence_get_name,
>>>> - .enable_signaling = drm_syncobj_stub_fence_enable_signaling,
>>>> - .release = NULL,
>>>> -};
>>>> -
>>>> static int drm_syncobj_assign_null_handle(struct drm_syncobj
>>>> *syncobj)
>>>> {
>>>> struct drm_syncobj_stub_fence *fence;
>>>> @@ -215,6 +424,143 @@ static int
>>>> drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>>> return 0;
>>>> }
>>>> +static struct drm_syncobj_wait_pt *
>>>> +drm_syncobj_timeline_lookup_wait_pt_fence(struct drm_syncobj
>>>> *syncobj,
>>>> + u64 point,
>>>> + struct dma_fence **fence)
>>>> +{
>>>> + struct rb_node *node =
>>>> syncobj->syncobj_timeline.wait_pt_tree.rb_node;
>>>> + struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>> +
>>>> +
>>>> + spin_lock(&syncobj->lock);
>>>> + while(node) {
>>>> + int result;
>>>> +
>>>> + wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>>> + result = point - wait_pt->value;
>>>> + if (result < 0) {
>>>> + node = node->rb_left;
>>>> + } else if (result > 0) {
>>>> + node = node->rb_right;
>>>> + } else {
>>>> + if (fence)
>>>> + *fence = dma_fence_get(&wait_pt->base.base);
>>>> + break;
>>>> + }
>>>> + }
>>>> + spin_unlock(&syncobj->lock);
>>>> +
>>>> + return wait_pt;
>>>> +}
>>>> +
>>>> +static struct drm_syncobj_wait_pt *
>>>> +drm_syncobj_timeline_create_wait_pt_and_return_fence(struct
>>>> drm_syncobj *syncobj,
>>>> + u64 point,
>>>> + struct dma_fence **fence)
>>>> +{
>>>> + struct drm_syncobj_wait_pt *wait_pt = kzalloc(sizeof(*wait_pt),
>>>> + GFP_KERNEL);
>>>> + struct drm_syncobj_stub_fence *base;
>>>> + struct rb_node **new =
>>>> &(syncobj->syncobj_timeline.wait_pt_tree.rb_node), *parent = NULL;
>>>> + struct drm_syncobj_signal_pt *tail_signal_pt =
>>>> + list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
>>>> + struct drm_syncobj_signal_pt, list);
>>>> +
>>>> + if (!wait_pt)
>>>> + return NULL;
>>>> + base = &wait_pt->base;
>>>> + spin_lock_init(&base->lock);
>>>> + dma_fence_init(&base->base,
>>>> + &drm_syncobj_stub_fence_ops,
>>>> + &base->lock,
>>>> + syncobj->syncobj_timeline.timeline_context, point);
>>>> + wait_pt->value = point;
>>>> +
>>>> + /* wait pt must be in an order, so that we can easily lookup
>>>> and signal
>>>> + * it */
>>>> + spin_lock(&syncobj->lock);
>>>> + if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE &&
>>>> + point <= syncobj->syncobj_timeline.timeline)
>>>> + dma_fence_signal(&wait_pt->base.base);
>>>> + if ((syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) &&
>>>> + (point == tail_signal_pt->value) &&
>>>> + dma_fence_is_signaled(&tail_signal_pt->base.base))
>>>> + dma_fence_signal(&wait_pt->base.base);
>>>> + while(*new) {
>>>> + struct drm_syncobj_wait_pt *this =
>>>> + rb_entry(*new, struct drm_syncobj_wait_pt, node);
>>>> + int result = wait_pt->value - this->value;
>>>> +
>>>> + parent = *new;
>>>> + if (result < 0)
>>>> + new = &((*new)->rb_left);
>>>> + else if (result > 0)
>>>> + new = &((*new)->rb_right);
>>>> + else
>>>> + goto exist;
>>>> + }
>>>> + if (fence)
>>>> + *fence = dma_fence_get(&wait_pt->base.base);
>>>> + rb_link_node(&wait_pt->node, parent, new);
>>>> + rb_insert_color(&wait_pt->node,
>>>> &syncobj->syncobj_timeline.wait_pt_tree);
>>>> + spin_unlock(&syncobj->lock);
>>>> + return wait_pt;
>>>> +exist:
>>>> + spin_unlock(&syncobj->lock);
>>>> + dma_fence_put(&wait_pt->base.base);
>>>> + wait_pt = drm_syncobj_timeline_lookup_wait_pt_fence(syncobj,
>>>> point,
>>>> + fence);
>>>> + return wait_pt;
>>>> +}
>>>> +
>>>> +static struct dma_fence *
>>>> +drm_syncobj_timeline_point_get(struct drm_syncobj *syncobj, u64
>>>> point, u64 flags)
>>>> +{
>>>> + struct drm_syncobj_wait_pt *wait_pt;
>>>> + struct dma_fence *fence = NULL;
>>>> +
>>>> + /* already signaled, simply return a signaled stub fence */
>>>> + if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE &&
>>>> + point <= syncobj->syncobj_timeline.timeline) {
>>>> + struct drm_syncobj_stub_fence *fence;
>>>> +
>>>> + fence = kzalloc(sizeof(*fence), GFP_KERNEL);
>>>> + if (fence == NULL)
>>>> + return NULL;
>>>> +
>>>> + spin_lock_init(&fence->lock);
>>>> + dma_fence_init(&fence->base, &drm_syncobj_stub_fence_ops,
>>>> + &fence->lock, 0, 0);
>>>> + dma_fence_signal(&fence->base);
>>>> + return &fence->base;
>>>> + }
>>>> +
>>>> + /* check if the wait pt exists */
>>>> + wait_pt = drm_syncobj_timeline_lookup_wait_pt_fence(syncobj,
>>>> point,
>>>> + &fence);
>>>> + if (!fence) {
>>>> + /* This is a new wait pt, so create it */
>>>> + wait_pt =
>>>> drm_syncobj_timeline_create_wait_pt_and_return_fence(syncobj, point,
>>>> + &fence);
>>>> + if (!fence)
>>>> + return NULL;
>>>> + }
>>>> + if (wait_pt) {
>>>> + int ret = 0;
>>>> +
>>>> + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>>> + ret =
>>>> wait_event_interruptible_timeout(syncobj->syncobj_timeline.wq,
>>>> + wait_pt->value <=
>>>> syncobj->syncobj_timeline.signal_point,
>>>> + msecs_to_jiffies(10000)); /* wait 10s */
>>>> + if (ret <= 0)
>>>> + return NULL;
>>>> + }
>>>> + return fence;
>>>> + }
>>>> + return NULL;
>>>> +}
>>>> +
>>>> /**
>>>> * drm_syncobj_find_fence - lookup and reference the fence in a
>>>> sync object
>>>> * @file_private: drm file private pointer
>>>> @@ -229,20 +575,45 @@ static int
>>>> drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>>> * contains a reference to the fence, which must be released by
>>>> calling
>>>> * dma_fence_put().
>>>> */
>>>> -int drm_syncobj_find_fence(struct drm_file *file_private,
>>>> - u32 handle, u64 point,
>>>> - struct dma_fence **fence)
>>>> +int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point,
>>>> + u64 flags, struct dma_fence **fence)
>>>> {
>>>> - struct drm_syncobj *syncobj = drm_syncobj_find(file_private,
>>>> handle);
>>>> int ret = 0;
>>>> if (!syncobj)
>>>> return -ENOENT;
>>>> - *fence = drm_syncobj_fence_get(syncobj);
>>>> + if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
>>>> + /*NORMAL syncobj always wait on last pt */
>>>> + u64 tail_pt_value = syncobj->syncobj_timeline.signal_point;
>>>> +
>>>> + if (tail_pt_value == 0)
>>>> + tail_pt_value += DRM_SYNCOBJ_NORMAL_POINT;
>>>> + /* NORMAL syncobj doesn't care point value */
>>>> + WARN_ON(point != 0);
>>>> + *fence = drm_syncobj_timeline_point_get(syncobj,
>>>> tail_pt_value,
>>>> + flags);
>>>> + } else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>>> + *fence = drm_syncobj_timeline_point_get(syncobj, point,
>>>> + flags);
>>>> + } else {
>>>> + DRM_ERROR("Don't support this type syncobj\n");
>>>> + *fence = NULL;
>>>> + }
>>>> if (!*fence) {
>>>> ret = -EINVAL;
>>>> }
>>>> + return ret;
>>>> +}
>>>> +EXPORT_SYMBOL(drm_syncobj_search_fence);
>>>> +int drm_syncobj_find_fence(struct drm_file *file_private,
>>>> + u32 handle, u64 point,
>>>> + struct dma_fence **fence) {
>>>> + struct drm_syncobj *syncobj = drm_syncobj_find(file_private,
>>>> handle);
>>>> +
>>>> + int ret = drm_syncobj_search_fence(syncobj, point,
>>>> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
>>>> + fence);
>>>> drm_syncobj_put(syncobj);
>>>> return ret;
>>>> }
>>>> @@ -259,7 +630,7 @@ void drm_syncobj_free(struct kref *kref)
>>>> struct drm_syncobj *syncobj = container_of(kref,
>>>> struct drm_syncobj,
>>>> refcount);
>>>> - drm_syncobj_replace_fence(syncobj, 0, NULL);
>>>> + drm_syncobj_timeline_fini(syncobj, &syncobj->syncobj_timeline);
>>>> kfree(syncobj);
>>>> }
>>>> EXPORT_SYMBOL(drm_syncobj_free);
>>>> @@ -287,8 +658,12 @@ int drm_syncobj_create(struct drm_syncobj
>>>> **out_syncobj, uint32_t flags,
>>>> return -ENOMEM;
>>>> kref_init(&syncobj->refcount);
>>>> - INIT_LIST_HEAD(&syncobj->cb_list);
>>>> spin_lock_init(&syncobj->lock);
>>>> + if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE)
>>>> + syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
>>>> + else
>>>> + syncobj->type = DRM_SYNCOBJ_TYPE_NORMAL;
>>>> + drm_syncobj_timeline_init(&syncobj->syncobj_timeline);
>>>> if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
>>>> ret = drm_syncobj_assign_null_handle(syncobj);
>>>> @@ -646,7 +1021,6 @@ struct syncobj_wait_entry {
>>>> struct task_struct *task;
>>>> struct dma_fence *fence;
>>>> struct dma_fence_cb fence_cb;
>>>> - struct drm_syncobj_cb syncobj_cb;
>>>> };
>>>> static void syncobj_wait_fence_func(struct dma_fence *fence,
>>>> @@ -658,18 +1032,6 @@ static void syncobj_wait_fence_func(struct
>>>> dma_fence *fence,
>>>> wake_up_process(wait->task);
>>>> }
>>>> -static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
>>>> - struct drm_syncobj_cb *cb)
>>>> -{
>>>> - struct syncobj_wait_entry *wait =
>>>> - container_of(cb, struct syncobj_wait_entry, syncobj_cb);
>>>> -
>>>> - /* This happens inside the syncobj lock */
>>>> - wait->fence =
>>>> dma_fence_get(rcu_dereference_protected(syncobj->fence,
>>>> - lockdep_is_held(&syncobj->lock)));
>>>> - wake_up_process(wait->task);
>>>> -}
>>>> -
>>>> static signed long drm_syncobj_array_wait_timeout(struct
>>>> drm_syncobj **syncobjs,
>>>> uint32_t count,
>>>> uint32_t flags,
>>>> @@ -693,14 +1055,11 @@ static signed long
>>>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>>> signaled_count = 0;
>>>> for (i = 0; i < count; ++i) {
>>>> entries[i].task = current;
>>>> - entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
>>>> + ret = drm_syncobj_search_fence(syncobjs[i], 0, 0,
>>>> + &entries[i].fence);
>>>> if (!entries[i].fence) {
>>>> - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>>> - continue;
>>>> - } else {
>>>> - ret = -EINVAL;
>>>> - goto cleanup_entries;
>>>> - }
>>>> + ret = -EINVAL;
>>>> + goto cleanup_entries;
>>>> }
>>>> if (dma_fence_is_signaled(entries[i].fence)) {
>>>> @@ -728,15 +1087,6 @@ static signed long
>>>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>>> * fallthough and try a 0 timeout wait!
>>>> */
>>>> - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>>> - for (i = 0; i < count; ++i) {
>>>> - drm_syncobj_fence_get_or_add_callback(syncobjs[i],
>>>> - &entries[i].fence,
>>>> - &entries[i].syncobj_cb,
>>>> - syncobj_wait_syncobj_func);
>>>> - }
>>>> - }
>>>> -
>>>> do {
>>>> set_current_state(TASK_INTERRUPTIBLE);
>>>> @@ -784,13 +1134,10 @@ static signed long
>>>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>>> cleanup_entries:
>>>> for (i = 0; i < count; ++i) {
>>>> - if (entries[i].syncobj_cb.func)
>>>> - drm_syncobj_remove_callback(syncobjs[i],
>>>> - &entries[i].syncobj_cb);
>>>> + dma_fence_put(entries[i].fence);
>>>> if (entries[i].fence_cb.func)
>>>> dma_fence_remove_callback(entries[i].fence,
>>>> &entries[i].fence_cb);
>>>> - dma_fence_put(entries[i].fence);
>>>> }
>>>> kfree(entries);
>>>> @@ -965,12 +1312,20 @@ drm_syncobj_reset_ioctl(struct drm_device
>>>> *dev, void *data,
>>>> if (ret < 0)
>>>> return ret;
>>>> - for (i = 0; i < args->count_handles; i++)
>>>> - drm_syncobj_replace_fence(syncobjs[i], 0, NULL);
>>>> -
>>>> + for (i = 0; i < args->count_handles; i++) {
>>>> + if (syncobjs[i]->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>>> + DRM_ERROR("timeline syncobj cannot reset!\n");
>>>> + ret = -EINVAL;
>>>> + goto out;
>>>> + }
>>>> + drm_syncobj_timeline_fini(syncobjs[i],
>>>> + &syncobjs[i]->syncobj_timeline);
>>>> + drm_syncobj_timeline_init(&syncobjs[i]->syncobj_timeline);
>>>> + }
>>>> +out:
>>>> drm_syncobj_array_free(syncobjs, args->count_handles);
>>>> - return 0;
>>>> + return ret;
>>>> }
>>>> int
>>>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>>> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>>> index 7209dd832d39..bb20d318c9d6 100644
>>>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>>> @@ -2182,7 +2182,9 @@ await_fence_array(struct i915_execbuffer *eb,
>>>> if (!(flags & I915_EXEC_FENCE_WAIT))
>>>> continue;
>>>> - fence = drm_syncobj_fence_get(syncobj);
>>>> + drm_syncobj_search_fence(syncobj, 0,
>>>> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
>>>> + &fence);
>>>> if (!fence)
>>>> return -EINVAL;
>>>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>>>> index 425432b85a87..657c97dc25ec 100644
>>>> --- a/include/drm/drm_syncobj.h
>>>> +++ b/include/drm/drm_syncobj.h
>>>> @@ -30,6 +30,25 @@
>>>> struct drm_syncobj_cb;
>>>> +enum drm_syncobj_type {
>>>> + DRM_SYNCOBJ_TYPE_NORMAL,
>>>> + DRM_SYNCOBJ_TYPE_TIMELINE
>>>> +};
>>>> +
>>>> +struct drm_syncobj_timeline {
>>>> + wait_queue_head_t wq;
>>>> + u64 timeline_context;
>>>> + /**
>>>> + * @timeline: syncobj timeline
>>>> + */
>>>> + u64 timeline;
>>>> + u64 signal_point;
>>>> +
>>>> +
>>>> + struct rb_root wait_pt_tree;
>>>> + struct list_head signal_pt_list;
>>>> +};
>>>> +
>>>> /**
>>>> * struct drm_syncobj - sync object.
>>>> *
>>>> @@ -41,19 +60,16 @@ struct drm_syncobj {
>>>> */
>>>> struct kref refcount;
>>>> /**
>>>> - * @fence:
>>>> - * NULL or a pointer to the fence bound to this object.
>>>> - *
>>>> - * This field should not be used directly. Use
>>>> drm_syncobj_fence_get()
>>>> - * and drm_syncobj_replace_fence() instead.
>>>> + * @type: indicate syncobj type
>>>> */
>>>> - struct dma_fence __rcu *fence;
>>>> + enum drm_syncobj_type type;
>>>> /**
>>>> - * @cb_list: List of callbacks to call when the &fence gets
>>>> replaced.
>>>> + * @syncobj_timeline: timeline
>>>> */
>>>> - struct list_head cb_list;
>>>> + struct drm_syncobj_timeline syncobj_timeline;
>>>> +
>>>> /**
>>>> - * @lock: Protects &cb_list and write-locks &fence.
>>>> + * @lock: Protects timeline list and write-locks &fence.
>>>> */
>>>> spinlock_t lock;
>>>> /**
>>>> @@ -62,25 +78,6 @@ struct drm_syncobj {
>>>> struct file *file;
>>>> };
>>>> -typedef void (*drm_syncobj_func_t)(struct drm_syncobj *syncobj,
>>>> - struct drm_syncobj_cb *cb);
>>>> -
>>>> -/**
>>>> - * struct drm_syncobj_cb - callback for drm_syncobj_add_callback
>>>> - * @node: used by drm_syncob_add_callback to append this struct to
>>>> - * &drm_syncobj.cb_list
>>>> - * @func: drm_syncobj_func_t to call
>>>> - *
>>>> - * This struct will be initialized by drm_syncobj_add_callback,
>>>> additional
>>>> - * data can be passed along by embedding drm_syncobj_cb in another
>>>> struct.
>>>> - * The callback will get called the next time
>>>> drm_syncobj_replace_fence is
>>>> - * called.
>>>> - */
>>>> -struct drm_syncobj_cb {
>>>> - struct list_head node;
>>>> - drm_syncobj_func_t func;
>>>> -};
>>>> -
>>>> void drm_syncobj_free(struct kref *kref);
>>>> /**
>>>> @@ -106,29 +103,6 @@ drm_syncobj_put(struct drm_syncobj *obj)
>>>> kref_put(&obj->refcount, drm_syncobj_free);
>>>> }
>>>> -/**
>>>> - * drm_syncobj_fence_get - get a reference to a fence in a sync
>>>> object
>>>> - * @syncobj: sync object.
>>>> - *
>>>> - * This acquires additional reference to &drm_syncobj.fence
>>>> contained in @obj,
>>>> - * if not NULL. It is illegal to call this without already holding
>>>> a reference.
>>>> - * No locks required.
>>>> - *
>>>> - * Returns:
>>>> - * Either the fence of @obj or NULL if there's none.
>>>> - */
>>>> -static inline struct dma_fence *
>>>> -drm_syncobj_fence_get(struct drm_syncobj *syncobj)
>>>> -{
>>>> - struct dma_fence *fence;
>>>> -
>>>> - rcu_read_lock();
>>>> - fence = dma_fence_get_rcu_safe(&syncobj->fence);
>>>> - rcu_read_unlock();
>>>> -
>>>> - return fence;
>>>> -}
>>>> -
>>>> struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
>>>> u32 handle);
>>>> void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, u64
>>>> point,
>>>> @@ -142,5 +116,7 @@ int drm_syncobj_create(struct drm_syncobj
>>>> **out_syncobj, uint32_t flags,
>>>> int drm_syncobj_get_handle(struct drm_file *file_private,
>>>> struct drm_syncobj *syncobj, u32 *handle);
>>>> int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd);
>>>> +int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point,
>>>> + u64 flags, struct dma_fence **fence);
>>>> #endif
>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>> index 300f336633f2..cebdb2541eb7 100644
>>>> --- a/include/uapi/drm/drm.h
>>>> +++ b/include/uapi/drm/drm.h
>>>> @@ -717,6 +717,7 @@ struct drm_prime_handle {
>>>> struct drm_syncobj_create {
>>>> __u32 handle;
>>>> #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
>>>> +#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 1)
>>>> __u32 flags;
>>>> };
>>>
>>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx
mailing list