[PATCH] [RFC]drm: add syncobj timeline support v5

zhoucm1 zhoucm1 at amd.com
Mon Sep 17 09:33:47 UTC 2018



On 2018年09月17日 16:37, Christian König wrote:
> Am 14.09.2018 um 12:37 schrieb Chunming Zhou:
>> This patch is for VK_KHR_timeline_semaphore extension, semaphore is 
>> called syncobj in kernel side:
>> This extension introduces a new type of syncobj that has an integer 
>> payload
>> identifying a point in a timeline. Such timeline syncobjs support the
>> following operations:
>>     * CPU query - A host operation that allows querying the payload 
>> of the
>>       timeline syncobj.
>>     * CPU wait - A host operation that allows a blocking wait for a
>>       timeline syncobj to reach a specified value.
>>     * Device wait - A device operation that allows waiting for a
>>       timeline syncobj to reach a specified value.
>>     * Device signal - A device operation that allows advancing the
>>       timeline syncobj to a specified value.
>>
>> Since it's a timeline, that means the front time point(PT) always is 
>> signaled before the late PT.
>> a. signal PT design:
>> Signal PT fence N depends on PT[N-1] fence and signal opertion fence, 
>> when PT[N] fence is signaled,
>> the timeline will increase to value of PT[N].
>> b. wait PT design:
>> Wait PT fence is signaled by reaching timeline point value, when 
>> timeline is increasing, will compare
>> wait PTs value with new timeline value, if PT value is lower than 
>> timeline value, then wait PT will be
>> signaled, otherwise keep in list. syncobj wait operation can wait on 
>> any point of timeline,
>> so need a RB tree to order them. And wait PT could ahead of signal 
>> PT, we need a sumission fence to
>> perform that.
>>
>> v2:
>> 1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian)
>> 2. move unexposed denitions to .c file. (Daniel Vetter)
>> 3. split up the change to drm_syncobj_find_fence() in a separate 
>> patch. (Christian)
>> 4. split up the change to drm_syncobj_replace_fence() in a separate 
>> patch.
>> 5. drop the submission_fence implementation and instead use 
>> wait_event() for that. (Christian)
>> 6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter)
>>
>> v3:
>> 1. replace normal syncobj with timeline implemenation. (Vetter and 
>> Christian)
>>      a. normal syncobj signal op will create a signal PT to tail of 
>> signal pt list.
>>      b. normal syncobj wait op will create a wait pt with last signal 
>> point, and this wait PT is only signaled by related signal point PT.
>> 2. many bug fix and clean up
>> 3. stub fence moving is moved to other patch.
>>
>> v4:
>> 1. fix RB tree loop with while(node=rb_first(...)). (Christian)
>> 2. fix syncobj lifecycle. (Christian)
>> 3. only enable_signaling when there is wait_pt. (Christian)
>> 4. fix timeline path issues.
>> 5. write a timeline test in libdrm
>>
>> v5: (Christian)
>> 1. semaphore is called syncobj in kernel side.
>> 2. don't need 'timeline' characters in some function name.
>> 3. keep syncobj cb
>>
>> normal syncobj is tested by ./deqp-vk -n dEQP-VK*semaphore*
>> timeline syncobj is tested by ./amdgpu_test -s 9
>>
>> Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
>> Cc: Christian Konig <christian.koenig at amd.com>
>> Cc: Dave Airlie <airlied at redhat.com>
>> Cc: Daniel Rakos <Daniel.Rakos at amd.com>
>> Cc: Daniel Vetter <daniel at ffwll.ch>
>> ---
>>   drivers/gpu/drm/drm_syncobj.c              | 294 ++++++++++++++++++---
>>   drivers/gpu/drm/i915/i915_gem_execbuffer.c |   4 +-
>>   include/drm/drm_syncobj.h                  |  62 +++--
>>   include/uapi/drm/drm.h                     |   1 +
>>   4 files changed, 292 insertions(+), 69 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_syncobj.c 
>> b/drivers/gpu/drm/drm_syncobj.c
>> index e9ce623d049e..e78d076f2703 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -56,6 +56,9 @@
>>   #include "drm_internal.h"
>>   #include <drm/drm_syncobj.h>
>>   +/* merge normal syncobj to timeline syncobj, the point interval is 
>> 1 */
>> +#define DRM_SYNCOBJ_NORMAL_POINT 1
>> +
>>   struct drm_syncobj_stub_fence {
>>       struct dma_fence base;
>>       spinlock_t lock;
>> @@ -82,6 +85,11 @@ static const struct dma_fence_ops 
>> drm_syncobj_stub_fence_ops = {
>>       .release = drm_syncobj_stub_fence_release,
>>   };
>>   +struct drm_syncobj_signal_pt {
>> +    struct dma_fence_array *base;
>> +    u64    value;
>> +    struct list_head list;
>> +};
>>     /**
>>    * drm_syncobj_find - lookup and reference a sync object.
>> @@ -124,7 +132,7 @@ static int 
>> drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
>>   {
>>       int ret;
>>   -    *fence = drm_syncobj_fence_get(syncobj);
>> +    ret = drm_syncobj_search_fence(syncobj, 0, 0, fence);
>>       if (*fence)
>
> Don't we need to check ret here instead?
both ok, if you like check ret, will update it in v6.

>
>>           return 1;
>>   @@ -133,10 +141,10 @@ static int 
>> drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
>>        * have the lock, try one more time just to be sure we don't add a
>>        * callback when a fence has already been set.
>>        */
>> -    if (syncobj->fence) {
>> -        *fence = 
>> dma_fence_get(rcu_dereference_protected(syncobj->fence,
>> - lockdep_is_held(&syncobj->lock)));
>> -        ret = 1;
>> +    if (fence) {
>> +        drm_syncobj_search_fence(syncobj, 0, 0, fence);
>> +        if (*fence)
>> +            ret = 1;
>
> That doesn't looks correct to me, drm_syncobj_search_fence() would try 
> to grab the lock once more.
>
> That should probably be something like checking the signal_pt list and 
> if it is not empty any more drop the lock and retry.
Good catch, will change that.

>
>>       } else {
>>           *fence = NULL;
>>           drm_syncobj_add_callback_locked(syncobj, cb, func);
>> @@ -164,6 +172,151 @@ void drm_syncobj_remove_callback(struct 
>> drm_syncobj *syncobj,
>>       spin_unlock(&syncobj->lock);
>>   }
>>   +static void drm_syncobj_timeline_init(struct drm_syncobj *syncobj,
>> +                      struct drm_syncobj_timeline *syncobj_timeline)
>> +{
>> +    spin_lock(&syncobj->lock);
>> +    syncobj_timeline->timeline_context = dma_fence_context_alloc(1);
>> +    syncobj_timeline->timeline = 0;
>> +    syncobj_timeline->signal_point = 0;
>> +    init_waitqueue_head(&syncobj_timeline->wq);
>> +
>> +    INIT_LIST_HEAD(&syncobj_timeline->signal_pt_list);
>> +    spin_unlock(&syncobj->lock);
>> +}
>> +
>> +static void drm_syncobj_timeline_fini(struct drm_syncobj *syncobj,
>> +                      struct drm_syncobj_timeline *syncobj_timeline)
>> +{
>> +    struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
>> +
>> +    spin_lock(&syncobj->lock);
>> +    list_for_each_entry_safe(signal_pt, tmp,
>> +                 &syncobj_timeline->signal_pt_list, list) {
>> +        list_del(&signal_pt->list);
>> +        dma_fence_put(&signal_pt->base->base);
>> +        kfree(signal_pt);
>> +    }
>> +    spin_unlock(&syncobj->lock);
>> +}
>> +
>> +static struct dma_fence
>> +*drm_syncobj_find_signal_pt_for_point(struct drm_syncobj *syncobj,
>> +                      uint64_t point)
>> +{
>> +    struct drm_syncobj_timeline *timeline = &syncobj->syncobj_timeline;
>> +    struct drm_syncobj_signal_pt *signal_pt;
>> +
>> +    if ((syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) &&
>> +        (point <= timeline->timeline)) {
>> +        struct drm_syncobj_stub_fence *fence =
>> +            kzalloc(sizeof(struct drm_syncobj_stub_fence),
>> +                GFP_KERNEL);
>> +
>> +        if (!fence)
>> +            return NULL;
>> +        spin_lock_init(&fence->lock);
>> +        dma_fence_init(&fence->base,
>> +                   &drm_syncobj_stub_fence_ops,
>> +                   &fence->lock,
>> + syncobj->syncobj_timeline.timeline_context,
>> +                   point);
>> +
>> +        dma_fence_signal(&fence->base);
>> +        return &fence->base;
>> +    }
>> +
>> +    list_for_each_entry(signal_pt, &timeline->signal_pt_list, list) {
>> +        if (point > signal_pt->value)
>> +            continue;
>> +        if ((syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) &&
>> +            (point != signal_pt->value))
>> +            continue;
>> +        return dma_fence_get(&signal_pt->base->base);
>> +    }
>> +    return NULL;
>> +}
>> +
>> +static int drm_syncobj_create_signal_pt(struct drm_syncobj *syncobj,
>> +                    struct dma_fence *fence,
>> +                    u64 point)
>> +{
>> +    struct drm_syncobj_signal_pt *signal_pt =
>> +        kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL);
>> +    struct drm_syncobj_signal_pt *tail_pt;
>> +    struct dma_fence **fences;
>> +    int num_fences = 0;
>> +    int ret = 0, i;
>> +
>> +    if (!signal_pt)
>> +        return -ENOMEM;
>> +    if (syncobj->syncobj_timeline.signal_point >= point) {
>> +        DRM_WARN("A later signal is ready!");
>> +        goto out;
>> +    }
>> +    if (!fence)
>> +        goto out;
>> +
>> +    fences = kmalloc_array(sizeof(void *), 2, GFP_KERNEL);
>> +    if (!fences)
>> +        goto out;
>> +    fences[num_fences++] = dma_fence_get(fence);
>> +    /* timeline syncobj must take this dependency */
>> +    if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>> +        spin_lock(&syncobj->lock);
>> +        if (!list_empty(&syncobj->syncobj_timeline.signal_pt_list)) {
>> +            tail_pt = 
>> list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
>> +                          struct drm_syncobj_signal_pt, list);
>> +            fences[num_fences++] = dma_fence_get(&tail_pt->base->base);
>> +        }
>> +        spin_unlock(&syncobj->lock);
>> +    }
>> +    signal_pt->base = dma_fence_array_create(num_fences, fences,
>> + syncobj->syncobj_timeline.timeline_context,
>> +                         point, false);
>> +    if (!signal_pt->base)
>> +        goto fail;
>> +
>> +    spin_lock(&syncobj->lock);
>> +    signal_pt->value = point;
>> +    INIT_LIST_HEAD(&signal_pt->list);
>> +    list_add_tail(&signal_pt->list, 
>> &syncobj->syncobj_timeline.signal_pt_list);
>> +    syncobj->syncobj_timeline.signal_point = point;
>> +    spin_unlock(&syncobj->lock);
>> +    wake_up_all(&syncobj->syncobj_timeline.wq);
>> +
>> +    return 0;
>> +fail:
>> +    for (i = 0; i < num_fences; i++)
>> +        dma_fence_put(fences[i]);
>> +    kfree(fences);
>> +out:
>> +    kfree(signal_pt);
>> +    return ret;
>> +}
>> +
>> +static void drm_syncobj_garbage_collection(struct drm_syncobj *syncobj)
>> +{
>> +    struct drm_syncobj_timeline *timeline = &syncobj->syncobj_timeline;
>> +    struct drm_syncobj_signal_pt *signal_pt, *tmp;
>> +
>> +    spin_lock(&syncobj->lock);
>> +    list_for_each_entry_safe(signal_pt, tmp,
>> +                 &timeline->signal_pt_list, list) {
>> +        if (dma_fence_is_signaled(&signal_pt->base->base)) {
>> +            timeline->timeline = signal_pt->value;
>> +            list_del(&signal_pt->list);
>> +            dma_fence_put(&signal_pt->base->base);
>> +            kfree(signal_pt);
>> +        } else {
>> +            /*signal_pt is in order in list, from small to big, so
>> +             * the later must not be signal either */
>> +            break;
>> +        }
>> +    }
>> +
>> +    spin_unlock(&syncobj->lock);
>> +}
>>   /**
>>    * drm_syncobj_replace_fence - replace fence in a sync object.
>>    * @syncobj: Sync object to replace fence in
>> @@ -176,28 +329,37 @@ void drm_syncobj_replace_fence(struct 
>> drm_syncobj *syncobj,
>>                      u64 point,
>>                      struct dma_fence *fence)
>>   {
>> -    struct dma_fence *old_fence;
>> -    struct drm_syncobj_cb *cur, *tmp;
>> -
>> -    if (fence)
>> -        dma_fence_get(fence);
>> -
>> -    spin_lock(&syncobj->lock);
>> -
>> -    old_fence = rcu_dereference_protected(syncobj->fence,
>> - lockdep_is_held(&syncobj->lock));
>> -    rcu_assign_pointer(syncobj->fence, fence);
>> +    drm_syncobj_garbage_collection(syncobj);
>> +    if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>> +        if (fence)
>> +            drm_syncobj_create_signal_pt(syncobj, fence, point);
>> +    } else if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
>> +        u64 pt_value;
>> +
>> +        if (!fence) {
>> +            drm_syncobj_timeline_fini(syncobj,
>> +                          &syncobj->syncobj_timeline);
>> +            drm_syncobj_timeline_init(syncobj,
>> +                          &syncobj->syncobj_timeline);
>> +            return;
>> +        }
>> +        pt_value = syncobj->syncobj_timeline.signal_point +
>> +            DRM_SYNCOBJ_NORMAL_POINT;
>
> Looks like this could be simplified. If we don't have a timeline 
> syncobj we just finish and reinitialize it.
>
> Adding the fence then becomes common for both code paths.
>
>> + drm_syncobj_create_signal_pt(syncobj, fence, pt_value);
>> +    } else {
>> +        DRM_ERROR("the syncobj type isn't support\n");
>> +        return;
>> +    }
>> +    if (fence) {
>> +        struct drm_syncobj_cb *cur, *tmp;
>>   -    if (fence != old_fence) {
>> +        spin_lock(&syncobj->lock);
>>           list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
>>               list_del_init(&cur->node);
>>               cur->func(syncobj, cur);
>>           }
>> +        spin_unlock(&syncobj->lock);
>>       }
>> -
>> -    spin_unlock(&syncobj->lock);
>> -
>> -    dma_fence_put(old_fence);
>>   }
>>   EXPORT_SYMBOL(drm_syncobj_replace_fence);
>>   @@ -220,6 +382,25 @@ static int 
>> drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>       return 0;
>>   }
>>   +static struct dma_fence *
>> +drm_syncobj_point_get(struct drm_syncobj *syncobj, u64 point, u64 
>> flags)
>> +{
>> +    struct dma_fence *fence;
>> +    int ret = 0;
>> +
>> +    if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>> +        ret = wait_event_timeout(syncobj->syncobj_timeline.wq,
>> +                    point <= syncobj->syncobj_timeline.signal_point,
>> +                    msecs_to_jiffies(10000)); /* wait 10s */
>
> Either don't use a timeout or provide the timeout explicitely, but 
> don't hard code it here.
>
> Additional to that we probably need an incorruptible wait.
Initially, I used interruptible wait, I found it sometimes often return 
-ERESTARTSYS without waiting any second when I wrote unit test.
Any ideas for that? still need to interruptible wait?
if no interruptible, we have to use a timeout.


>
>> +        if (ret <= 0)
>> +            return NULL;
>> +    }
>> +    spin_lock(&syncobj->lock);
>> +    fence = drm_syncobj_find_signal_pt_for_point(syncobj, point);
>> +    spin_unlock(&syncobj->lock);
>> +    return fence;
>> +}
>> +
>>   /**
>>    * drm_syncobj_find_fence - lookup and reference the fence in a 
>> sync object
>>    * @file_private: drm file private pointer
>> @@ -234,20 +415,46 @@ static int 
>> drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>    * contains a reference to the fence, which must be released by 
>> calling
>>    * dma_fence_put().
>>    */
>> -int drm_syncobj_find_fence(struct drm_file *file_private,
>> -               u32 handle, u64 point,
>> -               struct dma_fence **fence)
>> +int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point,
>> +                 u64 flags, struct dma_fence **fence)
>>   {
>> -    struct drm_syncobj *syncobj = drm_syncobj_find(file_private, 
>> handle);
>>       int ret = 0;
>>         if (!syncobj)
>>           return -ENOENT;
>>   -    *fence = drm_syncobj_fence_get(syncobj);
>> +    drm_syncobj_garbage_collection(syncobj);
>> +    if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
>> +        /*NORMAL syncobj always wait on last pt */
>> +        u64 tail_pt_value = syncobj->syncobj_timeline.signal_point;
>> +
>> +        if (tail_pt_value == 0)
>> +            tail_pt_value += DRM_SYNCOBJ_NORMAL_POINT;
>> +        /* NORMAL syncobj doesn't care point value */
>> +        WARN_ON(point != 0);
>> +        *fence = drm_syncobj_point_get(syncobj, tail_pt_value,
>> +                            flags);
>> +    } else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>> +        *fence = drm_syncobj_point_get(syncobj, point,
>> +                            flags);
>
> Finding the fence is common for normal and timeline fence, only 
> figuring out the sequence number is different.
>
> So that can probably be simplified further here.
will do.

>
>> +    } else {
>> +        DRM_ERROR("Don't support this type syncobj\n");
>> +        *fence = NULL;
>> +    }
>>       if (!*fence) {
>>           ret = -EINVAL;
>>       }
>> +    return ret;
>> +}
>> +EXPORT_SYMBOL(drm_syncobj_search_fence);
>> +int drm_syncobj_find_fence(struct drm_file *file_private,
>> +               u32 handle, u64 point,
>> +               struct dma_fence **fence) {
>> +    struct drm_syncobj *syncobj = drm_syncobj_find(file_private, 
>> handle);
>> +
>> +    int ret = drm_syncobj_search_fence(syncobj, point,
>> +                    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
>> +                    fence);
>>       drm_syncobj_put(syncobj);
>>       return ret;
>>   }
>> @@ -264,7 +471,7 @@ void drm_syncobj_free(struct kref *kref)
>>       struct drm_syncobj *syncobj = container_of(kref,
>>                              struct drm_syncobj,
>>                              refcount);
>> -    drm_syncobj_replace_fence(syncobj, 0, NULL);
>> +    drm_syncobj_timeline_fini(syncobj, &syncobj->syncobj_timeline);
>>       kfree(syncobj);
>>   }
>>   EXPORT_SYMBOL(drm_syncobj_free);
>> @@ -294,6 +501,11 @@ int drm_syncobj_create(struct drm_syncobj 
>> **out_syncobj, uint32_t flags,
>>       kref_init(&syncobj->refcount);
>>       INIT_LIST_HEAD(&syncobj->cb_list);
>>       spin_lock_init(&syncobj->lock);
>> +    if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE)
>> +        syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
>> +    else
>> +        syncobj->type = DRM_SYNCOBJ_TYPE_NORMAL;
>> +    drm_syncobj_timeline_init(syncobj, &syncobj->syncobj_timeline);
>>         if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
>>           ret = drm_syncobj_assign_null_handle(syncobj);
>> @@ -576,7 +788,8 @@ drm_syncobj_create_ioctl(struct drm_device *dev, 
>> void *data,
>>           return -ENODEV;
>>         /* no valid flags yet */
>> -    if (args->flags & ~DRM_SYNCOBJ_CREATE_SIGNALED)
>> +    if (args->flags & ~(DRM_SYNCOBJ_CREATE_SIGNALED |
>> +                DRM_SYNCOBJ_CREATE_TYPE_TIMELINE))
>>           return -EINVAL;
>>         return drm_syncobj_create_as_handle(file_private,
>> @@ -669,9 +882,8 @@ static void syncobj_wait_syncobj_func(struct 
>> drm_syncobj *syncobj,
>>       struct syncobj_wait_entry *wait =
>>           container_of(cb, struct syncobj_wait_entry, syncobj_cb);
>>   -    /* This happens inside the syncobj lock */
>> -    wait->fence = 
>> dma_fence_get(rcu_dereference_protected(syncobj->fence,
>> - lockdep_is_held(&syncobj->lock)));
>> +    drm_syncobj_search_fence(syncobj, 0, 0, &wait->fence);
>> +
>>       wake_up_process(wait->task);
>>   }
>>   @@ -698,7 +910,8 @@ static signed long 
>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>       signaled_count = 0;
>>       for (i = 0; i < count; ++i) {
>>           entries[i].task = current;
>> -        entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
>> +        ret = drm_syncobj_search_fence(syncobjs[i], 0, 0,
>> +                           &entries[i].fence);
>>           if (!entries[i].fence) {
>>               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>                   continue;
>> @@ -970,12 +1183,21 @@ drm_syncobj_reset_ioctl(struct drm_device 
>> *dev, void *data,
>>       if (ret < 0)
>>           return ret;
>>   -    for (i = 0; i < args->count_handles; i++)
>> -        drm_syncobj_replace_fence(syncobjs[i], 0, NULL);
>> -
>> +    for (i = 0; i < args->count_handles; i++) {
>> +        if (syncobjs[i]->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>> +            DRM_ERROR("timeline syncobj cannot reset!\n");
>
> Why not? I mean that should still work or do I miss anything?
timeline semaphore spec doesn't require reset interface, it says the 
timeline value only can be changed by signal operations.

>
>> +            ret = -EINVAL;
>> +            goto out;
>> +        }
>> +        drm_syncobj_timeline_fini(syncobjs[i],
>> +                      &syncobjs[i]->syncobj_timeline);
>> +        drm_syncobj_timeline_init(syncobjs[i],
>> +                      &syncobjs[i]->syncobj_timeline);
>> +    }
>> +out:
>>       drm_syncobj_array_free(syncobjs, args->count_handles);
>>   -    return 0;
>> +    return ret;
>>   }
>>     int
>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
>> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> index 0a8d2d64f380..579e91a5858b 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> @@ -2137,7 +2137,9 @@ await_fence_array(struct i915_execbuffer *eb,
>>           if (!(flags & I915_EXEC_FENCE_WAIT))
>>               continue;
>>   -        fence = drm_syncobj_fence_get(syncobj);
>> +        drm_syncobj_search_fence(syncobj, 0,
>> +                     DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
>
> Not sure if we can make DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT the 
> default here.
>
> Maybe better if drivers explicitly need to ask for it?
if you don't like the flag used in specific driver, I can wrap it to a 
new function.


>
>> +                     &fence);
>>           if (!fence)
>>               return -EINVAL;
>>   diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>> index 425432b85a87..9535521e6623 100644
>> --- a/include/drm/drm_syncobj.h
>> +++ b/include/drm/drm_syncobj.h
>> @@ -30,6 +30,25 @@
>>     struct drm_syncobj_cb;
>>   +enum drm_syncobj_type {
>> +    DRM_SYNCOBJ_TYPE_NORMAL,
>
> Does anybody have a better suggestion for _TYPE_NORMAL? That sounds 
> like timeline would be special.
How about _TYPE_INDIVIDUAL?

>
>> +    DRM_SYNCOBJ_TYPE_TIMELINE
>> +};
>> +
>> +struct drm_syncobj_timeline {
>> +    wait_queue_head_t    wq;
>> +    u64 timeline_context;
>> +    /**
>> +     * @timeline: syncobj timeline
>> +     */
>> +    u64 timeline;
>> +    u64 signal_point;
>> +
>> +
>> +    struct rb_root wait_pt_tree;
>
> Not used any more?
will clean up.

>
>> +    struct list_head signal_pt_list;
>> +};
>> +
>>   /**
>>    * struct drm_syncobj - sync object.
>>    *
>> @@ -41,19 +60,19 @@ struct drm_syncobj {
>>        */
>>       struct kref refcount;
>>       /**
>> -     * @fence:
>> -     * NULL or a pointer to the fence bound to this object.
>> -     *
>> -     * This field should not be used directly. Use 
>> drm_syncobj_fence_get()
>> -     * and drm_syncobj_replace_fence() instead.
>> +     * @type: indicate syncobj type
>>        */
>> -    struct dma_fence __rcu *fence;
>> +    enum drm_syncobj_type type;
>>       /**
>> -     * @cb_list: List of callbacks to call when the &fence gets 
>> replaced.
>> +     * @syncobj_timeline: timeline
>>        */
>> +    struct drm_syncobj_timeline syncobj_timeline;
>
> Looks like that can now be embedded into drm_syncobj.
will try.

btw: I encounter a problem after removing advanced wait pt, I debug it 
two days, but still don't find reason, from log, it's timeount when 
wait_event_timeout, that means it don't receive signal operation.

"./deqp-vk -n dEQP-VK*semaphore*" will fail on 
'dEQP-VK.synchronization.basic.semaphore.chain' case, but can pass if 
only run that one case like "./deqp-vk -n 
dEQP-VK.synchronization.basic.semaphore.chain".

Both old and my previous implementation can pass and have no this problem.


Thanks,
David Zhou
>
> Regards,
> Christian.
>
>> +    /**
>> +         * @cb_list: List of callbacks to call when the &fence gets 
>> replaced.
>> +         */
>>       struct list_head cb_list;
>>       /**
>> -     * @lock: Protects &cb_list and write-locks &fence.
>> +     * @lock: Protects syncobj list and write-locks &fence.
>>        */
>>       spinlock_t lock;
>>       /**
>> @@ -68,7 +87,7 @@ typedef void (*drm_syncobj_func_t)(struct 
>> drm_syncobj *syncobj,
>>   /**
>>    * struct drm_syncobj_cb - callback for drm_syncobj_add_callback
>>    * @node: used by drm_syncob_add_callback to append this struct to
>> - *      &drm_syncobj.cb_list
>> + *       &drm_syncobj.cb_list
>>    * @func: drm_syncobj_func_t to call
>>    *
>>    * This struct will be initialized by drm_syncobj_add_callback, 
>> additional
>> @@ -106,29 +125,6 @@ drm_syncobj_put(struct drm_syncobj *obj)
>>       kref_put(&obj->refcount, drm_syncobj_free);
>>   }
>>   -/**
>> - * drm_syncobj_fence_get - get a reference to a fence in a sync object
>> - * @syncobj: sync object.
>> - *
>> - * This acquires additional reference to &drm_syncobj.fence 
>> contained in @obj,
>> - * if not NULL. It is illegal to call this without already holding a 
>> reference.
>> - * No locks required.
>> - *
>> - * Returns:
>> - * Either the fence of @obj or NULL if there's none.
>> - */
>> -static inline struct dma_fence *
>> -drm_syncobj_fence_get(struct drm_syncobj *syncobj)
>> -{
>> -    struct dma_fence *fence;
>> -
>> -    rcu_read_lock();
>> -    fence = dma_fence_get_rcu_safe(&syncobj->fence);
>> -    rcu_read_unlock();
>> -
>> -    return fence;
>> -}
>> -
>>   struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
>>                        u32 handle);
>>   void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, u64 point,
>> @@ -142,5 +138,7 @@ int drm_syncobj_create(struct drm_syncobj 
>> **out_syncobj, uint32_t flags,
>>   int drm_syncobj_get_handle(struct drm_file *file_private,
>>                  struct drm_syncobj *syncobj, u32 *handle);
>>   int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd);
>> +int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point,
>> +                 u64 flags, struct dma_fence **fence);
>>     #endif
>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>> index 300f336633f2..cebdb2541eb7 100644
>> --- a/include/uapi/drm/drm.h
>> +++ b/include/uapi/drm/drm.h
>> @@ -717,6 +717,7 @@ struct drm_prime_handle {
>>   struct drm_syncobj_create {
>>       __u32 handle;
>>   #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
>> +#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 1)
>>       __u32 flags;
>>   };
>



More information about the amd-gfx mailing list