[PATCH 5/5] drm: add syncobj timeline support v2

Christian König ckoenig.leichtzumerken at gmail.com
Thu Aug 23 09:15:10 UTC 2018


Am 23.08.2018 um 10:25 schrieb Chunming Zhou:
> VK_KHR_timeline_semaphore:
> This extension introduces a new type of semaphore that has an integer payload
> identifying a point in a timeline. Such timeline semaphores support the
> following operations:
>     * Host query - A host operation that allows querying the payload of the
>       timeline semaphore.
>     * Host wait - A host operation that allows a blocking wait for a
>       timeline semaphore to reach a specified value.

I think I have an idea of what "Host" means in this context, but it would 
probably be better to describe it explicitly.

>     * Device wait - A device operation that allows waiting for a
>       timeline semaphore to reach a specified value.
>     * Device signal - A device operation that allows advancing the
>       timeline semaphore to a specified value.
>
> Since it's a timeline, an earlier time point (PT) is always signaled before a later PT.
> a. signal PT design:
> Signal PT fence N depends on the PT[N-1] fence and the signal operation fence; when the PT[N] fence is
> signaled, the timeline increases to the value of PT[N].
> b. wait PT design:
> A wait PT fence is signaled when the timeline reaches its point value: whenever the timeline increases, the
> wait PT values are compared with the new timeline value; if a PT value is lower than the timeline value, that
> wait PT is signaled, otherwise it stays in the list. A semaphore wait operation can wait on any point of the
> timeline, so an RB tree is needed to keep them ordered. A wait PT can also be ahead of the signal PT, so we
> need a submission fence to handle that.
>
> v2:
> 1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian)
> 2. move unexposed definitions to .c file. (Daniel Vetter)
> 3. split up the change to drm_syncobj_find_fence() in a separate patch. (Christian)
> 4. split up the change to drm_syncobj_replace_fence() in a separate patch.
> 5. drop the submission_fence implementation and instead use wait_event() for that. (Christian)
> 6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter)

I really liked Daniel's idea to handle the classic syncobj like a 
timeline syncobj with just 1 entry. That can probably simplify the 
implementation quite a bit.

In addition to that, an amdgpu patch which shows how the interface is to 
be used is probably something Daniel will want to see as well.

Christian.

>
> TODO:
> 1. CPU query and wait on timeline semaphore.
> 2. test application (Daniel Vetter)
>
> Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
> Cc: Christian Konig <christian.koenig at amd.com>
> Cc: Dave Airlie <airlied at redhat.com>
> Cc: Daniel Rakos <Daniel.Rakos at amd.com>
> Cc: Daniel Vetter <daniel at ffwll.ch>
> ---
>   drivers/gpu/drm/drm_syncobj.c | 383 +++++++++++++++++++++++++++++++++++++++---
>   include/drm/drm_syncobj.h     |  28 +++
>   include/uapi/drm/drm.h        |   1 +
>   3 files changed, 389 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 6227df2cc0a4..f738d78edf65 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -56,6 +56,44 @@
>   #include "drm_internal.h"
>   #include <drm/drm_syncobj.h>
>   
> +struct drm_syncobj_stub_fence {
> +	struct dma_fence base;
> +	spinlock_t lock;
> +};
> +
> +static const char *drm_syncobj_stub_fence_get_name(struct dma_fence *fence)
> +{
> +        return "syncobjstub";
> +}
> +
> +static bool drm_syncobj_stub_fence_enable_signaling(struct dma_fence *fence)
> +{
> +    return !dma_fence_is_signaled(fence);
> +}
> +
> +static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
> +	.get_driver_name = drm_syncobj_stub_fence_get_name,
> +	.get_timeline_name = drm_syncobj_stub_fence_get_name,
> +	.enable_signaling = drm_syncobj_stub_fence_enable_signaling,
> +	.release = NULL,
> +};
> +
> +struct drm_syncobj_wait_pt {
> +	struct drm_syncobj_stub_fence base;
> +	u64    value;
> +	struct rb_node   node;
> +};
> +struct drm_syncobj_signal_pt {
> +	struct drm_syncobj_stub_fence base;
> +	struct dma_fence *signal_fence;
> +	struct dma_fence *pre_pt_base;
> +	struct dma_fence_cb signal_cb;
> +	struct dma_fence_cb pre_pt_cb;
> +	struct drm_syncobj *syncobj;
> +	u64    value;
> +	struct list_head list;
> +};
> +
>   /**
>    * drm_syncobj_find - lookup and reference a sync object.
>    * @file_private: drm file private pointer
> @@ -137,6 +175,150 @@ void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
>   	spin_unlock(&syncobj->lock);
>   }
>   
> +static void drm_syncobj_timeline_signal_wait_pts(struct drm_syncobj *syncobj)
> +{
> +	struct rb_node *node = NULL;
> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
> +
> +	spin_lock(&syncobj->lock);
> +	for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
> +	    node != NULL; ) {
> +		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> +		node = rb_next(node);
> +		if (wait_pt->value <= syncobj->syncobj_timeline.timeline) {
> +			dma_fence_signal(&wait_pt->base.base);
> +			rb_erase(&wait_pt->node,
> +				 &syncobj->syncobj_timeline.wait_pt_tree);
> +			RB_CLEAR_NODE(&wait_pt->node);
> +			/* kfree(wait_pt) is excuted by fence put */
> +			dma_fence_put(&wait_pt->base.base);
> +		} else {
> +			/* the loop is from left to right, the later entry value is
> +			 * bigger, so don't need to check any more */
> +			break;
> +		}
> +	}
> +	spin_unlock(&syncobj->lock);
> +}
> +
> +
> +static void pt_fence_cb(struct drm_syncobj_signal_pt *signal_pt)
> +{
> +	struct dma_fence *fence = NULL;
> +	struct drm_syncobj *syncobj;
> +
> +	fence = signal_pt->signal_fence;
> +	signal_pt->signal_fence = NULL;
> +	dma_fence_put(fence);
> +	fence = signal_pt->pre_pt_base;
> +	signal_pt->pre_pt_base = NULL;
> +	dma_fence_put(fence);
> +
> +	syncobj = signal_pt->syncobj;
> +	spin_lock(&syncobj->lock);
> +	list_del(&signal_pt->list);
> +	syncobj->syncobj_timeline.timeline = signal_pt->value;
> +	spin_unlock(&syncobj->lock);
> +	/* kfree(signal_pt) will be  executed by below fence put */
> +	dma_fence_put(&signal_pt->base.base);
> +	drm_syncobj_timeline_signal_wait_pts(syncobj);
> +}
> +static void pt_signal_fence_func(struct dma_fence *fence,
> +				 struct dma_fence_cb *cb)
> +{
> +	struct drm_syncobj_signal_pt *signal_pt =
> +		container_of(cb, struct drm_syncobj_signal_pt, signal_cb);
> +
> +	if (signal_pt->pre_pt_base &&
> +	    !dma_fence_is_signaled(signal_pt->pre_pt_base))
> +		return;
> +
> +	pt_fence_cb(signal_pt);
> +}
> +static void pt_pre_fence_func(struct dma_fence *fence,
> +				 struct dma_fence_cb *cb)
> +{
> +	struct drm_syncobj_signal_pt *signal_pt =
> +		container_of(cb, struct drm_syncobj_signal_pt, pre_pt_cb);
> +
> +	if (signal_pt->signal_fence &&
> +	    !dma_fence_is_signaled(signal_pt->pre_pt_base))
> +		return;
> +
> +	pt_fence_cb(signal_pt);
> +}
> +
> +static int drm_syncobj_timeline_replace_fence(struct drm_syncobj *syncobj,
> +					      struct dma_fence *fence,
> +					      u64 point)
> +{
> +	struct drm_syncobj_signal_pt *signal_pt =
> +		kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL);
> +	struct drm_syncobj_signal_pt *tail_pt;
> +	struct dma_fence *tail_pt_fence = NULL;
> +	int ret = 0;
> +
> +	if (!signal_pt)
> +		return -ENOMEM;
> +	if (syncobj->syncobj_timeline.signal_point >= point) {
> +		DRM_WARN("A later signal is ready!");
> +		goto out;
> +	}
> +	if (fence)
> +		dma_fence_get(fence);
> +	spin_lock(&syncobj->lock);
> +	spin_lock_init(&signal_pt->base.lock);
> +	dma_fence_init(&signal_pt->base.base,
> +		       &drm_syncobj_stub_fence_ops,
> +		       &signal_pt->base.lock,
> +		       syncobj->syncobj_timeline.timeline_context, point);
> +	signal_pt->signal_fence =
> +		rcu_dereference_protected(fence,
> +					  lockdep_is_held(&fence->lock));
> +	if (!list_empty(&syncobj->syncobj_timeline.signal_pt_list)) {
> +		tail_pt = list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
> +					  struct drm_syncobj_signal_pt, list);
> +		tail_pt_fence = &tail_pt->base.base;
> +		if (dma_fence_is_signaled(tail_pt_fence))
> +			tail_pt_fence = NULL;
> +	}
> +	if (tail_pt_fence)
> +		signal_pt->pre_pt_base =
> +			dma_fence_get(rcu_dereference_protected(tail_pt_fence,
> +								lockdep_is_held(&tail_pt_fence->lock)));
> +
> +	signal_pt->value = point;
> +	syncobj->syncobj_timeline.signal_point = point;
> +	signal_pt->syncobj = syncobj;
> +	INIT_LIST_HEAD(&signal_pt->list);
> +	list_add_tail(&signal_pt->list, &syncobj->syncobj_timeline.signal_pt_list);
> +	spin_unlock(&syncobj->lock);
> +	wake_up_all(&syncobj->syncobj_timeline.wq);
> +	/**
> +	 * Every pt is depending on signal fence and previous pt fence, add
> +	 * callbacks to them
> +	 */
> +	if (!dma_fence_is_signaled(signal_pt->signal_fence))
> +		dma_fence_add_callback(signal_pt->signal_fence,
> +				       &signal_pt->signal_cb,
> +				       pt_signal_fence_func);
> +	else
> +		pt_signal_fence_func(signal_pt->signal_fence,
> +				     &signal_pt->signal_cb);
> +	if (signal_pt->pre_pt_base && !dma_fence_is_signaled(signal_pt->pre_pt_base))
> +		dma_fence_add_callback(signal_pt->pre_pt_base,
> +				       &signal_pt->pre_pt_cb,
> +				       pt_pre_fence_func);
> +	else
> +		pt_pre_fence_func(signal_pt->pre_pt_base, &signal_pt->pre_pt_cb);
> +
> +
> +	return 0;
> +out:
> +	kfree(signal_pt);
> +	return ret;
> +}
> +
>   /**
>    * drm_syncobj_replace_fence - replace fence in a sync object.
>    * @syncobj: Sync object to replace fence in
> @@ -152,6 +334,11 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>   	struct dma_fence *old_fence;
>   	struct drm_syncobj_cb *cur, *tmp;
>   
> +	if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
> +		drm_syncobj_timeline_replace_fence(syncobj, fence,
> +						   point);
> +		return;
> +	}
>   	if (fence)
>   		dma_fence_get(fence);
>   
> @@ -174,28 +361,6 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>   }
>   EXPORT_SYMBOL(drm_syncobj_replace_fence);
>   
> -struct drm_syncobj_stub_fence {
> -	struct dma_fence base;
> -	spinlock_t lock;
> -};
> -
> -static const char *drm_syncobj_stub_fence_get_name(struct dma_fence *fence)
> -{
> -        return "syncobjstub";
> -}
> -
> -static bool drm_syncobj_stub_fence_enable_signaling(struct dma_fence *fence)
> -{
> -    return !dma_fence_is_signaled(fence);
> -}
> -
> -static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
> -	.get_driver_name = drm_syncobj_stub_fence_get_name,
> -	.get_timeline_name = drm_syncobj_stub_fence_get_name,
> -	.enable_signaling = drm_syncobj_stub_fence_enable_signaling,
> -	.release = NULL,
> -};
> -
>   static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>   {
>   	struct drm_syncobj_stub_fence *fence;
> @@ -215,6 +380,121 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>   	return 0;
>   }
>   
> +static struct drm_syncobj_wait_pt *
> +drm_syncobj_timeline_lookup_wait_pt(struct drm_syncobj *syncobj, u64 point)
> +{
> +    struct rb_node *node = syncobj->syncobj_timeline.wait_pt_tree.rb_node;
> +    struct drm_syncobj_wait_pt *wait_pt = NULL;
> +
> +
> +    spin_lock(&syncobj->lock);
> +    while(node) {
> +	    int result = point - wait_pt->value;
> +
> +	    wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> +	    if (result < 0)
> +		    node = node->rb_left;
> +	    else if (result > 0)
> +		    node = node->rb_right;
> +	    else
> +		    break;
> +    }
> +    spin_unlock(&syncobj->lock);
> +
> +    return wait_pt;
> +}
> +
> +static struct drm_syncobj_wait_pt *
> +drm_syncobj_timeline_create_wait_pt(struct drm_syncobj *syncobj, u64 point)
> +{
> +	struct drm_syncobj_wait_pt *wait_pt;
> +	struct rb_node **new = &(syncobj->syncobj_timeline.wait_pt_tree.rb_node), *parent = NULL;
> +
> +	wait_pt = kzalloc(sizeof(*wait_pt), GFP_KERNEL);
> +	if (!wait_pt)
> +		return NULL;
> +	spin_lock_init(&wait_pt->base.lock);
> +	dma_fence_init(&wait_pt->base.base,
> +		       &drm_syncobj_stub_fence_ops,
> +		       &wait_pt->base.lock,
> +		       syncobj->syncobj_timeline.timeline_context, point);
> +	wait_pt->value = point;
> +
> +	/* wait pt must be in an order, so that we can easily lookup and signal
> +	 * it */
> +	spin_lock(&syncobj->lock);
> +	if (point <= syncobj->syncobj_timeline.timeline)
> +		dma_fence_signal(&wait_pt->base.base);
> +	while(*new) {
> +		struct drm_syncobj_wait_pt *this =
> +			rb_entry(*new, struct drm_syncobj_wait_pt, node);
> +		int result = wait_pt->value - this->value;
> +
> +		parent = *new;
> +		if (result < 0)
> +			new = &((*new)->rb_left);
> +		else if (result > 0)
> +			new = &((*new)->rb_right);
> +		else
> +			goto exist;
> +	}
> +
> +	rb_link_node(&wait_pt->node, parent, new);
> +	rb_insert_color(&wait_pt->node, &syncobj->syncobj_timeline.wait_pt_tree);
> +	spin_unlock(&syncobj->lock);
> +	return wait_pt;
> +exist:
> +	spin_unlock(&syncobj->lock);
> +	dma_fence_put(&wait_pt->base.base);
> +	wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
> +	return wait_pt;
> +}
> +
> +static struct dma_fence *
> +drm_syncobj_timeline_point_get(struct drm_syncobj *syncobj, u64 point, u64 flag)
> +{
> +	struct drm_syncobj_wait_pt *wait_pt;
> +
> +	/* already signaled, simply return a signaled stub fence */
> +	if (point <= syncobj->syncobj_timeline.timeline) {
> +		struct drm_syncobj_stub_fence *fence;
> +
> +		fence = kzalloc(sizeof(*fence), GFP_KERNEL);
> +		if (fence == NULL)
> +			return NULL;
> +
> +		spin_lock_init(&fence->lock);
> +		dma_fence_init(&fence->base, &drm_syncobj_stub_fence_ops,
> +			       &fence->lock, 0, 0);
> +		dma_fence_signal(&fence->base);
> +		return &fence->base;
> +	}
> +
> +	/* check if the wait pt exists */
> +	wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
> +	if (!wait_pt) {
> +		/* This is a new wait pt, so create it */
> +		wait_pt = drm_syncobj_timeline_create_wait_pt(syncobj, point);
> +		if (!wait_pt)
> +			return NULL;
> +	}
> +	if (wait_pt) {
> +		struct dma_fence *fence;
> +		int ret =
> +			wait_event_interruptible_timeout(syncobj->syncobj_timeline.wq,
> +				wait_pt->value <= syncobj->syncobj_timeline.signal_point,
> +				msecs_to_jiffies(10000)); /* wait 10s */
> +
> +		if (ret <= 0)
> +			return NULL;
> +		rcu_read_lock();
> +		fence = dma_fence_get_rcu(&wait_pt->base.base);
> +		rcu_read_unlock();
> +		return fence;
> +	}
> +	return NULL;
> +}
> +
>   /**
>    * drm_syncobj_find_fence - lookup and reference the fence in a sync object
>    * @file_private: drm file private pointer
> @@ -240,7 +520,17 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
>   	if (!syncobj)
>   		return -ENOENT;
>   
> -	*fence = drm_syncobj_fence_get(syncobj);
> +	if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
> +		/* NORMAL syncobj doesn't care point value */
> +		WARN_ON(point != 0);
> +		*fence = drm_syncobj_fence_get(syncobj);
> +	} else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
> +		*fence = drm_syncobj_timeline_point_get(syncobj, point,
> +							DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT);
> +	} else {
> +		DRM_ERROR("Don't support this type syncobj\n");
> +		*fence = NULL;
> +	}
>   	if (!*fence) {
>   		ret = -EINVAL;
>   	}
> @@ -249,6 +539,34 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
>   }
>   EXPORT_SYMBOL(drm_syncobj_find_fence);
>   
> +static void drm_syncobj_timeline_fini(struct drm_syncobj *syncobj,
> +				      struct drm_syncobj_timeline *syncobj_timeline)
> +{
> +	struct rb_node *node = NULL;
> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
> +	struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
> +
> +	spin_lock(&syncobj->lock);
> +	for(node = rb_first(&syncobj_timeline->wait_pt_tree);
> +	    node != NULL; ) {
> +		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> +		node = rb_next(node);
> +		rb_erase(&wait_pt->node,
> +			 &syncobj_timeline->wait_pt_tree);
> +		RB_CLEAR_NODE(&wait_pt->node);
> +		/* kfree(wait_pt) is excuted by fence put */
> +		dma_fence_put(&wait_pt->base.base);
> +	}
> +	list_for_each_entry_safe(signal_pt, tmp,
> +				 &syncobj_timeline->signal_pt_list, list) {
> +		list_del(&signal_pt->list);
> +		dma_fence_put(signal_pt->signal_fence);
> +		dma_fence_put(signal_pt->pre_pt_base);
> +		dma_fence_put(&signal_pt->base.base);
> +	}
> +	spin_unlock(&syncobj->lock);
> +}
> +
>   /**
>    * drm_syncobj_free - free a sync object.
>    * @kref: kref to free.
> @@ -261,10 +579,23 @@ void drm_syncobj_free(struct kref *kref)
>   						   struct drm_syncobj,
>   						   refcount);
>   	drm_syncobj_replace_fence(syncobj, NULL, 0);
> +	drm_syncobj_timeline_fini(syncobj, &syncobj->syncobj_timeline);
>   	kfree(syncobj);
>   }
>   EXPORT_SYMBOL(drm_syncobj_free);
>   
> +static void drm_syncobj_timeline_init(struct drm_syncobj_timeline
> +				      *syncobj_timeline)
> +{
> +	syncobj_timeline->timeline_context = dma_fence_context_alloc(1);
> +	syncobj_timeline->timeline = 0;
> +	syncobj_timeline->signal_point = 0;
> +	init_waitqueue_head(&syncobj_timeline->wq);
> +
> +	syncobj_timeline->wait_pt_tree = RB_ROOT;
> +	INIT_LIST_HEAD(&syncobj_timeline->signal_pt_list);
> +}
> +
>   /**
>    * drm_syncobj_create - create a new syncobj
>    * @out_syncobj: returned syncobj
> @@ -290,6 +621,12 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
>   	kref_init(&syncobj->refcount);
>   	INIT_LIST_HEAD(&syncobj->cb_list);
>   	spin_lock_init(&syncobj->lock);
> +	if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE) {
> +		syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
> +		drm_syncobj_timeline_init(&syncobj->syncobj_timeline);
> +	} else {
> +		syncobj->type = DRM_SYNCOBJ_TYPE_NORMAL;
> +	}
>   
>   	if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
>   		ret = drm_syncobj_assign_null_handle(syncobj);
> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
> index 335ec501001a..342b3ced3e56 100644
> --- a/include/drm/drm_syncobj.h
> +++ b/include/drm/drm_syncobj.h
> @@ -30,6 +30,25 @@
>   
>   struct drm_syncobj_cb;
>   
> +enum drm_syncobj_type {
> +	DRM_SYNCOBJ_TYPE_NORMAL,
> +	DRM_SYNCOBJ_TYPE_TIMELINE
> +};
> +
> +struct drm_syncobj_timeline {
> +	wait_queue_head_t	wq;
> +	u64 timeline_context;
> +	/**
> +	 * @timeline: syncobj timeline
> +	 */
> +	u64 timeline;
> +	u64 signal_point;
> +
> +
> +	struct rb_root wait_pt_tree;
> +	struct list_head signal_pt_list;
> +};
> +
>   /**
>    * struct drm_syncobj - sync object.
>    *
> @@ -40,6 +59,15 @@ struct drm_syncobj {
>   	 * @refcount: Reference count of this object.
>   	 */
>   	struct kref refcount;
> +	/**
> +	 * @type: indicate syncobj type
> +	 */
> +	enum drm_syncobj_type type;
> +	/**
> +	 * @syncobj_timeline: timeline
> +	 */
> +	struct drm_syncobj_timeline syncobj_timeline;
> +
>   	/**
>   	 * @fence:
>   	 * NULL or a pointer to the fence bound to this object.
> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index 300f336633f2..cebdb2541eb7 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -717,6 +717,7 @@ struct drm_prime_handle {
>   struct drm_syncobj_create {
>   	__u32 handle;
>   #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
> +#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 1)
>   	__u32 flags;
>   };
>   



More information about the amd-gfx mailing list