[PATCH 1/3] [RFC]drm: add syncobj timeline support v4

Christian König christian.koenig at amd.com
Thu Sep 6 07:25:25 UTC 2018


Am 06.09.2018 um 08:25 schrieb Chunming Zhou:
> VK_KHR_timeline_semaphore:
> This extension introduces a new type of semaphore that has an integer payload
> identifying a point in a timeline. Such timeline semaphores support the

Drop the term semaphore here, better use syncobj.

> following operations:
>     * CPU query - A host operation that allows querying the payload of the
>       timeline semaphore.
>     * CPU wait - A host operation that allows a blocking wait for a
>       timeline semaphore to reach a specified value.
>     * Device wait - A device operation that allows waiting for a
>       timeline semaphore to reach a specified value.
>     * Device signal - A device operation that allows advancing the
>       timeline semaphore to a specified value.
>
> Since it's a timeline, that means the front time point(PT) always is signaled before the late PT.
> a. signal PT design:
> Signal PT fence N depends on PT[N-1] fence and signal opertion fence, when PT[N] fence is signaled,
> the timeline will increase to value of PT[N].
> b. wait PT design:
> Wait PT fence is signaled by reaching timeline point value, when timeline is increasing, will compare
> wait PTs value with new timeline value, if PT value is lower than timeline value, then wait PT will be
> signaled, otherwise keep in list. semaphore wait operation can wait on any point of timeline,
> so need a RB tree to order them. And wait PT could ahead of signal PT, we need a sumission fence to
> perform that.
>
> v2:
> 1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian)
> 2. move unexposed denitions to .c file. (Daniel Vetter)
> 3. split up the change to drm_syncobj_find_fence() in a separate patch. (Christian)
> 4. split up the change to drm_syncobj_replace_fence() in a separate patch.
> 5. drop the submission_fence implementation and instead use wait_event() for that. (Christian)
> 6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter)
>
> v3:
> 1. replace normal syncobj with timeline implemenation. (Vetter and Christian)
>      a. normal syncobj signal op will create a signal PT to tail of signal pt list.
>      b. normal syncobj wait op will create a wait pt with last signal point, and this wait PT is only signaled by related signal point PT.
> 2. many bug fix and clean up
> 3. stub fence moving is moved to other patch.
>
> v4:
> 1. fix RB tree loop with while(node=rb_first(...)). (Christian)
> 2. fix syncobj lifecycle. (Christian)
> 3. only enable_signaling when there is wait_pt. (Christian)
>
> Tested by ./deqp-vk -n dEQP-VK*semaphore* for normal syncobj
>
> Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
> Cc: Christian Konig <christian.koenig at amd.com>
> Cc: Dave Airlie <airlied at redhat.com>
> Cc: Daniel Rakos <Daniel.Rakos at amd.com>
> Cc: Daniel Vetter <daniel at ffwll.ch>
> ---
>   drivers/gpu/drm/drm_syncobj.c              | 516 +++++++++++++++++----
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c |   4 +-
>   include/drm/drm_syncobj.h                  |  78 ++--
>   include/uapi/drm/drm.h                     |   1 +
>   4 files changed, 448 insertions(+), 151 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index e9ce623d049e..94b31de23858 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -56,6 +56,9 @@
>   #include "drm_internal.h"
>   #include <drm/drm_syncobj.h>
>   
> +/* merge normal syncobj to timeline syncobj, the point interval is 1 */
> +#define DRM_SYNCOBJ_NORMAL_POINT 1
> +
>   struct drm_syncobj_stub_fence {
>   	struct dma_fence base;
>   	spinlock_t lock;
> @@ -82,6 +85,18 @@ static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
>   	.release = drm_syncobj_stub_fence_release,
>   };
>   
> +struct drm_syncobj_signal_pt {
> +	struct dma_fence_array *base;
> +	u64    value;
> +	struct list_head list;
> +};
> +struct drm_syncobj_wait_pt {
> +	struct drm_syncobj_stub_fence base;
> +	struct dma_fence *signal_pt_fence;
> +	struct dma_fence_cb wait_cb;
> +	struct rb_node   node;
> +	u64    value;
> +};
>   
>   /**
>    * drm_syncobj_find - lookup and reference a sync object.
> @@ -109,61 +124,238 @@ struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
>   }
>   EXPORT_SYMBOL(drm_syncobj_find);
>   
> -static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj,
> -					    struct drm_syncobj_cb *cb,
> -					    drm_syncobj_func_t func)
> +static void drm_syncobj_timeline_init(struct drm_syncobj *syncobj,
> +				      struct drm_syncobj_timeline *syncobj_timeline)

Since we merged timeline and singleton syncobj you can drop the extra 
_timeline_ part in the function name I think.

>   {
> -	cb->func = func;
> -	list_add_tail(&cb->node, &syncobj->cb_list);
> +	spin_lock(&syncobj->lock);
> +	syncobj_timeline->timeline_context = dma_fence_context_alloc(1);
> +	syncobj_timeline->timeline = 0;
> +	syncobj_timeline->signal_point = 0;
> +	init_waitqueue_head(&syncobj_timeline->wq);
> +
> +	syncobj_timeline->wait_pt_tree = RB_ROOT;
> +	INIT_LIST_HEAD(&syncobj_timeline->signal_pt_list);
> +	spin_unlock(&syncobj->lock);
>   }
>   
> -static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
> -						 struct dma_fence **fence,
> -						 struct drm_syncobj_cb *cb,
> -						 drm_syncobj_func_t func)
> +static void drm_syncobj_timeline_fini(struct drm_syncobj *syncobj,
> +				      struct drm_syncobj_timeline *syncobj_timeline)
>   {
> -	int ret;
> -
> -	*fence = drm_syncobj_fence_get(syncobj);
> -	if (*fence)
> -		return 1;
> +	struct rb_node *node = NULL;
> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
> +	struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
>   
>   	spin_lock(&syncobj->lock);
> -	/* We've already tried once to get a fence and failed.  Now that we
> -	 * have the lock, try one more time just to be sure we don't add a
> -	 * callback when a fence has already been set.
> -	 */
> -	if (syncobj->fence) {
> -		*fence = dma_fence_get(rcu_dereference_protected(syncobj->fence,
> -								 lockdep_is_held(&syncobj->lock)));
> -		ret = 1;
> -	} else {
> -		*fence = NULL;
> -		drm_syncobj_add_callback_locked(syncobj, cb, func);
> -		ret = 0;
> +	if (!RB_EMPTY_ROOT(&syncobj_timeline->wait_pt_tree))
> +		DRM_ERROR("There still are advanced wait_pts! pls check!");
> +	while ((node = rb_first(&syncobj_timeline->wait_pt_tree))) {
> +		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> +		rb_erase(&wait_pt->node,
> +			 &syncobj_timeline->wait_pt_tree);
> +		RB_CLEAR_NODE(&wait_pt->node);
> +		if (!wait_pt->signal_pt_fence) {
> +			/* if wait_pt still is advanced, fake signal it.
> +			 * generally shouldn't happens */
> +			dma_fence_set_error(&wait_pt->base.base, -ENOENT);
> +			dma_fence_signal(&wait_pt->base.base);
> +		}
> +		/* kfree(wait_pt) is excuted by fence put */
> +		dma_fence_put(&wait_pt->base.base);
> +	}
> +	list_for_each_entry_safe(signal_pt, tmp,
> +				 &syncobj_timeline->signal_pt_list, list) {
> +		list_del(&signal_pt->list);
> +		dma_fence_put(&signal_pt->base->base);
> +		kfree(signal_pt);
>   	}
>   	spin_unlock(&syncobj->lock);
> +}
>   
> -	return ret;
> +static void wait_pt_func(struct dma_fence *fence, struct dma_fence_cb *cb)
> +{
> +	struct drm_syncobj_wait_pt *wait_pt =
> +		container_of(cb, struct drm_syncobj_wait_pt, wait_cb);
> +
> +	dma_fence_signal(&wait_pt->base.base);
> +	dma_fence_put(wait_pt->signal_pt_fence);
> +	wait_pt->signal_pt_fence = NULL;
> +	dma_fence_put(&wait_pt->base.base);
> +}
> +
> +static void drm_syncobj_find_signal_pt_for_wait_pt(struct drm_syncobj *syncobj,
> +						   struct drm_syncobj_wait_pt *wait_pt)
> +{

That whole approach still looks horrible complicated to me.

Especially the separation of signal and wait pt is completely 
unnecessary as far as I can see.

When a wait pt is requested we just need to search for the signal point 
which it will trigger.

Regards,
Christian.

> +	struct drm_syncobj_timeline *timeline = &syncobj->syncobj_timeline;
> +	struct drm_syncobj_signal_pt *signal_pt;
> +	int ret;
> +
> +	if (wait_pt->signal_pt_fence) {
> +		return;
> +	} else if ((syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) &&
> +		   (wait_pt->value <= timeline->timeline)) {
> +		dma_fence_signal(&wait_pt->base.base);
> +		rb_erase(&wait_pt->node,
> +			 &timeline->wait_pt_tree);
> +		RB_CLEAR_NODE(&wait_pt->node);
> +		dma_fence_put(&wait_pt->base.base);
> +		return;
> +	}
> +
> +	list_for_each_entry(signal_pt, &timeline->signal_pt_list, list) {
> +		if (wait_pt->value < signal_pt->value)
> +			continue;
> +		if ((syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) &&
> +		    (wait_pt->value != signal_pt->value))
> +			continue;
> +		wait_pt->signal_pt_fence = dma_fence_get(&signal_pt->base->base);
> +		ret = dma_fence_add_callback(wait_pt->signal_pt_fence,
> +					     &wait_pt->wait_cb,
> +					     wait_pt_func);
> +		if (ret == -ENOENT) {
> +			dma_fence_signal(&wait_pt->base.base);
> +			dma_fence_put(wait_pt->signal_pt_fence);
> +			wait_pt->signal_pt_fence = NULL;
> +			rb_erase(&wait_pt->node,
> +				 &timeline->wait_pt_tree);
> +			RB_CLEAR_NODE(&wait_pt->node);
> +			dma_fence_put(&wait_pt->base.base);
> +		} else if (ret < 0) {
> +			dma_fence_put(wait_pt->signal_pt_fence);
> +			DRM_ERROR("add callback error!");
> +		} else {
> +			/* after adding callback, remove from rb tree */
> +			rb_erase(&wait_pt->node,
> +				 &timeline->wait_pt_tree);
> +			RB_CLEAR_NODE(&wait_pt->node);
> +		}
> +		return;
> +	}
> +	/* signaled pt was released */
> +	if (!wait_pt->signal_pt_fence && (wait_pt->value <=
> +					  timeline->signal_point)) {
> +		dma_fence_signal(&wait_pt->base.base);
> +		rb_erase(&wait_pt->node,
> +			 &timeline->wait_pt_tree);
> +		RB_CLEAR_NODE(&wait_pt->node);
> +		dma_fence_put(&wait_pt->base.base);
> +	}
>   }
>   
> -void drm_syncobj_add_callback(struct drm_syncobj *syncobj,
> -			      struct drm_syncobj_cb *cb,
> -			      drm_syncobj_func_t func)
> +static int drm_syncobj_timeline_create_signal_pt(struct drm_syncobj *syncobj,
> +						 struct dma_fence *fence,
> +						 u64 point)
>   {
> +	struct drm_syncobj_signal_pt *signal_pt =
> +		kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL);
> +	struct drm_syncobj_signal_pt *tail_pt;
> +	struct dma_fence **fences;
> +	struct rb_node *node;
> +	struct drm_syncobj_wait_pt *tail_wait_pt = NULL;
> +	int num_fences = 0;
> +	int ret = 0, i;
> +
> +	if (!signal_pt)
> +		return -ENOMEM;
> +	if (syncobj->syncobj_timeline.signal_point >= point) {
> +		DRM_WARN("A later signal is ready!");
> +		goto out;
> +	}
> +	if (!fence)
> +		goto out;
> +
> +	fences = kmalloc_array(sizeof(void *), 2, GFP_KERNEL);
> +	if (!fences)
> +		goto out;
> +	fences[num_fences++] = dma_fence_get(fence);
> +	/* timeline syncobj must take this dependency */
> +	if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
> +		spin_lock(&syncobj->lock);
> +		if (!list_empty(&syncobj->syncobj_timeline.signal_pt_list)) {
> +			tail_pt = list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
> +						  struct drm_syncobj_signal_pt, list);
> +			fences[num_fences++] = dma_fence_get(&tail_pt->base->base);
> +		}
> +		spin_unlock(&syncobj->lock);
> +	}
> +	signal_pt->base = dma_fence_array_create(num_fences, fences,
> +						 syncobj->syncobj_timeline.timeline_context,
> +						 point, false);
> +	if (!signal_pt->base)
> +		goto fail;
> +
> +	signal_pt->value = point;
>   	spin_lock(&syncobj->lock);
> -	drm_syncobj_add_callback_locked(syncobj, cb, func);
> +	syncobj->syncobj_timeline.signal_point = point;
> +	INIT_LIST_HEAD(&signal_pt->list);
> +	list_add_tail(&signal_pt->list, &syncobj->syncobj_timeline.signal_pt_list);
> +
> +	/* check if there is advanced wait */
> +	node = rb_last(&syncobj->syncobj_timeline.wait_pt_tree);
> +	if (node)
> +		tail_wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> +	if (tail_wait_pt && !tail_wait_pt->signal_pt_fence) {
> +		for (node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
> +		     node != NULL; node = rb_next(node)) {
> +			struct drm_syncobj_wait_pt *wait_pt =
> +				rb_entry(node, struct drm_syncobj_wait_pt,
> +					 node);
> +
> +			drm_syncobj_find_signal_pt_for_wait_pt(syncobj,
> +							       wait_pt);
> +		}
> +	}
> +
>   	spin_unlock(&syncobj->lock);
> +	wake_up_all(&syncobj->syncobj_timeline.wq);
> +
> +	return 0;
> +fail:
> +	for (i = 0; i < num_fences; i++)
> +		dma_fence_put(fences[i]);
> +	kfree(fences);
> +out:
> +	kfree(signal_pt);
> +	return ret;
>   }
>   
> -void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
> -				 struct drm_syncobj_cb *cb)
> +static void drm_syncobj_timeline_garbage_collection(struct drm_syncobj *syncobj)
>   {
> +	struct drm_syncobj_timeline *timeline = &syncobj->syncobj_timeline;
> +	struct rb_node *node;
> +	struct drm_syncobj_wait_pt *wait_pt;
> +	struct drm_syncobj_signal_pt *signal_pt, *tmp;
> +
>   	spin_lock(&syncobj->lock);
> -	list_del_init(&cb->node);
> +	node = rb_first(&timeline->wait_pt_tree);
> +	while (node) {
> +		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> +		if (!dma_fence_is_signaled(&wait_pt->base.base)) {
> +			node = rb_next(node);
> +			continue;
> +		}
> +		rb_erase(&wait_pt->node,
> +			 &timeline->wait_pt_tree);
> +		RB_CLEAR_NODE(&wait_pt->node);
> +		/* kfree(wait_pt) is excuted by fence put */
> +		dma_fence_put(&wait_pt->base.base);
> +		node = rb_first(&timeline->wait_pt_tree);
> +	}
> +	list_for_each_entry_safe(signal_pt, tmp,
> +				 &timeline->signal_pt_list, list) {
> +		if (dma_fence_is_signaled(&signal_pt->base->base)) {
> +			timeline->timeline = signal_pt->value;
> +			list_del(&signal_pt->list);
> +			dma_fence_put(&signal_pt->base->base);
> +			kfree(signal_pt);
> +		} else {
> +			/*signal_pt is in order in list, from small to big, so
> +			 * the later must not be signal either */
> +			break;
> +		}
> +	}
> +
>   	spin_unlock(&syncobj->lock);
>   }
> -
>   /**
>    * drm_syncobj_replace_fence - replace fence in a sync object.
>    * @syncobj: Sync object to replace fence in
> @@ -176,28 +368,29 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>   			       u64 point,
>   			       struct dma_fence *fence)
>   {
> -	struct dma_fence *old_fence;
> -	struct drm_syncobj_cb *cur, *tmp;
> -
> -	if (fence)
> -		dma_fence_get(fence);
> -
> -	spin_lock(&syncobj->lock);
> -
> -	old_fence = rcu_dereference_protected(syncobj->fence,
> -					      lockdep_is_held(&syncobj->lock));
> -	rcu_assign_pointer(syncobj->fence, fence);
> -
> -	if (fence != old_fence) {
> -		list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
> -			list_del_init(&cur->node);
> -			cur->func(syncobj, cur);
> +	drm_syncobj_timeline_garbage_collection(syncobj);
> +	if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
> +		if (fence)
> +			drm_syncobj_timeline_create_signal_pt(syncobj, fence,
> +							      point);
> +		return;
> +	} else if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
> +		u64 pt_value;
> +
> +		if (!fence) {
> +			drm_syncobj_timeline_fini(syncobj,
> +						  &syncobj->syncobj_timeline);
> +			drm_syncobj_timeline_init(syncobj,
> +						  &syncobj->syncobj_timeline);
> +			return;
>   		}
> +		pt_value = syncobj->syncobj_timeline.signal_point +
> +			DRM_SYNCOBJ_NORMAL_POINT;
> +		drm_syncobj_timeline_create_signal_pt(syncobj, fence, pt_value);
> +		return;
> +	} else {
> +		DRM_ERROR("the syncobj type isn't support\n");
>   	}
> -
> -	spin_unlock(&syncobj->lock);
> -
> -	dma_fence_put(old_fence);
>   }
>   EXPORT_SYMBOL(drm_syncobj_replace_fence);
>   
> @@ -220,6 +413,120 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>   	return 0;
>   }
>   
> +static struct drm_syncobj_wait_pt *
> +drm_syncobj_timeline_lookup_wait_pt_fence(struct drm_syncobj *syncobj,
> +					  u64 point,
> +					  struct dma_fence **fence)
> +{
> +	struct rb_node *node = syncobj->syncobj_timeline.wait_pt_tree.rb_node;
> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
> +
> +	spin_lock(&syncobj->lock);
> +	while(node) {
> +		int result;
> +
> +		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
> +		result = point - wait_pt->value;
> +		if (result < 0) {
> +			node = node->rb_left;
> +		} else if (result > 0) {
> +			node = node->rb_right;
> +		} else {
> +			if (fence)
> +				*fence = dma_fence_get(&wait_pt->base.base);
> +			break;
> +		}
> +	}
> +	spin_unlock(&syncobj->lock);
> +
> +	return wait_pt;
> +}
> +
> +static struct drm_syncobj_wait_pt *
> +drm_syncobj_timeline_create_wait_pt_and_return_fence(struct drm_syncobj *syncobj,
> +						     u64 point,
> +						     struct dma_fence **fence)
> +{
> +	struct drm_syncobj_wait_pt *wait_pt = kzalloc(sizeof(*wait_pt),
> +						      GFP_KERNEL);
> +	struct drm_syncobj_stub_fence *base;
> +	struct rb_node **new = &(syncobj->syncobj_timeline.wait_pt_tree.rb_node), *parent = NULL;
> +
> +	if (!wait_pt)
> +		return NULL;
> +	base = &wait_pt->base;
> +	spin_lock_init(&base->lock);
> +	dma_fence_init(&base->base,
> +		       &drm_syncobj_stub_fence_ops,
> +		       &base->lock,
> +		       syncobj->syncobj_timeline.timeline_context, point);
> +	wait_pt->value = point;
> +	wait_pt->signal_pt_fence = NULL;
> +
> +	/* wait pt must be in an order, so that we can easily lookup and signal
> +	 * it */
> +	spin_lock(&syncobj->lock);
> +	while(*new) {
> +		struct drm_syncobj_wait_pt *this =
> +			rb_entry(*new, struct drm_syncobj_wait_pt, node);
> +		int result = wait_pt->value - this->value;
> +
> +		parent = *new;
> +		if (result < 0)
> +			new = &((*new)->rb_left);
> +		else if (result > 0)
> +			new = &((*new)->rb_right);
> +		else
> +			goto exist;
> +	}
> +	if (fence)
> +		*fence = dma_fence_get(&wait_pt->base.base);
> +	rb_link_node(&wait_pt->node, parent, new);
> +	rb_insert_color(&wait_pt->node, &syncobj->syncobj_timeline.wait_pt_tree);
> +	spin_unlock(&syncobj->lock);
> +	return wait_pt;
> +exist:
> +	spin_unlock(&syncobj->lock);
> +	dma_fence_put(&wait_pt->base.base);
> +	wait_pt = drm_syncobj_timeline_lookup_wait_pt_fence(syncobj, point,
> +							    fence);
> +	return wait_pt;
> +}
> +
> +static struct dma_fence *
> +drm_syncobj_timeline_point_get(struct drm_syncobj *syncobj, u64 point, u64 flags)
> +{
> +	struct drm_syncobj_wait_pt *wait_pt;
> +	struct dma_fence *fence = NULL;
> +
> +	/* check if the wait pt exists */
> +	wait_pt = drm_syncobj_timeline_lookup_wait_pt_fence(syncobj, point,
> +							    &fence);
> +	if (!fence) {
> +		/* This is a new wait pt, so create it */
> +		wait_pt = drm_syncobj_timeline_create_wait_pt_and_return_fence(syncobj, point,
> +							      &fence);
> +		if (!fence)
> +			return NULL;
> +	}
> +	if (wait_pt) {
> +		int ret = 0;
> +
> +		if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
> +			ret = wait_event_interruptible_timeout(syncobj->syncobj_timeline.wq,
> +							       wait_pt->value <= syncobj->syncobj_timeline.signal_point,
> +							       msecs_to_jiffies(10000)); /* wait 10s */
> +			if (ret <= 0)
> +				return NULL;
> +		}
> +		spin_lock(&syncobj->lock);
> +		drm_syncobj_find_signal_pt_for_wait_pt(syncobj, wait_pt);
> +		spin_unlock(&syncobj->lock);
> +		return fence;
> +	}
> +	return NULL;
> +}
> +
>   /**
>    * drm_syncobj_find_fence - lookup and reference the fence in a sync object
>    * @file_private: drm file private pointer
> @@ -234,20 +541,46 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>    * contains a reference to the fence, which must be released by calling
>    * dma_fence_put().
>    */
> -int drm_syncobj_find_fence(struct drm_file *file_private,
> -			   u32 handle, u64 point,
> -			   struct dma_fence **fence)
> +int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point,
> +			     u64 flags, struct dma_fence **fence)
>   {
> -	struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
>   	int ret = 0;
>   
>   	if (!syncobj)
>   		return -ENOENT;
>   
> -	*fence = drm_syncobj_fence_get(syncobj);
> +	drm_syncobj_timeline_garbage_collection(syncobj);
> +	if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
> +		/*NORMAL syncobj always wait on last pt */
> +		u64 tail_pt_value = syncobj->syncobj_timeline.signal_point;
> +
> +		if (tail_pt_value == 0)
> +			tail_pt_value += DRM_SYNCOBJ_NORMAL_POINT;
> +		/* NORMAL syncobj doesn't care point value */
> +		WARN_ON(point != 0);
> +		*fence = drm_syncobj_timeline_point_get(syncobj, tail_pt_value,
> +							flags);
> +	} else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
> +		*fence = drm_syncobj_timeline_point_get(syncobj, point,
> +							flags);
> +	} else {
> +		DRM_ERROR("Don't support this type syncobj\n");
> +		*fence = NULL;
> +	}
>   	if (!*fence) {
>   		ret = -EINVAL;
>   	}
> +	return ret;
> +}
> +EXPORT_SYMBOL(drm_syncobj_search_fence);
> +int drm_syncobj_find_fence(struct drm_file *file_private,
> +			   u32 handle, u64 point,
> +			   struct dma_fence **fence) {
> +	struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
> +
> +	int ret = drm_syncobj_search_fence(syncobj, point,
> +					DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
> +					fence);
>   	drm_syncobj_put(syncobj);
>   	return ret;
>   }
> @@ -264,7 +597,7 @@ void drm_syncobj_free(struct kref *kref)
>   	struct drm_syncobj *syncobj = container_of(kref,
>   						   struct drm_syncobj,
>   						   refcount);
> -	drm_syncobj_replace_fence(syncobj, 0, NULL);
> +	drm_syncobj_timeline_fini(syncobj, &syncobj->syncobj_timeline);
>   	kfree(syncobj);
>   }
>   EXPORT_SYMBOL(drm_syncobj_free);
> @@ -292,8 +625,12 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
>   		return -ENOMEM;
>   
>   	kref_init(&syncobj->refcount);
> -	INIT_LIST_HEAD(&syncobj->cb_list);
>   	spin_lock_init(&syncobj->lock);
> +	if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE)
> +		syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
> +	else
> +		syncobj->type = DRM_SYNCOBJ_TYPE_NORMAL;
> +	drm_syncobj_timeline_init(syncobj, &syncobj->syncobj_timeline);
>   
>   	if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
>   		ret = drm_syncobj_assign_null_handle(syncobj);
> @@ -651,7 +988,6 @@ struct syncobj_wait_entry {
>   	struct task_struct *task;
>   	struct dma_fence *fence;
>   	struct dma_fence_cb fence_cb;
> -	struct drm_syncobj_cb syncobj_cb;
>   };
>   
>   static void syncobj_wait_fence_func(struct dma_fence *fence,
> @@ -663,18 +999,6 @@ static void syncobj_wait_fence_func(struct dma_fence *fence,
>   	wake_up_process(wait->task);
>   }
>   
> -static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
> -				      struct drm_syncobj_cb *cb)
> -{
> -	struct syncobj_wait_entry *wait =
> -		container_of(cb, struct syncobj_wait_entry, syncobj_cb);
> -
> -	/* This happens inside the syncobj lock */
> -	wait->fence = dma_fence_get(rcu_dereference_protected(syncobj->fence,
> -							      lockdep_is_held(&syncobj->lock)));
> -	wake_up_process(wait->task);
> -}
> -
>   static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>   						  uint32_t count,
>   						  uint32_t flags,
> @@ -698,14 +1022,11 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>   	signaled_count = 0;
>   	for (i = 0; i < count; ++i) {
>   		entries[i].task = current;
> -		entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
> +		ret = drm_syncobj_search_fence(syncobjs[i], 0, 0,
> +					       &entries[i].fence);
>   		if (!entries[i].fence) {
> -			if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
> -				continue;
> -			} else {
> -				ret = -EINVAL;
> -				goto cleanup_entries;
> -			}
> +			ret = -EINVAL;
> +			goto cleanup_entries;
>   		}
>   
>   		if (dma_fence_is_signaled(entries[i].fence)) {
> @@ -733,15 +1054,6 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>   	 * fallthough and try a 0 timeout wait!
>   	 */
>   
> -	if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
> -		for (i = 0; i < count; ++i) {
> -			drm_syncobj_fence_get_or_add_callback(syncobjs[i],
> -							      &entries[i].fence,
> -							      &entries[i].syncobj_cb,
> -							      syncobj_wait_syncobj_func);
> -		}
> -	}
> -
>   	do {
>   		set_current_state(TASK_INTERRUPTIBLE);
>   
> @@ -789,13 +1101,10 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>   
>   cleanup_entries:
>   	for (i = 0; i < count; ++i) {
> -		if (entries[i].syncobj_cb.func)
> -			drm_syncobj_remove_callback(syncobjs[i],
> -						    &entries[i].syncobj_cb);
> +		dma_fence_put(entries[i].fence);
>   		if (entries[i].fence_cb.func)
>   			dma_fence_remove_callback(entries[i].fence,
>   						  &entries[i].fence_cb);
> -		dma_fence_put(entries[i].fence);
>   	}
>   	kfree(entries);
>   
> @@ -970,12 +1279,21 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
>   	if (ret < 0)
>   		return ret;
>   
> -	for (i = 0; i < args->count_handles; i++)
> -		drm_syncobj_replace_fence(syncobjs[i], 0, NULL);
> -
> +	for (i = 0; i < args->count_handles; i++) {
> +		if (syncobjs[i]->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
> +			DRM_ERROR("timeline syncobj cannot reset!\n");
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +		drm_syncobj_timeline_fini(syncobjs[i],
> +					  &syncobjs[i]->syncobj_timeline);
> +		drm_syncobj_timeline_init(syncobjs[i],
> +					  &syncobjs[i]->syncobj_timeline);
> +	}
> +out:
>   	drm_syncobj_array_free(syncobjs, args->count_handles);
>   
> -	return 0;
> +	return ret;
>   }
>   
>   int
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0a8d2d64f380..579e91a5858b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -2137,7 +2137,9 @@ await_fence_array(struct i915_execbuffer *eb,
>   		if (!(flags & I915_EXEC_FENCE_WAIT))
>   			continue;
>   
> -		fence = drm_syncobj_fence_get(syncobj);
> +		drm_syncobj_search_fence(syncobj, 0,
> +					 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
> +					 &fence);
>   		if (!fence)
>   			return -EINVAL;
>   
> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
> index 425432b85a87..657c97dc25ec 100644
> --- a/include/drm/drm_syncobj.h
> +++ b/include/drm/drm_syncobj.h
> @@ -30,6 +30,25 @@
>   
>   struct drm_syncobj_cb;
>   
> +enum drm_syncobj_type {
> +	DRM_SYNCOBJ_TYPE_NORMAL,
> +	DRM_SYNCOBJ_TYPE_TIMELINE
> +};
> +
> +struct drm_syncobj_timeline {
> +	wait_queue_head_t	wq;
> +	u64 timeline_context;
> +	/**
> +	 * @timeline: syncobj timeline
> +	 */
> +	u64 timeline;
> +	u64 signal_point;
> +
> +
> +	struct rb_root wait_pt_tree;
> +	struct list_head signal_pt_list;
> +};
> +
>   /**
>    * struct drm_syncobj - sync object.
>    *
> @@ -41,19 +60,16 @@ struct drm_syncobj {
>   	 */
>   	struct kref refcount;
>   	/**
> -	 * @fence:
> -	 * NULL or a pointer to the fence bound to this object.
> -	 *
> -	 * This field should not be used directly. Use drm_syncobj_fence_get()
> -	 * and drm_syncobj_replace_fence() instead.
> +	 * @type: indicate syncobj type
>   	 */
> -	struct dma_fence __rcu *fence;
> +	enum drm_syncobj_type type;
>   	/**
> -	 * @cb_list: List of callbacks to call when the &fence gets replaced.
> +	 * @syncobj_timeline: timeline
>   	 */
> -	struct list_head cb_list;
> +	struct drm_syncobj_timeline syncobj_timeline;
> +
>   	/**
> -	 * @lock: Protects &cb_list and write-locks &fence.
> +	 * @lock: Protects timeline list and write-locks &fence.
>   	 */
>   	spinlock_t lock;
>   	/**
> @@ -62,25 +78,6 @@ struct drm_syncobj {
>   	struct file *file;
>   };
>   
> -typedef void (*drm_syncobj_func_t)(struct drm_syncobj *syncobj,
> -				   struct drm_syncobj_cb *cb);
> -
> -/**
> - * struct drm_syncobj_cb - callback for drm_syncobj_add_callback
> - * @node: used by drm_syncob_add_callback to append this struct to
> - *	  &drm_syncobj.cb_list
> - * @func: drm_syncobj_func_t to call
> - *
> - * This struct will be initialized by drm_syncobj_add_callback, additional
> - * data can be passed along by embedding drm_syncobj_cb in another struct.
> - * The callback will get called the next time drm_syncobj_replace_fence is
> - * called.
> - */
> -struct drm_syncobj_cb {
> -	struct list_head node;
> -	drm_syncobj_func_t func;
> -};
> -
>   void drm_syncobj_free(struct kref *kref);
>   
>   /**
> @@ -106,29 +103,6 @@ drm_syncobj_put(struct drm_syncobj *obj)
>   	kref_put(&obj->refcount, drm_syncobj_free);
>   }
>   
> -/**
> - * drm_syncobj_fence_get - get a reference to a fence in a sync object
> - * @syncobj: sync object.
> - *
> - * This acquires additional reference to &drm_syncobj.fence contained in @obj,
> - * if not NULL. It is illegal to call this without already holding a reference.
> - * No locks required.
> - *
> - * Returns:
> - * Either the fence of @obj or NULL if there's none.
> - */
> -static inline struct dma_fence *
> -drm_syncobj_fence_get(struct drm_syncobj *syncobj)
> -{
> -	struct dma_fence *fence;
> -
> -	rcu_read_lock();
> -	fence = dma_fence_get_rcu_safe(&syncobj->fence);
> -	rcu_read_unlock();
> -
> -	return fence;
> -}
> -
>   struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
>   				     u32 handle);
>   void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, u64 point,
> @@ -142,5 +116,7 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
>   int drm_syncobj_get_handle(struct drm_file *file_private,
>   			   struct drm_syncobj *syncobj, u32 *handle);
>   int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd);
> +int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point,
> +			     u64 flags, struct dma_fence **fence);
>   
>   #endif
> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index 300f336633f2..cebdb2541eb7 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -717,6 +717,7 @@ struct drm_prime_handle {
>   struct drm_syncobj_create {
>   	__u32 handle;
>   #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
> +#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 1)
>   	__u32 flags;
>   };
>   



More information about the amd-gfx mailing list