[PATCH 2/2] [RFC]drm: add syncobj timeline support

zhoucm1 zhoucm1 at amd.com
Wed Aug 22 09:59:37 UTC 2018



On 2018年08月22日 17:31, Daniel Vetter wrote:
> On Wed, Aug 22, 2018 at 05:28:17PM +0800, zhoucm1 wrote:
>>
>> On 2018年08月22日 17:24, Daniel Vetter wrote:
>>> On Wed, Aug 22, 2018 at 04:49:28PM +0800, Chunming Zhou wrote:
>>>> VK_KHR_timeline_semaphore:
>>>> This extension introduces a new type of semaphore that has an integer payload
>>>> identifying a point in a timeline. Such timeline semaphores support the
>>>> following operations:
>>>>     * Host query - A host operation that allows querying the payload of the
>>>>       timeline semaphore.
>>>>     * Host wait - A host operation that allows a blocking wait for a
>>>>       timeline semaphore to reach a specified value.
>>>>     * Device wait - A device operation that allows waiting for a
>>>>       timeline semaphore to reach a specified value.
>>>>     * Device signal - A device operation that allows advancing the
>>>>       timeline semaphore to a specified value.
>>>>
>>>> Since it's a timeline, that means the front time point(PT) always is signaled before the late PT.
>>>> a. signal PT design:
>>>> Signal PT fence N depends on PT[N-1] fence and signal opertion fence, when PT[N] fence is signaled,
>>>> the timeline will increase to value of PT[N].
>>>> b. wait PT design:
>>>> Wait PT fence is signaled by reaching timeline point value, when timeline is increasing, will compare
>>>> wait PTs value with new timeline value, if PT value is lower than timeline value, then wait PT will be
>>>> signaled, otherwise keep in list. semaphore wait operation can wait on any point of timeline,
>>>> so need a RB tree to order them. And wait PT could ahead of signal PT, we need a sumission fence to
>>>> perform that.
>>>>
>>>> TODO:
>>>> CPU query and wait on timeline semaphore.
>>> Another TODO: igt testcase for all the cornercases. We already have
>>> other syncobj tests in there.
>> Yes, I'm also trying to find where test should be wrote, Could you give a
>> directory?
> There's already tests/syncobj_basic.c and tests/syncobj_wait.c. Either
> extend those, or probably better to start a new tests/syncobj_timeline.c
> since I expect this will have a lot of corner-cases we need to check.
I failed to find them in both kernel and libdrm, Could you point which 
test you said?

Thanks,
David Zhou
> -Daniel
>
>> Thanks,
>> David Zhou
>>> That would also help with understanding how this is supposed to be used,
>>> since I'm a bit too dense to immediately get your algorithm by just
>>> staring at the code.
>>>
>>>
>>>> Change-Id: I9f09aae225e268442c30451badac40406f24262c
>>>> Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
>>>> Cc: Christian Konig <christian.koenig at amd.com>
>>>> Cc: Dave Airlie <airlied at redhat.com>
>>>> Cc: Daniel Rakos <Daniel.Rakos at amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |   7 +-
>>>>    drivers/gpu/drm/drm_syncobj.c          | 385 ++++++++++++++++++++++++++++++++-
>>>>    drivers/gpu/drm/v3d/v3d_gem.c          |   4 +-
>>>>    drivers/gpu/drm/vc4/vc4_gem.c          |   2 +-
>>>>    include/drm/drm_syncobj.h              |  45 +++-
>>>>    include/uapi/drm/drm.h                 |   3 +-
>>>>    6 files changed, 435 insertions(+), 11 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> index d42d1c8f78f6..463cc8960723 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> @@ -1105,7 +1105,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
>>>>    {
>>>>    	int r;
>>>>    	struct dma_fence *fence;
>>>> -	r = drm_syncobj_find_fence(p->filp, handle, &fence);
>>>> +	r = drm_syncobj_find_fence(p->filp, handle, &fence, 0);
>>>>    	if (r)
>>>>    		return r;
>>>> @@ -1193,8 +1193,9 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
>>>>    {
>>>>    	int i;
>>>> -	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
>>>> -		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
>>>> +	for (i = 0; i < p->num_post_dep_syncobjs; ++i) {
>>>> +		drm_syncobj_signal_fence(p->post_dep_syncobjs[i], p->fence, 0);
>>>> +	}
>>>>    }
>>>>    static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>>>> index 70af32d0def1..3709f36c901e 100644
>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>> @@ -187,6 +187,191 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>>>>    }
>>>>    EXPORT_SYMBOL(drm_syncobj_replace_fence);
>>>> +static void drm_syncobj_timeline_signal_submission_fences(struct drm_syncobj *syncobj)
>>>> +{
>>>> +	struct rb_node *node = NULL;
>>>> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>> +
>>>> +	spin_lock(&syncobj->lock);
>>>> +	for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
>>>> +	    node != NULL; node = rb_next(node)) {
>>>> +	    wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>>> +	    if (wait_pt->value <= syncobj->syncobj_timeline.signal_point) {
>>>> +		    if (wait_pt->submission_fence)
>>>> +			dma_fence_signal(&wait_pt->submission_fence->base);
>>>> +	    } else {
>>>> +		    /* the loop is from left to right, the later entry value is
>>>> +		     * bigger, so don't need to check any more */
>>>> +		    break;
>>>> +	    }
>>> This seems to reinvet syncobj->cb_list. Since this is the exact same
>>> "future fence that doesn't even exist yet" problem I think those two code
>>> path should be unified. In general I think it'd be much better if we treat
>>> the old syncobj as a timeline with a limit of 1 slot only.
>>>
>>> Or there needs to be a really good reason why all new code.
>>>
>>> Specifically for this stuff here having unified future fence semantics
>>> will allow drivers to do clever stuff with them.
>>>
>>>> +	}
>>>> +	spin_unlock(&syncobj->lock);
>>>> +}
>>>> +
>>>> +static void drm_syncobj_timeline_signal_wait_pts(struct drm_syncobj *syncobj)
>>>> +{
>>>> +	struct rb_node *node = NULL;
>>>> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>> +
>>>> +	spin_lock(&syncobj->lock);
>>>> +	for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
>>>> +	    node != NULL; ) {
>>>> +		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>>> +		node = rb_next(node);
>>>> +		if (wait_pt->value <= syncobj->syncobj_timeline.timeline) {
>>>> +			dma_fence_signal(&wait_pt->base.base);
>>>> +			rb_erase(&wait_pt->node,
>>>> +				 &syncobj->syncobj_timeline.wait_pt_tree);
>>>> +			RB_CLEAR_NODE(&wait_pt->node);
>>>> +			if (wait_pt->submission_fence)
>>>> +				dma_fence_put(&wait_pt->submission_fence->base);
>>>> +			/* kfree(wait_pt) is excuted by fence put */
>>>> +			dma_fence_put(&wait_pt->base.base);
>>>> +		} else {
>>>> +			/* the loop is from left to right, the later entry value is
>>>> +			 * bigger, so don't need to check any more */
>>>> +			break;
>>>> +		}
>>>> +	}
>>>> +	spin_unlock(&syncobj->lock);
>>>> +}
>>>> +
>>>> +
>>>> +static void pt_fence_cb(struct drm_syncobj_signal_pt *signal_pt)
>>>> +{
>>>> +	struct dma_fence *fence = NULL;
>>>> +	struct drm_syncobj *syncobj;
>>>> +
>>>> +	fence = signal_pt->signal_fence;
>>>> +	signal_pt->signal_fence = NULL;
>>>> +	dma_fence_put(fence);
>>>> +	fence = signal_pt->pre_pt_base;
>>>> +	signal_pt->pre_pt_base = NULL;
>>>> +	dma_fence_put(fence);
>>>> +
>>>> +	syncobj = signal_pt->syncobj;
>>>> +	spin_lock(&syncobj->lock);
>>>> +	list_del(&signal_pt->list);
>>>> +	syncobj->syncobj_timeline.timeline = signal_pt->value;
>>>> +	spin_unlock(&syncobj->lock);
>>>> +	/* kfree(signal_pt) will be  executed by below fence put */
>>>> +	dma_fence_put(&signal_pt->base.base);
>>>> +	drm_syncobj_timeline_signal_wait_pts(syncobj);
>>>> +}
>>>> +static void pt_signal_fence_func(struct dma_fence *fence,
>>>> +				 struct dma_fence_cb *cb)
>>>> +{
>>>> +	struct drm_syncobj_signal_pt *signal_pt =
>>>> +		container_of(cb, struct drm_syncobj_signal_pt, signal_cb);
>>>> +
>>>> +	if (signal_pt->pre_pt_base &&
>>>> +	    !dma_fence_is_signaled(signal_pt->pre_pt_base))
>>>> +		return;
>>>> +
>>>> +	pt_fence_cb(signal_pt);
>>>> +}
>>>> +static void pt_pre_fence_func(struct dma_fence *fence,
>>>> +				 struct dma_fence_cb *cb)
>>>> +{
>>>> +	struct drm_syncobj_signal_pt *signal_pt =
>>>> +		container_of(cb, struct drm_syncobj_signal_pt, pre_pt_cb);
>>>> +
>>>> +	if (signal_pt->signal_fence &&
>>>> +	    !dma_fence_is_signaled(signal_pt->pre_pt_base))
>>>> +		return;
>>>> +
>>>> +	pt_fence_cb(signal_pt);
>>>> +}
>>>> +
>>>> +static int drm_syncobj_timeline_signal_fence(struct drm_syncobj *syncobj,
>>>> +					     struct dma_fence *fence,
>>>> +					     u64 point)
>>>> +{
>>>> +	struct drm_syncobj_signal_pt *signal_pt =
>>>> +		kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL);
>>>> +	struct drm_syncobj_signal_pt *tail_pt;
>>>> +	struct dma_fence *tail_pt_fence = NULL;
>>>> +	int ret = 0;
>>>> +
>>>> +	if (!signal_pt)
>>>> +		return -ENOMEM;
>>>> +	if (syncobj->syncobj_timeline.signal_point >= point) {
>>>> +		DRM_WARN("A later signal is ready!");
>>>> +		goto out;
>>>> +	}
>>>> +	if (fence)
>>>> +		dma_fence_get(fence);
>>>> +	spin_lock(&syncobj->lock);
>>>> +	spin_lock_init(&signal_pt->base.lock);
>>>> +	dma_fence_init(&signal_pt->base.base,
>>>> +		       &drm_syncobj_stub_fence_ops,
>>>> +		       &signal_pt->base.lock,
>>>> +		       syncobj->syncobj_timeline.timeline_context, point);
>>>> +	signal_pt->signal_fence =
>>>> +		rcu_dereference_protected(fence,
>>>> +					  lockdep_is_held(&fence->lock));
>>>> +	if (!list_empty(&syncobj->syncobj_timeline.signal_pt_list)) {
>>>> +		tail_pt = list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
>>>> +					  struct drm_syncobj_signal_pt, list);
>>>> +		tail_pt_fence = &tail_pt->base.base;
>>>> +		if (dma_fence_is_signaled(tail_pt_fence))
>>>> +			tail_pt_fence = NULL;
>>>> +	}
>>>> +	if (tail_pt_fence)
>>>> +		signal_pt->pre_pt_base =
>>>> +			dma_fence_get(rcu_dereference_protected(tail_pt_fence,
>>>> +								lockdep_is_held(&tail_pt_fence->lock)));
>>>> +
>>>> +	signal_pt->value = point;
>>>> +	syncobj->syncobj_timeline.signal_point = point;
>>>> +	signal_pt->syncobj = syncobj;
>>>> +	INIT_LIST_HEAD(&signal_pt->list);
>>>> +	list_add_tail(&signal_pt->list, &syncobj->syncobj_timeline.signal_pt_list);
>>>> +	spin_unlock(&syncobj->lock);
>>>> +	drm_syncobj_timeline_signal_submission_fences(syncobj);
>>>> +	/**
>>>> +	 * Every pt is depending on signal fence and previous pt fence, add
>>>> +	 * callbacks to them
>>>> +	 */
>>>> +	if (!dma_fence_is_signaled(signal_pt->signal_fence))
>>>> +		dma_fence_add_callback(signal_pt->signal_fence,
>>>> +				       &signal_pt->signal_cb,
>>>> +				       pt_signal_fence_func);
>>>> +	else
>>>> +		pt_signal_fence_func(signal_pt->signal_fence,
>>>> +				     &signal_pt->signal_cb);
>>>> +	if (signal_pt->pre_pt_base && !dma_fence_is_signaled(signal_pt->pre_pt_base))
>>>> +		dma_fence_add_callback(signal_pt->pre_pt_base,
>>>> +				       &signal_pt->pre_pt_cb,
>>>> +				       pt_pre_fence_func);
>>>> +	else
>>>> +		pt_pre_fence_func(signal_pt->pre_pt_base, &signal_pt->pre_pt_cb);
>>>> +
>>>> +
>>>> +	return 0;
>>>> +out:
>>>> +	kfree(signal_pt);
>>>> +	return ret;
>>>> +}
>>>> +
>>>> +/**
>>>> + * drm_syncobj_signal_fence - place fence into a sync object.
>>>> + * @syncobj: Sync object to replace fence in
>>>> + * @fence: fence to install in sync file.
>>>> + *
>>>> + * This places the fence into normal or timeline sync object
>>>> + */
>>>> +void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
>>>> +			      struct dma_fence *fence,
>>>> +			      u64 point)
>>> This is a very confusion function name. Feels like
>>> drm_syncobj_timeline_signal_fence is rather misnamed. I think we should
>>> just extend replace_fence like you do with find_fence already.
>>>
>>>> +{
>>>> +	if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL)
>>>> +		drm_syncobj_replace_fence(syncobj, fence);
>>>> +	else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE)
>>>> +		drm_syncobj_timeline_signal_fence(syncobj, fence, point);
>>>> +}
>>>> +EXPORT_SYMBOL(drm_syncobj_signal_fence);
>>>> +
>>>>    static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>>>    {
>>>>    	struct drm_syncobj_stub_fence *fence;
>>>> @@ -205,12 +390,150 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>>>    	return 0;
>>>>    }
>>>> +static struct drm_syncobj_wait_pt *
>>>> +drm_syncobj_timeline_lookup_wait_pt(struct drm_syncobj *syncobj, u64 point)
>>>> +{
>>>> +    struct rb_node *node = syncobj->syncobj_timeline.wait_pt_tree.rb_node;
>>>> +    struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>> +
>>>> +
>>>> +    spin_lock(&syncobj->lock);
>>>> +    while(node) {
>>>> +	    int result = point - wait_pt->value;
>>>> +
>>>> +	    wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>>> +	    if (result < 0)
>>>> +		    node = node->rb_left;
>>>> +	    else if (result > 0)
>>>> +		    node = node->rb_right;
>>>> +	    else
>>>> +		    break;
>>>> +    }
>>>> +    spin_unlock(&syncobj->lock);
>>>> +
>>>> +    return wait_pt;
>>>> +}
>>>> +
>>>> +static struct drm_syncobj_wait_pt *
>>>> +drm_syncobj_timeline_create_wait_pt(struct drm_syncobj *syncobj, u64 point)
>>>> +{
>>>> +	struct drm_syncobj_wait_pt *wait_pt;
>>>> +	struct rb_node **new = &(syncobj->syncobj_timeline.wait_pt_tree.rb_node), *parent = NULL;
>>>> +
>>>> +	wait_pt = kzalloc(sizeof(*wait_pt), GFP_KERNEL);
>>>> +	if (!wait_pt)
>>>> +		return NULL;
>>>> +	spin_lock_init(&wait_pt->base.lock);
>>>> +	dma_fence_init(&wait_pt->base.base,
>>>> +		       &drm_syncobj_stub_fence_ops,
>>>> +		       &wait_pt->base.lock,
>>>> +		       syncobj->syncobj_timeline.timeline_context, point);
>>>> +	wait_pt->submission_fence = NULL;
>>>> +	/* check if the pt is already sumbitted, if yes, then don't need to
>>>> +	 * create submission fence, otherwise create it */
>>>> +	if (point > syncobj->syncobj_timeline.signal_point) {
>>>> +		wait_pt->submission_fence =
>>>> +			kzalloc(sizeof(struct drm_syncobj_stub_fence),
>>>> +				GFP_KERNEL);
>>>> +		if (!wait_pt->submission_fence) {
>>>> +			dma_fence_put(&wait_pt->base.base);
>>>> +			return NULL;
>>>> +		}
>>>> +		spin_lock_init(&wait_pt->submission_fence->lock);
>>>> +		dma_fence_init(&wait_pt->submission_fence->base,
>>>> +			       &drm_syncobj_stub_fence_ops,
>>>> +			       &wait_pt->submission_fence->lock,
>>>> +			       syncobj->syncobj_timeline.submission_context,
>>>> +			       point);
>>>> +	}
>>>> +	wait_pt->value = point;
>>>> +
>>>> +	/* wait pt must be in an order, so that we can easily lookup and signal
>>>> +	 * it */
>>>> +	spin_lock(&syncobj->lock);
>>>> +	if (point <= syncobj->syncobj_timeline.timeline)
>>>> +		dma_fence_signal(&wait_pt->base.base);
>>>> +	if (wait_pt->submission_fence &&
>>>> +	    point <= syncobj->syncobj_timeline.signal_point)
>>>> +		dma_fence_signal(&wait_pt->submission_fence->base);
>>>> +	while(*new) {
>>>> +		struct drm_syncobj_wait_pt *this =
>>>> +			rb_entry(*new, struct drm_syncobj_wait_pt, node);
>>>> +		int result = wait_pt->value - this->value;
>>>> +
>>>> +		parent = *new;
>>>> +		if (result < 0)
>>>> +			new = &((*new)->rb_left);
>>>> +		else if (result > 0)
>>>> +			new = &((*new)->rb_right);
>>>> +		else
>>>> +			goto exist;
>>>> +	}
>>>> +
>>>> +	rb_link_node(&wait_pt->node, parent, new);
>>>> +	rb_insert_color(&wait_pt->node, &syncobj->syncobj_timeline.wait_pt_tree);
>>>> +	spin_unlock(&syncobj->lock);
>>>> +	return wait_pt;
>>>> +exist:
>>>> +	spin_unlock(&syncobj->lock);
>>>> +	if (wait_pt->submission_fence)
>>>> +		dma_fence_put(&wait_pt->submission_fence->base);
>>>> +	dma_fence_put(&wait_pt->base.base);
>>>> +	wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
>>>> +	return wait_pt;
>>>> +}
>>>> +
>>>> +static struct dma_fence *
>>>> +drm_syncobj_timeline_point_get(struct drm_syncobj *syncobj, u64 point, u64 flag)
>>>> +{
>>>> +	struct drm_syncobj_wait_pt *wait_pt;
>>>> +
>>>> +	/* already signaled, simply return a signaled stub fence */
>>>> +	if (point <= syncobj->syncobj_timeline.timeline) {
>>>> +		struct drm_syncobj_stub_fence *fence;
>>>> +
>>>> +		fence = kzalloc(sizeof(*fence), GFP_KERNEL);
>>>> +		if (fence == NULL)
>>>> +			return NULL;
>>>> +
>>>> +		spin_lock_init(&fence->lock);
>>>> +		dma_fence_init(&fence->base, &drm_syncobj_stub_fence_ops,
>>>> +			       &fence->lock, 0, 0);
>>>> +		dma_fence_signal(&fence->base);
>>>> +		return &fence->base;
>>>> +	}
>>>> +
>>>> +	/* check if the wait pt exists */
>>>> +	wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
>>>> +	if (!wait_pt) {
>>>> +		/* This is a new wait pt, so create it */
>>>> +		wait_pt = drm_syncobj_timeline_create_wait_pt(syncobj, point);
>>>> +		if (!wait_pt)
>>>> +			return NULL;
>>>> +	}
>>>> +	if (wait_pt) {
>>>> +		struct dma_fence *fence;
>>>> +		if (wait_pt->submission_fence) {
>>>> +			if (flag & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
>>>> +				dma_fence_wait(&wait_pt->submission_fence->base, true);
>>>> +			else if (!dma_fence_is_signaled(&wait_pt->submission_fence->base))
>>>> +				return NULL;
>>>> +		}
>>>> +		rcu_read_lock();
>>>> +		fence = dma_fence_get_rcu(&wait_pt->base.base);
>>>> +		rcu_read_unlock();
>>>> +		return fence;
>>>> +	}
>>>> +	return NULL;
>>>> +}
>>>> +
>>>>    /**
>>>>     * drm_syncobj_find_fence - lookup and reference the fence in a sync object
>>>>     * @file_private: drm file private pointer
>>>>     * @handle: sync object handle to lookup.
>>>>     * @fence: out parameter for the fence
>>>> + * @point: timeline point
>>>>     *
>>>>     * This is just a convenience function that combines drm_syncobj_find() and
>>>>     * drm_syncobj_fence_get().
>>>> @@ -221,7 +544,8 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>>>     */
>>>>    int drm_syncobj_find_fence(struct drm_file *file_private,
>>>>    			   u32 handle,
>>>> -			   struct dma_fence **fence)
>>>> +			   struct dma_fence **fence,
>>>> +			   u64 point)
>>>>    {
>>>>    	struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
>>>>    	int ret = 0;
>>>> @@ -229,7 +553,15 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
>>>>    	if (!syncobj)
>>>>    		return -ENOENT;
>>>> -	*fence = drm_syncobj_fence_get(syncobj);
>>>> +	if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
>>>> +		*fence = drm_syncobj_fence_get(syncobj);
>>> Needs at least a WARN_ON(point != 0) here, since I'd say that would be a
>>> driver bug. Or userspace bug, in which case we need to return -EINVAL;
>>>
>>> Hard to tell without the driver implementation.
>>>
>>>> +	}else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>>> +		*fence = drm_syncobj_timeline_point_get(syncobj, point,
>>>> +							DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT);
>>>> +	} else {
>>>> +		DRM_ERROR("invalid syncobj type\n");
>>>> +		return -EINVAL;
>>>> +	}
>>>>    	if (!*fence) {
>>>>    		ret = -EINVAL;
>>>>    	}
>>>> @@ -238,6 +570,35 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
>>>>    }
>>>>    EXPORT_SYMBOL(drm_syncobj_find_fence);
>>>> +static void drm_syncobj_timeline_fini(struct drm_syncobj *syncobj,
>>>> +				      struct drm_syncobj_timeline *syncobj_timeline)
>>>> +{
>>>> +	struct rb_node *node = NULL;
>>>> +	struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>> +	struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
>>>> +
>>>> +	spin_lock(&syncobj->lock);
>>>> +	for(node = rb_first(&syncobj_timeline->wait_pt_tree);
>>>> +	    node != NULL; ) {
>>>> +		wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>>> +		node = rb_next(node);
>>>> +		rb_erase(&wait_pt->node,
>>>> +			 &syncobj_timeline->wait_pt_tree);
>>>> +		RB_CLEAR_NODE(&wait_pt->node);
>>>> +		if (wait_pt->submission_fence)
>>>> +			dma_fence_put(&wait_pt->submission_fence->base);
>>>> +		/* kfree(wait_pt) is excuted by fence put */
>>>> +		dma_fence_put(&wait_pt->base.base);
>>>> +	}
>>>> +	list_for_each_entry_safe(signal_pt, tmp,
>>>> +				 &syncobj_timeline->signal_pt_list, list) {
>>>> +		list_del(&signal_pt->list);
>>>> +		dma_fence_put(signal_pt->signal_fence);
>>>> +		dma_fence_put(signal_pt->pre_pt_base);
>>>> +		dma_fence_put(&signal_pt->base.base);
>>>> +	}
>>>> +	spin_unlock(&syncobj->lock);
>>>> +}
>>>>    /**
>>>>     * drm_syncobj_free - free a sync object.
>>>>     * @kref: kref to free.
>>>> @@ -250,10 +611,22 @@ void drm_syncobj_free(struct kref *kref)
>>>>    						   struct drm_syncobj,
>>>>    						   refcount);
>>>>    	drm_syncobj_replace_fence(syncobj, NULL);
>>>> +	drm_syncobj_timeline_fini(syncobj, &syncobj->syncobj_timeline);
>>>>    	kfree(syncobj);
>>>>    }
>>>>    EXPORT_SYMBOL(drm_syncobj_free);
>>>> +static void drm_syncobj_timeline_init(struct drm_syncobj_timeline
>>>> +				      *syncobj_timeline)
>>>> +{
>>>> +	syncobj_timeline->timeline_context = dma_fence_context_alloc(1);
>>>> +	syncobj_timeline->submission_context = dma_fence_context_alloc(1);
>>>> +	syncobj_timeline->timeline = 0;
>>>> +	syncobj_timeline->signal_point = 0;
>>>> +
>>>> +	syncobj_timeline->wait_pt_tree = RB_ROOT;
>>>> +	INIT_LIST_HEAD(&syncobj_timeline->signal_pt_list);
>>>> +}
>>>>    /**
>>>>     * drm_syncobj_create - create a new syncobj
>>>>     * @out_syncobj: returned syncobj
>>>> @@ -279,6 +652,12 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
>>>>    	kref_init(&syncobj->refcount);
>>>>    	INIT_LIST_HEAD(&syncobj->cb_list);
>>>>    	spin_lock_init(&syncobj->lock);
>>>> +	if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE) {
>>>> +		syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
>>>> +		drm_syncobj_timeline_init(&syncobj->syncobj_timeline);
>>>> +	} else {
>>>> +		syncobj->type = DRM_SYNCOBJ_TYPE_NORMAL;
>>>> +	}
>>>>    	if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
>>>>    		ret = drm_syncobj_assign_null_handle(syncobj);
>>>> @@ -491,7 +870,7 @@ static int drm_syncobj_export_sync_file(struct drm_file *file_private,
>>>>    	if (fd < 0)
>>>>    		return fd;
>>>> -	ret = drm_syncobj_find_fence(file_private, handle, &fence);
>>>> +	ret = drm_syncobj_find_fence(file_private, handle, &fence, 0);
>>>>    	if (ret)
>>>>    		goto err_put_fd;
>>>> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
>>>> index 9029590267aa..f24c3eccb4d5 100644
>>>> --- a/drivers/gpu/drm/v3d/v3d_gem.c
>>>> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
>>>> @@ -521,12 +521,12 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
>>>>    	kref_init(&exec->refcount);
>>>>    	ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
>>>> -				     &exec->bin.in_fence);
>>>> +				     &exec->bin.in_fence, 0);
>>>>    	if (ret == -EINVAL)
>>>>    		goto fail;
>>>>    	ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
>>>> -				     &exec->render.in_fence);
>>>> +				     &exec->render.in_fence, 0);
>>>>    	if (ret == -EINVAL)
>>>>    		goto fail;
>>>> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
>>>> index 7910b9acedd6..f7b4971342e8 100644
>>>> --- a/drivers/gpu/drm/vc4/vc4_gem.c
>>>> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
>>>> @@ -1173,7 +1173,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
>>>>    	if (args->in_sync) {
>>>>    		ret = drm_syncobj_find_fence(file_priv, args->in_sync,
>>>> -					     &in_fence);
>>>> +					     &in_fence, 0);
>>>>    		if (ret)
>>>>    			goto fail;
>>>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>>>> index b04c492ddbb5..b6e193c6cc9a 100644
>>>> --- a/include/drm/drm_syncobj.h
>>>> +++ b/include/drm/drm_syncobj.h
>>>> @@ -54,6 +54,41 @@ const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
>>>>    	.release = NULL,
>>>>    };
>>>> +enum drm_syncobj_type {
>>>> +	DRM_SYNCOBJ_TYPE_NORMAL,
>>>> +	DRM_SYNCOBJ_TYPE_TIMELINE
>>>> +};
>>>> +
>>>> +struct drm_syncobj;
>>>> +struct drm_syncobj_wait_pt {
>>>> +	struct drm_syncobj_stub_fence base;
>>>> +	struct drm_syncobj_stub_fence *submission_fence;
>>>> +	u64    value;
>>>> +	struct rb_node   node;
>>>> +};
>>>> +struct drm_syncobj_signal_pt {
>>>> +	struct drm_syncobj_stub_fence base;
>>>> +	struct dma_fence *signal_fence;
>>>> +	struct dma_fence *pre_pt_base;
>>>> +	struct dma_fence_cb signal_cb;
>>>> +	struct dma_fence_cb pre_pt_cb;
>>>> +	struct drm_syncobj *syncobj;
>>>> +	u64    value;
>>>> +	struct list_head list;
>>>> +};
>>> The above internal structs shouldn't be in the public header.
>>>
>>> Some tiny comments about what each piece is for would also go a long way
>>> towards me being able to understand things.
>>>
>>> Thanks, Daniel
>>>
>>>> +struct drm_syncobj_timeline {
>>>> +	u64 timeline_context;
>>>> +	u64 submission_context;
>>>> +	/**
>>>> +	 * @timeline: syncobj timeline
>>>> +	 */
>>>> +	u64 timeline;
>>>> +	u64 signal_point;
>>>> +
>>>> +
>>>> +	struct rb_root wait_pt_tree;
>>>> +	struct list_head signal_pt_list;
>>>> +};
>>>>    /**
>>>>     * struct drm_syncobj - sync object.
>>>>     *
>>>> @@ -64,6 +99,11 @@ struct drm_syncobj {
>>>>    	 * @refcount: Reference count of this object.
>>>>    	 */
>>>>    	struct kref refcount;
>>>> +	/**
>>>> +	 * @type: indicate syncobj type
>>>> +	 */
>>>> +	enum drm_syncobj_type type;
>>>> +	struct drm_syncobj_timeline syncobj_timeline;
>>>>    	/**
>>>>    	 * @fence:
>>>>    	 * NULL or a pointer to the fence bound to this object.
>>>> @@ -162,9 +202,12 @@ void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
>>>>    				 struct drm_syncobj_cb *cb);
>>>>    void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>>>>    			       struct dma_fence *fence);
>>>> +void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
>>>> +			      struct dma_fence *fence,
>>>> +			      u64 point);
>>>>    int drm_syncobj_find_fence(struct drm_file *file_private,
>>>>    			   u32 handle,
>>>> -			   struct dma_fence **fence);
>>>> +			   struct dma_fence **fence, u64 point);
>>>>    void drm_syncobj_free(struct kref *kref);
>>>>    int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
>>>>    		       struct dma_fence *fence);
>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>> index 300f336633f2..71e6cd1c88f8 100644
>>>> --- a/include/uapi/drm/drm.h
>>>> +++ b/include/uapi/drm/drm.h
>>>> @@ -717,6 +717,8 @@ struct drm_prime_handle {
>>>>    struct drm_syncobj_create {
>>>>    	__u32 handle;
>>>>    #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
>>>> +#define DRM_SYNCOBJ_CREATE_TYPE_NORMAL (1 << 1)
>>>> +#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 2)
>>>>    	__u32 flags;
>>>>    };
>>>> @@ -734,7 +736,6 @@ struct drm_syncobj_handle {
>>>>    	__s32 fd;
>>>>    	__u32 pad;
>>>>    };
>>>> -
>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
>>>>    struct drm_syncobj_wait {
>>>> -- 
>>>> 2.14.1
>>>>
>>>> _______________________________________________
>>>> dri-devel mailing list
>>>> dri-devel at lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel



More information about the amd-gfx mailing list