[Intel-gfx] [PATCH 12/13] drm/i915: Bump signaler priority on adding a waiter

Tue May 7 12:46:45 UTC 2019

On 03/05/2019 12:52, Chris Wilson wrote:
> The handling of the no-preemption priority level imposes the restriction
> that we need to maintain the implied ordering even though preemption is
> disabled. Otherwise we may end up with an AB-BA deadlock across multiple
> engines due to a real preemption event reordering the no-preemption
> WAITs. To resolve this issue we currently promote all requests to WAIT
> on unsubmission, however this interferes with the timeslicing
> requirement that we do not apply any implicit promotion that will defeat
> the round-robin timeslice list. (If we automatically promote the active
> request it will go back to the head of the queue and not the tail!)
> 
> So we need implicit promotion to prevent reordering around semaphores
> where we are not allowed to preempt, and we must avoid implicit
> promotion on unsubmission. So instead of at unsubmit, if we apply that
> implicit promotion on adding the dependency, we avoid the semaphore
> deadlock and we also reduce the gains made by the promotion for user
> space waiting. Furthermore, by keeping the earlier dependencies at a
> higher level, we reduce the search space for timeslicing without
> altering runtime scheduling too badly (no dependencies at all will be
> assigned a higher priority for rrul).
> 
> v2: Limit the bump to external edges (as originally intended) i.e.
> between contexts and out to the user.
> 
> Testcase: igt/gem_concurrent_blit
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/selftest_lrc.c      | 12 ++++++++----
>   drivers/gpu/drm/i915/i915_request.c         |  9 ---------
>   drivers/gpu/drm/i915/i915_scheduler.c       | 11 +++++++++++
>   drivers/gpu/drm/i915/i915_scheduler_types.h |  3 ++-
>   4 files changed, 21 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 4b042893dc0e..5b3d8e33f1cf 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -98,12 +98,14 @@ static int live_busywait_preempt(void *arg)
>   	ctx_hi = kernel_context(i915);
>   	if (!ctx_hi)
>   		goto err_unlock;
> -	ctx_hi->sched.priority = INT_MAX;
> +	ctx_hi->sched.priority =
> +		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
>   
>   	ctx_lo = kernel_context(i915);
>   	if (!ctx_lo)
>   		goto err_ctx_hi;
> -	ctx_lo->sched.priority = INT_MIN;
> +	ctx_lo->sched.priority =
> +		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
>   
>   	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
>   	if (IS_ERR(obj)) {
> @@ -958,12 +960,14 @@ static int live_preempt_hang(void *arg)
>   	ctx_hi = kernel_context(i915);
>   	if (!ctx_hi)
>   		goto err_spin_lo;
> -	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
> +	ctx_hi->sched.priority =
> +		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
>   
>   	ctx_lo = kernel_context(i915);
>   	if (!ctx_lo)
>   		goto err_ctx_hi;
> -	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
> +	ctx_lo->sched.priority =
> +		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
>   
>   	for_each_engine(engine, i915, id) {
>   		struct i915_request *rq;
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 8cb3ed5531e3..065da1bcbb4c 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -468,15 +468,6 @@ void __i915_request_unsubmit(struct i915_request *request)
>   	/* We may be recursing from the signal callback of another i915 fence */
>   	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
>   
> -	/*
> -	 * As we do not allow WAIT to preempt inflight requests,
> -	 * once we have executed a request, along with triggering
> -	 * any execution callbacks, we must preserve its ordering
> -	 * within the non-preemptible FIFO.
> -	 */
> -	BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
> -	request->sched.attr.priority |= __NO_PREEMPTION;
> -
>   	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
>   		i915_request_cancel_breadcrumb(request);
>   
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 380cb7343a10..ff0ca5718f97 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -391,6 +391,16 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
>   		    !node_started(signal))
>   			node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
>   
> +		/*
> +		 * As we do not allow WAIT to preempt inflight requests,
> +		 * once we have executed a request, along with triggering
> +		 * any execution callbacks, we must preserve its ordering
> +		 * within the non-preemptible FIFO.
> +		 */
> +		BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK);
> +		if (flags & I915_DEPENDENCY_EXTERNAL)
> +			__bump_priority(signal, __NO_PREEMPTION);
> +

I don't really follow how this can be okay from here. It now gives wait
priority to every request which has a dependency, which sounds not far
off from removing the priority bump for waiters altogether, or from
reversing things and giving requests with no priority a boost.

Regards,

Tvrtko

>   		ret = true;
>   	}
>   
> @@ -409,6 +419,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
>   		return -ENOMEM;
>   
>   	if (!__i915_sched_node_add_dependency(node, signal, dep,
> +					      I915_DEPENDENCY_EXTERNAL |
>   					      I915_DEPENDENCY_ALLOC))
>   		i915_dependency_free(dep);
>   
> diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
> index 166a457884b2..3e309631bd0b 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler_types.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
> @@ -66,7 +66,8 @@ struct i915_dependency {
>   	struct list_head wait_link;
>   	struct list_head dfs_link;
>   	unsigned long flags;
> -#define I915_DEPENDENCY_ALLOC BIT(0)
> +#define I915_DEPENDENCY_ALLOC		BIT(0)
> +#define I915_DEPENDENCY_EXTERNAL	BIT(1)
>   };
>   
>   #endif /* _I915_SCHEDULER_TYPES_H_ */
>