[Intel-gfx] [PATCH 02/11] drm/i915/execlists: Suppress mere WAIT preemption

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Thu Feb 28 12:33:33 UTC 2019


On 26/02/2019 10:23, Chris Wilson wrote:
> WAIT is occasionally suppressed by virtue of preempted requests being
> promoted to NEWCLIENT if they have not already received that boost.
> Make this consistent for all WAIT boosts: they are not allowed to
> preempt executing contexts and are merely granted the right to be at
> the front of the queue for the next execution slot. This is in keeping
> with the desire that the WAIT boost be a minor tweak that does not
> grant excessive promotion to its user, nor open us up to trivial abuse.
> 
> The problem with the inconsistent WAIT preemption becomes more apparent
> as the preemption is propagated across the engines, where one engine may
> preempt and another may not, even though we rely on the exact execution
> order being consistent across engines (e.g. when using HW semaphores to
> coordinate parallel execution).
> 
> v2: Also protect GuC submission from false preemption loops.
> v3: Build bug safeguards and better debug messages for the selftest.
> v4: Do the priority bumping in unsubmit (i.e. on preemption/reset
> unwind), applying it earlier during submit causes out-of-order execution
> combined with execute fences.
> v5: Call sw_fence_fini for our dummy request (Matthew)
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com> #v3

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko

> Cc: Matthew Auld <matthew.auld at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_request.c         |  15 ++
>   drivers/gpu/drm/i915/i915_scheduler.c       |   1 -
>   drivers/gpu/drm/i915/i915_scheduler.h       |   2 +
>   drivers/gpu/drm/i915/intel_guc_submission.c |   2 +-
>   drivers/gpu/drm/i915/intel_lrc.c            |   9 +-
>   drivers/gpu/drm/i915/selftests/intel_lrc.c  | 163 ++++++++++++++++++++
>   6 files changed, 189 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 935db5548f80..00a1ea7cd907 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -353,11 +353,14 @@ void __i915_request_submit(struct i915_request *request)
>   
>   	/* We may be recursing from the signal callback of another i915 fence */
>   	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
> +
>   	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
>   	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
> +
>   	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
>   	    !i915_request_enable_breadcrumb(request))
>   		intel_engine_queue_breadcrumbs(engine);
> +
>   	spin_unlock(&request->lock);
>   
>   	engine->emit_fini_breadcrumb(request,
> @@ -401,10 +404,22 @@ void __i915_request_unsubmit(struct i915_request *request)
>   
>   	/* We may be recursing from the signal callback of another i915 fence */
>   	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
> +
> +	/*
> +	 * As we do not allow WAIT to preempt inflight requests,
> +	 * once we have executed a request, along with triggering
> +	 * any execution callbacks, we must preserve its ordering
> +	 * within the non-preemptible FIFO.
> +	 */
> +	BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
> +	request->sched.attr.priority |= __NO_PREEMPTION;
> +
>   	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
>   		i915_request_cancel_breadcrumb(request);
> +
>   	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
>   	clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
> +
>   	spin_unlock(&request->lock);
>   
>   	/* Transfer back from the global per-engine timeline to per-context */
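
A side note for readers following the unsubmit hunk above: bumping the
priority on unwind is what keeps an already-executed request ahead of
mere waiters when it is resubmitted. A minimal standalone sketch, not
driver code, with the bit values assumed from i915_scheduler.h in this
series:

#include <stdio.h>

#define I915_PRIORITY_WAIT	(1 << 0)
#define __NO_PREEMPTION		(I915_PRIORITY_WAIT)

int main(void)
{
	int unwound = 0;			/* was executing at prio 0 */
	int waiter = I915_PRIORITY_WAIT;	/* boosted only by a wait */

	/* the bump applied in __i915_request_unsubmit() */
	unwound |= __NO_PREEMPTION;

	/* the waiter no longer sorts strictly ahead: 1 > 1 is false */
	printf("waiter jumps the queue? %s\n",
	       waiter > unwound ? "yes" : "no");
	return 0;
}
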
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 38efefd22dce..9fb96ff57a29 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -322,7 +322,6 @@ static void __i915_schedule(struct i915_request *rq,
>   			if (node_signaled(p->signaler))
>   				continue;
>   
> -			GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority);
>   			if (prio > READ_ONCE(p->signaler->attr.priority))
>   				list_move_tail(&p->dfs_link, &dfs);
>   		}
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index dbe9cb7ecd82..54bd6c89817e 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -33,6 +33,8 @@ enum {
>   #define I915_PRIORITY_WAIT	((u8)BIT(0))
>   #define I915_PRIORITY_NEWCLIENT	((u8)BIT(1))
>   
> +#define __NO_PREEMPTION (I915_PRIORITY_WAIT)
> +
>   struct i915_sched_attr {
>   	/**
>   	 * @priority: execution and service priority
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 20cbceeabeae..a2846ea1e62c 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -720,7 +720,7 @@ static inline int rq_prio(const struct i915_request *rq)
>   
>   static inline int port_prio(const struct execlist_port *port)
>   {
> -	return rq_prio(port_request(port));
> +	return rq_prio(port_request(port)) | __NO_PREEMPTION;
>   }
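
This is the hunk that closes the v2 "false preemption loop" on the GuC
path: without the mask, a waiter at the same base priority as the
request in the port compared strictly greater thanks to its WAIT bit
(1 > 0), we preempted to idle, resubmitted the very same requests, and
the comparison fired again. Masking __NO_PREEMPTION into the port
priority makes the WAIT differential invisible (1 > (0 | 1) is false),
matching effective_prio() on the execlists side below.
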
>   
>   static bool __guc_dequeue(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index c4f4966b0f4f..0e20f3bc8210 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -188,6 +188,12 @@ static inline int rq_prio(const struct i915_request *rq)
>   	return rq->sched.attr.priority;
>   }
>   
> +static int effective_prio(const struct i915_request *rq)
> +{
> +	/* Restrict mere WAIT boosts from triggering preemption */
> +	return rq_prio(rq) | __NO_PREEMPTION;
> +}
> +
>   static int queue_prio(const struct intel_engine_execlists *execlists)
>   {
>   	struct i915_priolist *p;
> @@ -208,7 +214,7 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
>   static inline bool need_preempt(const struct intel_engine_cs *engine,
>   				const struct i915_request *rq)
>   {
> -	const int last_prio = rq_prio(rq);
> +	int last_prio;
>   
>   	if (!intel_engine_has_preemption(engine))
>   		return false;
> @@ -228,6 +234,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>   	 * preempt. If that hint is stale or we may be trying to preempt
>   	 * ourselves, ignore the request.
>   	 */
> +	last_prio = effective_prio(rq);
>   	if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
>   				      last_prio))
>   		return false;
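
Spelling out the effective_prio() side of the same idea: preemption
requires the queue priority hint to be strictly greater than the active
request's priority, and masking WAIT into the active side means a hint
that differs only by the WAIT bit can never win. A compile-and-run
sketch under the same assumed bit values (illustration only; real
priorities carry the user priority in the upper bits):

#include <stdbool.h>
#include <stdio.h>

#define I915_PRIORITY_WAIT	(1 << 0)
#define __NO_PREEMPTION		(I915_PRIORITY_WAIT)

static int effective_prio(int prio)
{
	/* restrict mere WAIT boosts from triggering preemption */
	return prio | __NO_PREEMPTION;
}

static bool need_preempt(int hint, int active_prio)
{
	return hint > effective_prio(active_prio);
}

int main(void)
{
	/* a WAIT-only boost is suppressed: 1 > (0 | 1) is false */
	printf("wait boost preempts? %d\n",
	       need_preempt(0 | I915_PRIORITY_WAIT, 0));
	/* a genuinely higher priority still preempts: 2 > 1 */
	printf("higher prio preempts? %d\n", need_preempt(2, 0));
	return 0;
}
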
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index 0f7a5bf69646..7172f6c7f25a 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -407,6 +407,168 @@ static int live_suppress_self_preempt(void *arg)
>   	goto err_client_b;
>   }
>   
> +static int __i915_sw_fence_call
> +dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
> +{
> +	return NOTIFY_DONE;
> +}
> +
> +static struct i915_request *dummy_request(struct intel_engine_cs *engine)
> +{
> +	struct i915_request *rq;
> +
> +	rq = kmalloc(sizeof(*rq), GFP_KERNEL | __GFP_ZERO);
> +	if (!rq)
> +		return NULL;
> +
> +	INIT_LIST_HEAD(&rq->active_list);
> +	rq->engine = engine;
> +
> +	i915_sched_node_init(&rq->sched);
> +
> +	/* mark this request as permanently incomplete */
> +	rq->fence.seqno = 1;
> +	BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
> +	rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
> +	GEM_BUG_ON(i915_request_completed(rq));
> +
> +	i915_sw_fence_init(&rq->submit, dummy_notify);
> +	i915_sw_fence_commit(&rq->submit);
> +
> +	return rq;
> +}
> +
> +static void dummy_request_free(struct i915_request *dummy)
> +{
> +	i915_request_mark_complete(dummy);
> +	i915_sched_node_fini(dummy->engine->i915, &dummy->sched);
> +	i915_sw_fence_fini(&dummy->submit);
> +
> +	dma_fence_free(&dummy->fence);
> +}
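
The permanently-incomplete trick above deserves a note: fence.seqno is
set to 1 while hwsp_seqno is pointed at the upper 32 bits of that same
u64, which are zero, so any completion test comparing the HWSP value
against the fence seqno reads 0 vs 1 and fails forever. A tiny sketch
of the aliasing (this assumes a little-endian layout, where "+ 1" lands
on the upper half, as the BUILD_BUG_ON comment implies):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t seqno = 1;
	/* point at the upper 32 bits, which stay zero on LE */
	uint32_t *hwsp = (uint32_t *)&seqno + 1;

	printf("completed? %s\n",
	       *hwsp >= (uint32_t)seqno ? "yes" : "no");
	return 0;
}
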
> +
> +static int live_suppress_wait_preempt(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct preempt_client client[4];
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	intel_wakeref_t wakeref;
> +	int err = -ENOMEM;
> +	int i;
> +
> +	/*
> +	 * Waiters are given a little priority nudge, but not enough
> +	 * to actually cause any preemption. Double check that we do
> +	 * not needlessly generate preempt-to-idle cycles.
> +	 */
> +
> +	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
> +		return 0;
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +	wakeref = intel_runtime_pm_get(i915);
> +
> +	if (preempt_client_init(i915, &client[0])) /* ELSP[0] */
> +		goto err_unlock;
> +	if (preempt_client_init(i915, &client[1])) /* ELSP[1] */
> +		goto err_client_0;
> +	if (preempt_client_init(i915, &client[2])) /* head of queue */
> +		goto err_client_1;
> +	if (preempt_client_init(i915, &client[3])) /* bystander */
> +		goto err_client_2;
> +
> +	for_each_engine(engine, i915, id) {
> +		int depth;
> +
> +		if (!engine->emit_init_breadcrumb)
> +			continue;
> +
> +		for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
> +			struct i915_request *rq[ARRAY_SIZE(client)];
> +			struct i915_request *dummy;
> +
> +			engine->execlists.preempt_hang.count = 0;
> +
> +			dummy = dummy_request(engine);
> +			if (!dummy)
> +				goto err_client_3;
> +
> +			for (i = 0; i < ARRAY_SIZE(client); i++) {
> +				rq[i] = igt_spinner_create_request(&client[i].spin,
> +								   client[i].ctx, engine,
> +								   MI_NOOP);
> +				if (IS_ERR(rq[i])) {
> +					err = PTR_ERR(rq[i]);
> +					goto err_wedged;
> +				}
> +
> +				/* Disable NEWCLIENT promotion */
> +				__i915_active_request_set(&rq[i]->timeline->last_request,
> +							  dummy);
> +				i915_request_add(rq[i]);
> +			}
> +
> +			dummy_request_free(dummy);
> +
> +			GEM_BUG_ON(i915_request_completed(rq[0]));
> +			if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
> +				pr_err("%s: First client failed to start\n",
> +				       engine->name);
> +				goto err_wedged;
> +			}
> +			GEM_BUG_ON(!i915_request_started(rq[0]));
> +
> +			if (i915_request_wait(rq[depth],
> +					      I915_WAIT_LOCKED |
> +					      I915_WAIT_PRIORITY,
> +					      1) != -ETIME) {
> +				pr_err("%s: Waiter depth:%d completed!\n",
> +				       engine->name, depth);
> +				goto err_wedged;
> +			}
> +
> +			for (i = 0; i < ARRAY_SIZE(client); i++)
> +				igt_spinner_end(&client[i].spin);
> +
> +			if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +				goto err_wedged;
> +
> +			if (engine->execlists.preempt_hang.count) {
> +				pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
> +				       engine->name,
> +				       engine->execlists.preempt_hang.count,
> +				       depth);
> +				err = -EINVAL;
> +				goto err_client_3;
> +			}
> +		}
> +	}
> +
> +	err = 0;
> +err_client_3:
> +	preempt_client_fini(&client[3]);
> +err_client_2:
> +	preempt_client_fini(&client[2]);
> +err_client_1:
> +	preempt_client_fini(&client[1]);
> +err_client_0:
> +	preempt_client_fini(&client[0]);
> +err_unlock:
> +	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +		err = -EIO;
> +	intel_runtime_pm_put(i915, wakeref);
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	return err;
> +
> +err_wedged:
> +	for (i = 0; i < ARRAY_SIZE(client); i++)
> +		igt_spinner_end(&client[i].spin);
> +	i915_gem_set_wedged(i915);
> +	err = -EIO;
> +	goto err_client_3;
> +}
> +
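
For anyone wanting to exercise the new subtest locally: the live
selftests are driven by the i915 module parameters, so something along
the lines of

	modprobe i915 live_selftests=-1

should run the full live set, execlists included. (Parameter name from
memory; check i915_selftest.c on your tree, or use the igt wrapper.)
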
>   static int live_chain_preempt(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
> @@ -887,6 +1049,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_preempt),
>   		SUBTEST(live_late_preempt),
>   		SUBTEST(live_suppress_self_preempt),
> +		SUBTEST(live_suppress_wait_preempt),
>   		SUBTEST(live_chain_preempt),
>   		SUBTEST(live_preempt_hang),
>   		SUBTEST(live_preempt_smoke),
> 