[Intel-gfx] [PATCH 2/8] drm/i915/execlists: Suppress preempting self

Tue Jan 29 10:13:47 UTC 2019

On 29/01/2019 08:58, Chris Wilson wrote:
> In order to avoid preempting ourselves, we currently refuse to schedule
> the tasklet if we reschedule an inflight context. However, this glosses
> over a few issues such as what happens after a CS completion event and
> we then preempt the newly executing context with itself, or if something
> else causes a tasklet_schedule triggering the same evaluation to
> preempt the active context with itself.
> 
> However, when we avoid preempting ELSP[0], we still retain the preemption
> value as it may match a second preemption request within the same time period
> that we need to resolve after the next CS event. However, since we only
> store the maximum preemption priority seen, it may not match the
> subsequent event and so we should double check whether or not we
> actually do need to trigger a preempt-to-idle by comparing the top
> priorities from each queue. Later, this gives us a hook for finer
> control over deciding whether the preempt-to-idle is justified.
> 
> The sequence of events where we end up preempting for no avail is:
> 
> 1. Queue requests/contexts A, B
> 2. Priority boost A; no preemption as it is executing, but keep hint
> 3. After CS switch, B is less than hint, force preempt-to-idle
> 4. Resubmit B after idling
> 
> v2: We can simplify a bunch of tests based on the knowledge that PI will
> ensure that earlier requests along the same context will have the highest
> priority.
> v3: Demonstrate the stale preemption hint with a selftest
> 
> References: a2bf92e8cc16 ("drm/i915/execlists: Avoid kicking priority on the current context")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_scheduler.c       |  20 ++-
>   drivers/gpu/drm/i915/intel_guc_submission.c |   2 +
>   drivers/gpu/drm/i915/intel_lrc.c            |  96 ++++++++++++--
>   drivers/gpu/drm/i915/intel_ringbuffer.h     |   1 +
>   drivers/gpu/drm/i915/selftests/intel_lrc.c  | 138 ++++++++++++++++++++
>   5 files changed, 245 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 0da718ceab43..7db1255665a8 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -238,6 +238,18 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
>   	return engine;
>   }
>   
> +static bool inflight(const struct i915_request *rq,
> +		     const struct intel_engine_cs *engine)
> +{
> +	const struct i915_request *active;
> +
> +	if (!rq->global_seqno)
> +		return false;
> +
> +	active = port_request(engine->execlists.port);
> +	return active->hw_context == rq->hw_context;
> +}
> +
>   static void __i915_schedule(struct i915_request *rq,
>   			    const struct i915_sched_attr *attr)
>   {
> @@ -327,6 +339,7 @@ static void __i915_schedule(struct i915_request *rq,
>   		INIT_LIST_HEAD(&dep->dfs_link);
>   
>   		engine = sched_lock_engine(node, engine);
> +		lockdep_assert_held(&engine->timeline.lock);
>   
>   		/* Recheck after acquiring the engine->timeline.lock */
>   		if (prio <= node->attr.priority || node_signaled(node))
> @@ -355,17 +368,16 @@ static void __i915_schedule(struct i915_request *rq,
>   		if (prio <= engine->execlists.preempt_priority_hint)
>   			continue;
>   
> +		engine->execlists.preempt_priority_hint = prio;
> +
>   		/*
>   		 * If we are already the currently executing context, don't
>   		 * bother evaluating if we should preempt ourselves.
>   		 */
> -		if (node_to_request(node)->global_seqno &&
> -		    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
> -				      node_to_request(node)->global_seqno))
> +		if (inflight(node_to_request(node), engine))
>   			continue;
>   
>   		/* Defer (tasklet) submission until after all of our updates. */
> -		engine->execlists.preempt_priority_hint = prio;
>   		tasklet_hi_schedule(&engine->execlists.tasklet);
>   	}
>   
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 6cdd3f097209..55ec122ee9e9 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -629,6 +629,8 @@ static void inject_preempt_context(struct work_struct *work)
>   				       EXECLISTS_ACTIVE_PREEMPT);
>   		tasklet_schedule(&engine->execlists.tasklet);
>   	}
> +
> +	(void)I915_SELFTEST_ONLY(engine->execlists.preempt_hang.count++);
>   }
>   
>   /*
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 9219efa4ee79..b8cddef5aed8 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -188,13 +188,90 @@ static inline int rq_prio(const struct i915_request *rq)
>   	return rq->sched.attr.priority;
>   }
>   
> +static int queue_prio(const struct intel_engine_execlists *execlists)
> +{
> +	struct i915_priolist *p;
> +	struct rb_node *rb;
> +
> +	rb = rb_first_cached(&execlists->queue);
> +	if (!rb)
> +		return INT_MIN;
> +
> +	/*
> +	 * As the priolist[] are inverted, with the highest priority in [0],
> +	 * we have to flip the index value to become priority.
> +	 */
> +	p = to_priolist(rb);
> +	return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);

Where does the +1 come from?

p->priority stores in the user range. So I expected something like:

	(p->priority << SHIFT) + (COUNT - ffs)

Ok it's the same thing.. my algebra is beyond rusty. :)

> +}
> +
>   static inline bool need_preempt(const struct intel_engine_cs *engine,
> -				const struct i915_request *last,
> -				int prio)
> +				const struct i915_request *rq)
> +{
> +	const int last_prio = rq_prio(rq);
> +
> +	if (!intel_engine_has_preemption(engine))
> +		return false;
> +
> +	if (i915_request_completed(rq))
> +		return false;
> +
> +	/*
> +	 * Check if the current priority hint merits a preemption attempt.
> +	 *
> +	 * We record the highest value priority we saw during rescheduling
> +	 * prior to this dequeue, therefore we know that if it is strictly
> +	 * less than the current tail of ESLP[0], we do not need to force
> +	 * a preempt-to-idle cycle.
> +	 *
> +	 * However, the priority hint is a mere hint that we may need to
> +	 * preempt. If that hint is stale or we may be trying to preempt
> +	 * ourselves, ignore the request.
> +	 */
> +	if (!__execlists_need_preempt(engine->execlists.preempt_priority_hint,
> +				      last_prio))
> +		return false;
> +
> +	/*
> +	 * Check against the first request in ELSP[1], it will, thanks to the
> +	 * power of PI, be the highest priority of that context.
> +	 */
> +	if (!list_is_last(&rq->link, &engine->timeline.requests) &&
> +	    rq_prio(list_next_entry(rq, link)) > last_prio)
> +		return true;
> +
> +	/*
> +	 * If the inflight context did not trigger the preemption, then maybe
> +	 * it was the set of queued requests? Pick the highest priority in
> +	 * the queue (the first active priolist) and see if it deserves to be
> +	 * running instead of ELSP[0].
> +	 *
> +	 * The highest priority request in the queue can not be either
> +	 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
> +	 * context, it's priority would not exceed ELSP[0] aka last_prio.
> +	 */
> +	return queue_prio(&engine->execlists) > last_prio;
> +}
> +
> +__maybe_unused static inline bool
> +assert_priority_queue(const struct intel_engine_execlists *execlists,
> +		      const struct i915_request *prev,
> +		      const struct i915_request *next)
>   {
> -	return (intel_engine_has_preemption(engine) &&
> -		__execlists_need_preempt(prio, rq_prio(last)) &&
> -		!i915_request_completed(last));
> +	if (!prev)
> +		return true;
> +
> +	/*
> +	 * Without preemption, the prev may refer to the still active element
> +	 * which we refuse to let go.
> +	 *
> +	 * Even with preemption, there are times when we think it is better not
> +	 * to preempt and leave an ostensibly lower priority request in flight.
> +	 */
> +	if (port_request(execlists->port) == prev)
> +		return true;
> +
> +	return rq_prio(prev) >= rq_prio(next);
>   }
>   
>   /*
> @@ -523,6 +600,8 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>   
>   	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
>   	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
> +
> +	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
>   }
>   
>   static void complete_preempt_context(struct intel_engine_execlists *execlists)
> @@ -591,7 +670,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
>   			return;
>   
> -		if (need_preempt(engine, last, execlists->preempt_priority_hint)) {
> +		if (need_preempt(engine, last)) {
>   			inject_preempt_context(engine);
>   			return;
>   		}
> @@ -637,8 +716,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		int i;
>   
>   		priolist_for_each_request_consume(rq, rn, p, i) {
> -			GEM_BUG_ON(last &&
> -				   need_preempt(engine, last, rq_prio(rq)));
> +			GEM_BUG_ON(!assert_priority_queue(execlists, last, rq));
>   
>   			/*
>   			 * Can we combine this request with the current port?
> @@ -884,6 +962,8 @@ static void process_csb(struct intel_engine_cs *engine)
>   	const u32 * const buf = execlists->csb_status;
>   	u8 head, tail;
>   
> +	lockdep_assert_held(&engine->timeline.lock);
> +
>   	/*
>   	 * Note that csb_write, csb_status may be either in HWSP or mmio.
>   	 * When reading from the csb_write mmio register, we have to be
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 5e2231d0c2fa..5ce54f23a945 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -203,6 +203,7 @@ struct i915_priolist {
>   
>   struct st_preempt_hang {
>   	struct completion completion;
> +	unsigned int count;
>   	bool inject_hang;
>   };
>   
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index 2b2ecd76c2ac..58f534a39118 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -268,6 +268,143 @@ static int live_late_preempt(void *arg)
>   	goto err_ctx_lo;
>   }
>   
> +struct preempt_client {
> +	struct igt_spinner spin;
> +	struct i915_gem_context *ctx;
> +};
> +
> +static int preempt_client_init(struct drm_i915_private *i915,
> +			       struct preempt_client *c)
> +{
> +	c->ctx = kernel_context(i915);
> +	if (!c->ctx)
> +		return -ENOMEM;
> +
> +	if (igt_spinner_init(&c->spin, i915))
> +		goto err_ctx;
> +
> +	return 0;
> +
> +err_ctx:
> +	kernel_context_close(c->ctx);
> +	return -ENOMEM;
> +}
> +
> +static void preempt_client_fini(struct preempt_client *c)
> +{
> +	igt_spinner_fini(&c->spin);
> +	kernel_context_close(c->ctx);
> +}
> +
> +static int live_suppress_preempt(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_engine_cs *engine;
> +	struct i915_sched_attr attr = {
> +		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
> +	};
> +	struct preempt_client a, b;
> +	enum intel_engine_id id;
> +	intel_wakeref_t wakeref;
> +	int err = -ENOMEM;
> +
> +	/*
> +	 * Verify that if a preemption request does not cause a change in
> +	 * the current execution order, the preempt-to-idle injection is
> +	 * skipped and that we do not accidentally apply it after the CS
> +	 * completion event.
> +	 */
> +
> +	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
> +		return 0;
> +
> +	if (USES_GUC_SUBMISSION(i915))
> +		return 0; /* presume black blox */
Pity but future proof I suppose. :/

Even though you bothered to add the counter into the guc path.

Good it's blox and not something worse. :)

> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +	wakeref = intel_runtime_pm_get(i915);
> +
> +	if (preempt_client_init(i915, &a))
> +		goto err_unlock;
> +	if (preempt_client_init(i915, &b))
> +		goto err_client_a;
> +
> +	for_each_engine(engine, i915, id) {
> +		struct i915_request *rq_a, *rq_b;
> +		int depth;
> +
> +		engine->execlists.preempt_hang.count = 0;
> +
> +		rq_a = igt_spinner_create_request(&a.spin,
> +						  a.ctx, engine,
> +						  MI_NOOP);
> +		if (IS_ERR(rq_a)) {
> +			err = PTR_ERR(rq_a);
> +			goto err_client_b;
> +		}
> +
> +		i915_request_add(rq_a);
> +		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
> +			pr_err("First client failed to start\n");
> +			goto err_wedged;
> +		}
> +
> +		for (depth = 0; depth < 8; depth++) {
> +			rq_b = igt_spinner_create_request(&b.spin,
> +							  b.ctx, engine,
> +							  MI_NOOP);
> +			if (IS_ERR(rq_b)) {
> +				err = PTR_ERR(rq_b);
> +				goto err_client_b;
> +			}
> +			i915_request_add(rq_b);
> +
> +			GEM_BUG_ON(i915_request_completed(rq_a));
> +			engine->schedule(rq_a, &attr);
> +			igt_spinner_end(&a.spin);
> +
> +			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
> +				pr_err("Second client failed to start\n");
> +				goto err_wedged;
> +			}
> +
> +			swap(a, b);
> +			rq_a = rq_b;

Could store rq in the clients as well I think.

> +		}
> +		igt_spinner_end(&a.spin);
> +
> +		if (engine->execlists.preempt_hang.count) {
> +			pr_err("Preemption recorded x%d, depth %d; should have been suppressed!\n",
> +			       engine->execlists.preempt_hang.count,
> +			       depth);
> +			err = -EINVAL;
> +			goto err_client_b;
> +		}
> +
> +		if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +			goto err_wedged;
> +	}
> +
> +	err = 0;
> +err_client_b:
> +	preempt_client_fini(&b);
> +err_client_a:
> +	preempt_client_fini(&a);
> +err_unlock:
> +	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +		err = -EIO;
> +	intel_runtime_pm_put(i915, wakeref);
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	return err;
> +
> +err_wedged:
> +	igt_spinner_end(&b.spin);
> +	igt_spinner_end(&a.spin);
> +	i915_gem_set_wedged(i915);
> +	err = -EIO;
> +	goto err_client_b;
> +}
> +
>   static int live_preempt_hang(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
> @@ -647,6 +784,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_sanitycheck),
>   		SUBTEST(live_preempt),
>   		SUBTEST(live_late_preempt),
> +		SUBTEST(live_suppress_preempt),
>   		SUBTEST(live_preempt_hang),
>   		SUBTEST(live_preempt_smoke),
>   	};
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko