[Intel-gfx] [PATCH 11/15] drm/i915/execlists: Cancel banned contexts on schedule-out
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Oct 14 12:00:01 UTC 2019
On 14/10/2019 10:07, Chris Wilson wrote:
> On schedule-out (CS completion) of a banned context, scrub the context
> image so that we do not replay the active payload. The intent is that we
> skip banned payloads on request submission so that the timeline
> advancement continues on in the background. However, if we are returning
> to a preempted request, i915_request_skip() is ineffective and instead we
> need to patch up the context image so that it continues from the start
> of the next request.
>
> v2: Fixup cancellation so that we only scrub the payload of the active
> request and do not short-circuit the breadcrumbs (which might cause
> other contexts to execute out of order).
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_lrc.c | 129 ++++++++----
> drivers/gpu/drm/i915/gt/selftest_lrc.c | 273 +++++++++++++++++++++++++
> 2 files changed, 361 insertions(+), 41 deletions(-)
>
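Before the diff itself: to check I follow the "skip the payload but keep
the breadcrumb" idea from the commit message, a rough userspace sketch of
my reading. toy_submit() and all names here are mine, this is not the real
__i915_request_submit():

/*
 * Toy model only: a banned request drops its user payload and is marked
 * -EIO, but its breadcrumb still executes so the timeline advances and
 * inter-timeline dependencies stay ordered. Illustrative names only.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_request {
	const char *name;
	bool banned;
	int error;
};

static void toy_submit(struct toy_request *rq)
{
	if (rq->banned) {
		rq->error = -EIO;		/* report the cancellation... */
		printf("%s: payload skipped\n", rq->name);
	} else {
		printf("%s: payload executed\n", rq->name);
	}
	/* ...but always emit the breadcrumb so waiters are released */
	printf("%s: breadcrumb signalled\n", rq->name);
}

int main(void)
{
	struct toy_request good = { .name = "good", .banned = false };
	struct toy_request bad = { .name = "banned", .banned = true };

	toy_submit(&good);
	toy_submit(&bad);
	return 0;
}
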
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index e16ede75412b..b76b35194114 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -234,6 +234,9 @@ static void execlists_init_reg_state(u32 *reg_state,
> const struct intel_engine_cs *engine,
> const struct intel_ring *ring,
> bool close);
> +static void
> +__execlists_update_reg_state(const struct intel_context *ce,
> + const struct intel_engine_cs *engine);
>
> static void cancel_timer(struct timer_list *t)
> {
> @@ -270,6 +273,31 @@ static void mark_eio(struct i915_request *rq)
> i915_request_mark_complete(rq);
> }
>
> +static struct i915_request *active_request(struct i915_request *rq)
> +{
> + const struct intel_context * const ce = rq->hw_context;
> + struct i915_request *active = NULL;
> + struct list_head *list;
> +
> + if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
> + return rq;
> +
> + rcu_read_lock();
> + list = &rcu_dereference(rq->timeline)->requests;
> + list_for_each_entry_from_reverse(rq, list, link) {
> + if (i915_request_completed(rq))
> + break;
> +
> + if (rq->hw_context != ce)
> + break;
> +
> + active = rq;
> + }
> + rcu_read_unlock();
> +
> + return active;
> +}
> +
> static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
> {
> return (i915_ggtt_offset(engine->status_page.vma) +
> @@ -991,6 +1019,56 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
> tasklet_schedule(&ve->base.execlists.tasklet);
> }
>
> +static void restore_default_state(struct intel_context *ce,
> + struct intel_engine_cs *engine)
> +{
> + u32 *regs = ce->lrc_reg_state;
> +
> + if (engine->pinned_default_state)
> + memcpy(regs, /* skip restoring the vanilla PPHWSP */
> + engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
> + engine->context_size - PAGE_SIZE);
> +
> + execlists_init_reg_state(regs, ce, engine, ce->ring, false);
> +}
> +
> +static void cancel_active(struct i915_request *rq,
> + struct intel_engine_cs *engine)
> +{
> + struct intel_context * const ce = rq->hw_context;
> +
> + /*
> + * The executing context has been cancelled. Fixup the context so that
> + * it will be marked as incomplete [-EIO] upon resubmission and not
> + * execute any user payloads. We preserve the breadcrumbs and
> + * semaphores of the incomplete requests so that inter-timeline
> + * dependencies (i.e other timelines) remain correctly ordered.
> + *
> + * See __i915_request_submit() for applying -EIO and removing the
> + * payload on resubmission.
> + */
> + GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
> + __func__, engine->name, rq->fence.context, rq->fence.seqno);
> + __context_pin_acquire(ce);
> +
> + /* On resubmission of the active request, payload will be scrubbed */
> + rq = active_request(rq);
> + if (rq)
> + ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
Without this change, where would the head be pointing after
schedule_out? Somewhere in the middle of the active request? And with
this change it is rewound to the start of it?
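For my own understanding, a toy model of the rewind below, assuming
intel_ring_wrap() simply masks the offset into a power-of-two sized ring;
names and values are mine, not the driver's:

#include <stdio.h>

struct toy_ring {
	unsigned int size;	/* power of two */
	unsigned int head;
	unsigned int tail;
};

static unsigned int toy_ring_wrap(const struct toy_ring *ring, unsigned int pos)
{
	return pos & (ring->size - 1);
}

int main(void)
{
	/* CS stopped partway through the active request's payload */
	struct toy_ring ring = { .size = 4096, .head = 2048, .tail = 3072 };
	unsigned int active_rq_head = 1536;	/* start of oldest incomplete request */

	/* rewind so the scrubbed context image replays from the request start */
	ring.head = toy_ring_wrap(&ring, active_rq_head);
	printf("head rewound to 0x%03x (tail 0x%03x)\n", ring.head, ring.tail);
	return 0;
}
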
Regards,
Tvrtko
> + else
> + ce->ring->head = ce->ring->tail;
> + intel_ring_update_space(ce->ring);
> +
> + /* Scrub the context image to prevent replaying the previous batch */
> + restore_default_state(ce, engine);
> + __execlists_update_reg_state(ce, engine);
> +
> + /* We've switched away, so this should be a no-op, but intent matters */
> + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
> +
> + __context_pin_release(ce);
> +}
> +
> static inline void
> __execlists_schedule_out(struct i915_request *rq,
> struct intel_engine_cs * const engine)
> @@ -1001,6 +1079,9 @@ __execlists_schedule_out(struct i915_request *rq,
> execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
> intel_gt_pm_put(engine->gt);
>
> + if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
> + cancel_active(rq, engine);
> +
> /*
> * If this is part of a virtual engine, its next request may
> * have been blocked waiting for access to the active context.
> @@ -1395,6 +1476,10 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
> if (!rq)
> return 0;
>
> + /* Force a fast reset for terminated contexts (ignoring sysfs!) */
> + if (unlikely(i915_gem_context_is_banned(rq->gem_context)))
> + return 1;
> +
> return READ_ONCE(engine->props.preempt_timeout);
> }
>
> @@ -2810,29 +2895,6 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
> &execlists->csb_status[reset_value]);
> }
>
> -static struct i915_request *active_request(struct i915_request *rq)
> -{
> - const struct intel_context * const ce = rq->hw_context;
> - struct i915_request *active = NULL;
> - struct list_head *list;
> -
> - if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
> - return rq;
> -
> - list = &i915_request_active_timeline(rq)->requests;
> - list_for_each_entry_from_reverse(rq, list, link) {
> - if (i915_request_completed(rq))
> - break;
> -
> - if (rq->hw_context != ce)
> - break;
> -
> - active = rq;
> - }
> -
> - return active;
> -}
> -
> static void __execlists_reset_reg_state(const struct intel_context *ce,
> const struct intel_engine_cs *engine)
> {
> @@ -2849,7 +2911,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
> struct intel_engine_execlists * const execlists = &engine->execlists;
> struct intel_context *ce;
> struct i915_request *rq;
> - u32 *regs;
>
> mb(); /* paranoia: read the CSB pointers from after the reset */
> clflush(execlists->csb_write);
> @@ -2928,13 +2989,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
> * to recreate its own state.
> */
> GEM_BUG_ON(!intel_context_is_pinned(ce));
> - regs = ce->lrc_reg_state;
> - if (engine->pinned_default_state) {
> - memcpy(regs, /* skip restoring the vanilla PPHWSP */
> - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
> - engine->context_size - PAGE_SIZE);
> - }
> - execlists_init_reg_state(regs, ce, engine, ce->ring, false);
> + restore_default_state(ce, engine);
>
> out_replay:
> GEM_TRACE("%s replay {head:%04x, tail:%04x\n",
> @@ -4571,16 +4626,8 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
> * future request will be after userspace has had the opportunity
> * to recreate its own state.
> */
> - if (scrub) {
> - u32 *regs = ce->lrc_reg_state;
> -
> - if (engine->pinned_default_state) {
> - memcpy(regs, /* skip restoring the vanilla PPHWSP */
> - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
> - engine->context_size - PAGE_SIZE);
> - }
> - execlists_init_reg_state(regs, ce, engine, ce->ring, false);
> - }
> + if (scrub)
> + restore_default_state(ce, engine);
>
> /* Rerun the request; its payload has been neutered (if guilty). */
> ce->ring->head = head;
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index e7a86f60cf82..dd3e7b47b76d 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -7,6 +7,7 @@
> #include <linux/prime_numbers.h>
>
> #include "gem/i915_gem_pm.h"
> +#include "gt/intel_engine_heartbeat.h"
> #include "gt/intel_reset.h"
>
> #include "i915_selftest.h"
> @@ -1016,6 +1017,277 @@ static int live_nopreempt(void *arg)
> goto err_client_b;
> }
>
> +struct live_preempt_cancel {
> + struct intel_engine_cs *engine;
> + struct preempt_client a, b;
> +};
> +
> +static int __cancel_active0(struct live_preempt_cancel *arg)
> +{
> + struct i915_request *rq;
> + struct igt_live_test t;
> + int err;
> +
> + /* Preempt cancel of ELSP0 */
> + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
> +
> + if (igt_live_test_begin(&t, arg->engine->i915,
> + __func__, arg->engine->name))
> + return -EIO;
> +
> + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
> + rq = spinner_create_request(&arg->a.spin,
> + arg->a.ctx, arg->engine,
> + MI_ARB_CHECK);
> + if (IS_ERR(rq))
> + return PTR_ERR(rq);
> +
> + i915_request_get(rq);
> + i915_request_add(rq);
> + if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
> + err = -EIO;
> + goto out;
> + }
> +
> + i915_gem_context_set_banned(arg->a.ctx);
> + err = intel_engine_pulse(arg->engine);
> + if (err)
> + goto out;
> +
> + if (i915_request_wait(rq, 0, HZ / 5) < 0) {
> + err = -EIO;
> + goto out;
> + }
> +
> + if (rq->fence.error != -EIO) {
> + pr_err("Cancelled inflight0 request did not report -EIO\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> +out:
> + i915_request_put(rq);
> + if (igt_live_test_end(&t))
> + err = -EIO;
> + return err;
> +}
> +
> +static int __cancel_active1(struct live_preempt_cancel *arg)
> +{
> + struct i915_request *rq[2] = {};
> + struct igt_live_test t;
> + int err;
> +
> + /* Preempt cancel of ELSP1 */
> + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
> +
> + if (igt_live_test_begin(&t, arg->engine->i915,
> + __func__, arg->engine->name))
> + return -EIO;
> +
> + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
> + rq[0] = spinner_create_request(&arg->a.spin,
> + arg->a.ctx, arg->engine,
> + MI_NOOP); /* no preemption */
> + if (IS_ERR(rq[0]))
> + return PTR_ERR(rq[0]);
> +
> + i915_request_get(rq[0]);
> + i915_request_add(rq[0]);
> + if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
> + err = -EIO;
> + goto out;
> + }
> +
> + clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags);
> + rq[1] = spinner_create_request(&arg->b.spin,
> + arg->b.ctx, arg->engine,
> + MI_ARB_CHECK);
> + if (IS_ERR(rq[1])) {
> + err = PTR_ERR(rq[1]);
> + goto out;
> + }
> +
> + i915_request_get(rq[1]);
> + err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
> + i915_request_add(rq[1]);
> + if (err)
> + goto out;
> +
> + i915_gem_context_set_banned(arg->b.ctx);
> + err = intel_engine_pulse(arg->engine);
> + if (err)
> + goto out;
> +
> + igt_spinner_end(&arg->a.spin);
> + if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
> + err = -EIO;
> + goto out;
> + }
> +
> + if (rq[0]->fence.error != 0) {
> + pr_err("Normal inflight0 request did not complete\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> + if (rq[1]->fence.error != -EIO) {
> + pr_err("Cancelled inflight1 request did not report -EIO\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> +out:
> + i915_request_put(rq[1]);
> + i915_request_put(rq[0]);
> + if (igt_live_test_end(&t))
> + err = -EIO;
> + return err;
> +}
> +
> +static int __cancel_queued(struct live_preempt_cancel *arg)
> +{
> + struct i915_request *rq[3] = {};
> + struct igt_live_test t;
> + int err;
> +
> + /* Full ELSP and one in the wings */
> + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
> +
> + if (igt_live_test_begin(&t, arg->engine->i915,
> + __func__, arg->engine->name))
> + return -EIO;
> +
> + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
> + rq[0] = spinner_create_request(&arg->a.spin,
> + arg->a.ctx, arg->engine,
> + MI_ARB_CHECK);
> + if (IS_ERR(rq[0]))
> + return PTR_ERR(rq[0]);
> +
> + i915_request_get(rq[0]);
> + i915_request_add(rq[0]);
> + if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
> + err = -EIO;
> + goto out;
> + }
> +
> + clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags);
> + rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
> + if (IS_ERR(rq[1])) {
> + err = PTR_ERR(rq[1]);
> + goto out;
> + }
> +
> + i915_request_get(rq[1]);
> + err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
> + i915_request_add(rq[1]);
> + if (err)
> + goto out;
> +
> + rq[2] = spinner_create_request(&arg->b.spin,
> + arg->a.ctx, arg->engine,
> + MI_ARB_CHECK);
> + if (IS_ERR(rq[2])) {
> + err = PTR_ERR(rq[2]);
> + goto out;
> + }
> +
> + i915_request_get(rq[2]);
> + err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
> + i915_request_add(rq[2]);
> + if (err)
> + goto out;
> +
> + i915_gem_context_set_banned(arg->a.ctx);
> + err = intel_engine_pulse(arg->engine);
> + if (err)
> + goto out;
> +
> + if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
> + err = -EIO;
> + goto out;
> + }
> +
> + if (rq[0]->fence.error != -EIO) {
> + pr_err("Cancelled inflight0 request did not report -EIO\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> + if (rq[1]->fence.error != 0) {
> + pr_err("Normal inflight1 request did not complete\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> + if (rq[2]->fence.error != -EIO) {
> + pr_err("Cancelled queued request did not report -EIO\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> +out:
> + i915_request_put(rq[2]);
> + i915_request_put(rq[1]);
> + i915_request_put(rq[0]);
> + if (igt_live_test_end(&t))
> + err = -EIO;
> + return err;
> +}
> +
> +static int live_preempt_cancel(void *arg)
> +{
> + struct drm_i915_private *i915 = arg;
> + struct live_preempt_cancel data;
> + enum intel_engine_id id;
> + int err = -ENOMEM;
> +
> + /*
> + * To cancel an inflight context, we need to first remove it from the
> + * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
> + */
> +
> + if (!HAS_LOGICAL_RING_PREEMPTION(i915))
> + return 0;
> +
> + if (preempt_client_init(i915, &data.a))
> + return -ENOMEM;
> + if (preempt_client_init(i915, &data.b))
> + goto err_client_a;
> +
> + for_each_engine(data.engine, i915, id) {
> + if (!intel_engine_has_preemption(data.engine))
> + continue;
> +
> + err = __cancel_active0(&data);
> + if (err)
> + goto err_wedged;
> +
> + err = __cancel_active1(&data);
> + if (err)
> + goto err_wedged;
> +
> + err = __cancel_queued(&data);
> + if (err)
> + goto err_wedged;
> + }
> +
> + err = 0;
> +err_client_b:
> + preempt_client_fini(&data.b);
> +err_client_a:
> + preempt_client_fini(&data.a);
> + return err;
> +
> +err_wedged:
> + GEM_TRACE_DUMP();
> + igt_spinner_end(&data.b.spin);
> + igt_spinner_end(&data.a.spin);
> + intel_gt_set_wedged(&i915->gt);
> + goto err_client_b;
> +}
> +
> static int live_suppress_self_preempt(void *arg)
> {
> struct drm_i915_private *i915 = arg;
> @@ -2549,6 +2821,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
> SUBTEST(live_preempt),
> SUBTEST(live_late_preempt),
> SUBTEST(live_nopreempt),
> + SUBTEST(live_preempt_cancel),
> SUBTEST(live_suppress_self_preempt),
> SUBTEST(live_suppress_wait_preempt),
> SUBTEST(live_chain_preempt),
>