[Intel-gfx] [PATCH 11/15] drm/i915/execlists: Cancel banned contexts on schedule-out
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Oct 14 12:00:01 UTC 2019
On 14/10/2019 10:07, Chris Wilson wrote:
> On schedule-out (CS completion) of a banned context, scrub the context
> image so that we do not replay the active payload. The intent is that we
> skip banned payloads on request submission so that the timeline
> advancement continues on in the background. However, if we are returning
> to a preempted request, i915_request_skip() is ineffective and instead we
> need to patch up the context image so that it continues from the start
> of the next request.
>
> v2: Fixup cancellation so that we only scrub the payload of the active
> request and do not short-circuit the breadcrumbs (which might cause
> other contexts to execute out of order).
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_lrc.c | 129 ++++++++----
> drivers/gpu/drm/i915/gt/selftest_lrc.c | 273 +++++++++++++++++++++++++
> 2 files changed, 361 insertions(+), 41 deletions(-)
>
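Before the diff itself: to check I follow the "skip the payload but keep
the breadcrumb" idea from the commit message, a rough userspace sketch of
my reading. toy_submit() and all names here are mine, this is not the real
__i915_request_submit():

/*
 * Toy model only: a banned request drops its user payload and is marked
 * -EIO, but its breadcrumb still executes so the timeline advances and
 * inter-timeline dependencies stay ordered. Illustrative names only.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_request {
	const char *name;
	bool banned;
	int error;
};

static void toy_submit(struct toy_request *rq)
{
	if (rq->banned) {
		rq->error = -EIO;		/* report the cancellation... */
		printf("%s: payload skipped\n", rq->name);
	} else {
		printf("%s: payload executed\n", rq->name);
	}
	/* ...but always emit the breadcrumb so waiters are released */
	printf("%s: breadcrumb signalled\n", rq->name);
}

int main(void)
{
	struct toy_request good = { .name = "good", .banned = false };
	struct toy_request bad = { .name = "banned", .banned = true };

	toy_submit(&good);
	toy_submit(&bad);
	return 0;
}
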
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index e16ede75412b..b76b35194114 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -234,6 +234,9 @@ static void execlists_init_reg_state(u32 *reg_state,
> const struct intel_engine_cs *engine,
> const struct intel_ring *ring,
> bool close);
> +static void
> +__execlists_update_reg_state(const struct intel_context *ce,
> + const struct intel_engine_cs *engine);
>
> static void cancel_timer(struct timer_list *t)
> {
> @@ -270,6 +273,31 @@ static void mark_eio(struct i915_request *rq)
> i915_request_mark_complete(rq);
> }
>
> +static struct i915_request *active_request(struct i915_request *rq)
> +{
> + const struct intel_context * const ce = rq->hw_context;
> + struct i915_request *active = NULL;
> + struct list_head *list;
> +
> + if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
> + return rq;
> +
> + rcu_read_lock();
> + list = &rcu_dereference(rq->timeline)->requests;
> + list_for_each_entry_from_reverse(rq, list, link) {
> + if (i915_request_completed(rq))
> + break;
> +
> + if (rq->hw_context != ce)
> + break;
> +
> + active = rq;
> + }
> + rcu_read_unlock();
> +
> + return active;
> +}
> +
> static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
> {
> return (i915_ggtt_offset(engine->status_page.vma) +
> @@ -991,6 +1019,56 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
> tasklet_schedule(&ve->base.execlists.tasklet);
> }
>
> +static void restore_default_state(struct intel_context *ce,
> + struct intel_engine_cs *engine)
> +{
> + u32 *regs = ce->lrc_reg_state;
> +
> + if (engine->pinned_default_state)
> + memcpy(regs, /* skip restoring the vanilla PPHWSP */
> + engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
> + engine->context_size - PAGE_SIZE);
> +
> + execlists_init_reg_state(regs, ce, engine, ce->ring, false);
> +}
> +
> +static void cancel_active(struct i915_request *rq,
> + struct intel_engine_cs *engine)
> +{
> + struct intel_context * const ce = rq->hw_context;
> +
> + /*
> + * The executing context has been cancelled. Fixup the context so that
> + * it will be marked as incomplete [-EIO] upon resubmission and not
> + * execute any user payloads. We preserve the breadcrumbs and
> + * semaphores of the incomplete requests so that inter-timeline
> + * dependencies (i.e other timelines) remain correctly ordered.
> + *
> + * See __i915_request_submit() for applying -EIO and removing the
> + * payload on resubmission.
> + */
> + GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
> + __func__, engine->name, rq->fence.context, rq->fence.seqno);
> + __context_pin_acquire(ce);
> +
> + /* On resubmission of the active request, payload will be scrubbed */
> + rq = active_request(rq);
> + if (rq)
> + ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
Without this change, where would the head be pointing after
schedule_out? Somewhere in the middle of the active request? And with
this change it is rewound to the start of it?
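For my own understanding, a toy model of the rewind below, assuming
intel_ring_wrap() simply masks the offset into a power-of-two sized ring;
names and values are mine, not the driver's:

#include <stdio.h>

struct toy_ring {
	unsigned int size;	/* power of two */
	unsigned int head;
	unsigned int tail;
};

static unsigned int toy_ring_wrap(const struct toy_ring *ring, unsigned int pos)
{
	return pos & (ring->size - 1);
}

int main(void)
{
	/* CS stopped partway through the active request's payload */
	struct toy_ring ring = { .size = 4096, .head = 2048, .tail = 3072 };
	unsigned int active_rq_head = 1536;	/* start of oldest incomplete request */

	/* rewind so the scrubbed context image replays from the request start */
	ring.head = toy_ring_wrap(&ring, active_rq_head);
	printf("head rewound to 0x%03x (tail 0x%03x)\n", ring.head, ring.tail);
	return 0;
}
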
Regards,
Tvrtko
> + else
> + ce->ring->head = ce->ring->tail;
> + intel_ring_update_space(ce->ring);
> +
> + /* Scrub the context image to prevent replaying the previous batch */
> + restore_default_state(ce, engine);
> + __execlists_update_reg_state(ce, engine);
> +
> + /* We've switched away, so this should be a no-op, but intent matters */
> + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
> +
> + __context_pin_release(ce);
> +}
> +
> static inline void
> __execlists_schedule_out(struct i915_request *rq,
> struct intel_engine_cs * const engine)
> @@ -1001,6 +1079,9 @@ __execlists_schedule_out(struct i915_request *rq,
> execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
> intel_gt_pm_put(engine->gt);
>
> + if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
> + cancel_active(rq, engine);
> +
> /*
> * If this is part of a virtual engine, its next request may
> * have been blocked waiting for access to the active context.
> @@ -1395,6 +1476,10 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
> if (!rq)
> return 0;
>
> + /* Force a fast reset for terminated contexts (ignoring sysfs!) */
> + if (unlikely(i915_gem_context_is_banned(rq->gem_context)))
> + return 1;
> +
> return READ_ONCE(engine->props.preempt_timeout);
> }
>
> @@ -2810,29 +2895,6 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
> &execlists->csb_status[reset_value]);
> }
>
> -static struct i915_request *active_request(struct i915_request *rq)
> -{
> - const struct intel_context * const ce = rq->hw_context;
> - struct i915_request *active = NULL;
> - struct list_head *list;
> -
> - if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
> - return rq;
> -
> - list = &i915_request_active_timeline(rq)->requests;
> - list_for_each_entry_from_reverse(rq, list, link) {
> - if (i915_request_completed(rq))
> - break;
> -
> - if (rq->hw_context != ce)
> - break;
> -
> - active = rq;
> - }
> -
> - return active;
> -}
> -
> static void __execlists_reset_reg_state(const struct intel_context *ce,
> const struct intel_engine_cs *engine)
> {
> @@ -2849,7 +2911,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
> struct intel_engine_execlists * const execlists = &engine->execlists;
> struct intel_context *ce;
> struct i915_request *rq;
> - u32 *regs;
>
> mb(); /* paranoia: read the CSB pointers from after the reset */
> clflush(execlists->csb_write);
> @@ -2928,13 +2989,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
> * to recreate its own state.
> */
> GEM_BUG_ON(!intel_context_is_pinned(ce));
> - regs = ce->lrc_reg_state;
> - if (engine->pinned_default_state) {
> - memcpy(regs, /* skip restoring the vanilla PPHWSP */
> - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
> - engine->context_size - PAGE_SIZE);
> - }
> - execlists_init_reg_state(regs, ce, engine, ce->ring, false);
> + restore_default_state(ce, engine);
>
> out_replay:
> GEM_TRACE("%s replay {head:%04x, tail:%04x\n",
> @@ -4571,16 +4626,8 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
> * future request will be after userspace has had the opportunity
> * to recreate its own state.
> */
> - if (scrub) {
> - u32 *regs = ce->lrc_reg_state;
> -
> - if (engine->pinned_default_state) {
> - memcpy(regs, /* skip restoring the vanilla PPHWSP */
> - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
> - engine->context_size - PAGE_SIZE);
> - }
> - execlists_init_reg_state(regs, ce, engine, ce->ring, false);
> - }
> + if (scrub)
> + restore_default_state(ce, engine);
>
> /* Rerun the request; its payload has been neutered (if guilty). */
> ce->ring->head = head;
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index e7a86f60cf82..dd3e7b47b76d 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -7,6 +7,7 @@
> #include <linux/prime_numbers.h>
>
> #include "gem/i915_gem_pm.h"
> +#include "gt/intel_engine_heartbeat.h"
> #include "gt/intel_reset.h"
>
> #include "i915_selftest.h"
> @@ -1016,6 +1017,277 @@ static int live_nopreempt(void *arg)
> goto err_client_b;
> }
>
> +struct live_preempt_cancel {
> + struct intel_engine_cs *engine;
> + struct preempt_client a, b;
> +};
> +
> +static int __cancel_active0(struct live_preempt_cancel *arg)
> +{
> + struct i915_request *rq;
> + struct igt_live_test t;
> + int err;
> +
> + /* Preempt cancel of ELSP0 */
> + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
> +
> + if (igt_live_test_begin(&t, arg->engine->i915,
> + __func__, arg->engine->name))
> + return -EIO;
> +
> + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
> + rq = spinner_create_request(&arg->a.spin,
> + arg->a.ctx, arg->engine,
> + MI_ARB_CHECK);
> + if (IS_ERR(rq))
> + return PTR_ERR(rq);
> +
> + i915_request_get(rq);
> + i915_request_add(rq);
> + if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
> + err = -EIO;
> + goto out;
> + }
> +
> + i915_gem_context_set_banned(arg->a.ctx);
> + err = intel_engine_pulse(arg->engine);
> + if (err)
> + goto out;
> +
> + if (i915_request_wait(rq, 0, HZ / 5) < 0) {
> + err = -EIO;
> + goto out;
> + }
> +
> + if (rq->fence.error != -EIO) {
> + pr_err("Cancelled inflight0 request did not report -EIO\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> +out:
> + i915_request_put(rq);
> + if (igt_live_test_end(&t))
> + err = -EIO;
> + return err;
> +}
> +
> +static int __cancel_active1(struct live_preempt_cancel *arg)
> +{
> + struct i915_request *rq[2] = {};
> + struct igt_live_test t;
> + int err;
> +
> + /* Preempt cancel of ELSP1 */
> + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
> +
> + if (igt_live_test_begin(&t, arg->engine->i915,
> + __func__, arg->engine->name))
> + return -EIO;
> +
> + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
> + rq[0] = spinner_create_request(&arg->a.spin,
> + arg->a.ctx, arg->engine,
> + MI_NOOP); /* no preemption */
> + if (IS_ERR(rq[0]))
> + return PTR_ERR(rq[0]);
> +
> + i915_request_get(rq[0]);
> + i915_request_add(rq[0]);
> + if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
> + err = -EIO;
> + goto out;
> + }
> +
> + clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags);
> + rq[1] = spinner_create_request(&arg->b.spin,
> + arg->b.ctx, arg->engine,
> + MI_ARB_CHECK);
> + if (IS_ERR(rq[1])) {
> + err = PTR_ERR(rq[1]);
> + goto out;
> + }
> +
> + i915_request_get(rq[1]);
> + err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
> + i915_request_add(rq[1]);
> + if (err)
> + goto out;
> +
> + i915_gem_context_set_banned(arg->b.ctx);
> + err = intel_engine_pulse(arg->engine);
> + if (err)
> + goto out;
> +
> + igt_spinner_end(&arg->a.spin);
> + if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
> + err = -EIO;
> + goto out;
> + }
> +
> + if (rq[0]->fence.error != 0) {
> + pr_err("Normal inflight0 request did not complete\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> + if (rq[1]->fence.error != -EIO) {
> + pr_err("Cancelled inflight1 request did not report -EIO\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> +out:
> + i915_request_put(rq[1]);
> + i915_request_put(rq[0]);
> + if (igt_live_test_end(&t))
> + err = -EIO;
> + return err;
> +}
> +
> +static int __cancel_queued(struct live_preempt_cancel *arg)
> +{
> + struct i915_request *rq[3] = {};
> + struct igt_live_test t;
> + int err;
> +
> + /* Full ELSP and one in the wings */
> + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
> +
> + if (igt_live_test_begin(&t, arg->engine->i915,
> + __func__, arg->engine->name))
> + return -EIO;
> +
> + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
> + rq[0] = spinner_create_request(&arg->a.spin,
> + arg->a.ctx, arg->engine,
> + MI_ARB_CHECK);
> + if (IS_ERR(rq[0]))
> + return PTR_ERR(rq[0]);
> +
> + i915_request_get(rq[0]);
> + i915_request_add(rq[0]);
> + if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
> + err = -EIO;
> + goto out;
> + }
> +
> + clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags);
> + rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
> + if (IS_ERR(rq[1])) {
> + err = PTR_ERR(rq[1]);
> + goto out;
> + }
> +
> + i915_request_get(rq[1]);
> + err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
> + i915_request_add(rq[1]);
> + if (err)
> + goto out;
> +
> + rq[2] = spinner_create_request(&arg->b.spin,
> + arg->a.ctx, arg->engine,
> + MI_ARB_CHECK);
> + if (IS_ERR(rq[2])) {
> + err = PTR_ERR(rq[2]);
> + goto out;
> + }
> +
> + i915_request_get(rq[2]);
> + err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
> + i915_request_add(rq[2]);
> + if (err)
> + goto out;
> +
> + i915_gem_context_set_banned(arg->a.ctx);
> + err = intel_engine_pulse(arg->engine);
> + if (err)
> + goto out;
> +
> + if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
> + err = -EIO;
> + goto out;
> + }
> +
> + if (rq[0]->fence.error != -EIO) {
> + pr_err("Cancelled inflight0 request did not report -EIO\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> + if (rq[1]->fence.error != 0) {
> + pr_err("Normal inflight1 request did not complete\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> + if (rq[2]->fence.error != -EIO) {
> + pr_err("Cancelled queued request did not report -EIO\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> +out:
> + i915_request_put(rq[2]);
> + i915_request_put(rq[1]);
> + i915_request_put(rq[0]);
> + if (igt_live_test_end(&t))
> + err = -EIO;
> + return err;
> +}
> +
> +static int live_preempt_cancel(void *arg)
> +{
> + struct drm_i915_private *i915 = arg;
> + struct live_preempt_cancel data;
> + enum intel_engine_id id;
> + int err = -ENOMEM;
> +
> + /*
> + * To cancel an inflight context, we need to first remove it from the
> + * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
> + */
> +
> + if (!HAS_LOGICAL_RING_PREEMPTION(i915))
> + return 0;
> +
> + if (preempt_client_init(i915, &data.a))
> + return -ENOMEM;
> + if (preempt_client_init(i915, &data.b))
> + goto err_client_a;
> +
> + for_each_engine(data.engine, i915, id) {
> + if (!intel_engine_has_preemption(data.engine))
> + continue;
> +
> + err = __cancel_active0(&data);
> + if (err)
> + goto err_wedged;
> +
> + err = __cancel_active1(&data);
> + if (err)
> + goto err_wedged;
> +
> + err = __cancel_queued(&data);
> + if (err)
> + goto err_wedged;
> + }
> +
> + err = 0;
> +err_client_b:
> + preempt_client_fini(&data.b);
> +err_client_a:
> + preempt_client_fini(&data.a);
> + return err;
> +
> +err_wedged:
> + GEM_TRACE_DUMP();
> + igt_spinner_end(&data.b.spin);
> + igt_spinner_end(&data.a.spin);
> + intel_gt_set_wedged(&i915->gt);
> + goto err_client_b;
> +}
> +
> static int live_suppress_self_preempt(void *arg)
> {
> struct drm_i915_private *i915 = arg;
> @@ -2549,6 +2821,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
> SUBTEST(live_preempt),
> SUBTEST(live_late_preempt),
> SUBTEST(live_nopreempt),
> + SUBTEST(live_preempt_cancel),
> SUBTEST(live_suppress_self_preempt),
> SUBTEST(live_suppress_wait_preempt),
> SUBTEST(live_chain_preempt),
>