[Intel-gfx] [PATCH v2] drm/i915/execlists: Cancel banned contexts on schedule-out
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri Oct 11 13:10:21 UTC 2019
On 11/10/2019 12:16, Chris Wilson wrote:
> On schedule-out (CS completion) of a banned context, scrub the context
> image so that we do not replay the active payload. The intent is that we
> skip banned payloads on request submission so that the timeline
> advancement continues on in the background. However, if we are returning
> to a preempted request, i915_request_skip() is ineffective and instead we
> need to patch up the context image so that it continues from the start
> of the next request.
>
> v2: Fixup cancellation so that we only scrub the payload of the active
> request and do not short-circuit the breadcrumbs (which might cause
> other contexts to execute out of order).
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_lrc.c | 91 ++++++---
> drivers/gpu/drm/i915/gt/selftest_lrc.c | 273 +++++++++++++++++++++++++
> 2 files changed, 341 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 09fc5ecfdd09..809a5dd97c14 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -234,6 +234,9 @@ static void execlists_init_reg_state(u32 *reg_state,
> const struct intel_engine_cs *engine,
> const struct intel_ring *ring,
> bool close);
> +static void
> +__execlists_update_reg_state(const struct intel_context *ce,
> + const struct intel_engine_cs *engine);
>
> static void __context_pin_acquire(struct intel_context *ce)
> {
> @@ -256,6 +259,29 @@ static void mark_eio(struct i915_request *rq)
> i915_request_mark_complete(rq);
> }
>
> +static struct i915_request *active_request(struct i915_request *rq)
> +{
> + const struct intel_context * const ce = rq->hw_context;
> + struct i915_request *active = NULL;
> + struct list_head *list;
> +
> + if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
> + return rq;
> +
> + list = &i915_request_active_timeline(rq)->requests;
> + list_for_each_entry_from_reverse(rq, list, link) {
> + if (i915_request_completed(rq))
> + break;
> +
> + if (rq->hw_context != ce)
> + break;
Would it be of any value here to also check that the initial breadcrumb matches?
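Something like this, perhaps (a purely hypothetical sketch of the extra
check, assuming i915_request_started() is the right helper for testing
whether the CS has executed a request's initial breadcrumb):

	if (!i915_request_started(rq)) /* hypothetical: breadcrumb not yet executed */
		break;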
> +
> + active = rq;
> + }
> +
> + return active;
> +}
> +
> static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
> {
> return (i915_ggtt_offset(engine->status_page.vma) +
> @@ -977,6 +1003,45 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
> tasklet_schedule(&ve->base.execlists.tasklet);
> }
>
> +static void cancel_active(struct i915_request *rq,
> + struct intel_engine_cs *engine)
> +{
> + struct intel_context * const ce = rq->hw_context;
> + u32 *regs = ce->lrc_reg_state;
> +
> + /*
> + * The executing context has been cancelled. Fixup the context so that
> + * it continues on from the breadcrumb after the batch and will be
> + * marked as incomplete [-EIO] upon signaling. We preserve the
Where does the -EIO marking happen now?
> + * breadcrumbs and semaphores of the subsequent requests so that
> + * inter-timeline dependencies remain correctly ordered.
> + */
> + GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
> + __func__, engine->name, rq->fence.context, rq->fence.seqno);
> +
> + __context_pin_acquire(ce);
> +
> +	/* On resubmission of the active request, its payload will be scrubbed */
> + rq = active_request(rq);
> + if (rq)
> + ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
> + else
> + ce->ring->head = ce->ring->tail;
I don't quite understand this yet.

If a context was banned I'd expect all requests on tl->requests to be
zapped and we only move on to execute the last breadcrumb, no?

So if you find the active_request and set ring->head to
active_rq->head, how does that skip the payload?
Furthermore, if I try to sketch the tl->requests timeline like this:

  R0 r1 r2 r[elsp] r4 r5

('R' = completed; 'r' = incomplete)
On schedule_out(r[elsp]) I'd expect you to want to find r5 and set ring
head to its final breadcrumb, and to mark r1-r5 as -EIO. Am I completely
on the wrong track?
(Bear with me on r4 and r5; assume someone has set the context to
single submission to future-proof the code.)
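In pseudo-code, what I was expecting is more like this (just a sketch of
my expectation, reusing the patch's mark_eio() and intel_ring_wrap()
helpers; "tl" here is assumed to be the banned context's timeline, and
this is not a claim about what the patch actually does):

	struct i915_request *rq, *last = NULL;

	/* Zap every incomplete request on the banned context's timeline */
	list_for_each_entry(rq, &tl->requests, link) {
		if (i915_request_completed(rq))
			continue; /* R0 stays intact */

		mark_eio(rq); /* r1..r5 complete with -EIO */
		last = rq;
	}

	/* ...then resume from just after the final breadcrumb of r5 */
	if (last)
		ce->ring->head = intel_ring_wrap(ce->ring, last->tail);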
Regards,
Tvrtko
> +
> + /* Scrub the context image to prevent replaying the previous batch */
> + memcpy(regs, /* skip restoring the vanilla PPHWSP */
> + engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
> + engine->context_size - PAGE_SIZE);
> +
> + execlists_init_reg_state(regs, ce, engine, ce->ring, false);
> + __execlists_update_reg_state(ce, engine);
> +
> + /* We've switched away, so this should be a no-op, but intent matters */
> + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
> +
> + __context_pin_release(ce);
> +}
> +
> static inline void
> __execlists_schedule_out(struct i915_request *rq,
> struct intel_engine_cs * const engine)
> @@ -987,6 +1052,9 @@ __execlists_schedule_out(struct i915_request *rq,
> execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
> intel_gt_pm_put(engine->gt);
>
> + if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
> + cancel_active(rq, engine);
> +
> /*
> * If this is part of a virtual engine, its next request may
> * have been blocked waiting for access to the active context.
> @@ -2776,29 +2844,6 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
> &execlists->csb_status[reset_value]);
> }
>
> -static struct i915_request *active_request(struct i915_request *rq)
> -{
> - const struct intel_context * const ce = rq->hw_context;
> - struct i915_request *active = NULL;
> - struct list_head *list;
> -
> - if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
> - return rq;
> -
> - list = &i915_request_active_timeline(rq)->requests;
> - list_for_each_entry_from_reverse(rq, list, link) {
> - if (i915_request_completed(rq))
> - break;
> -
> - if (rq->hw_context != ce)
> - break;
> -
> - active = rq;
> - }
> -
> - return active;
> -}
> -
> static void __execlists_reset_reg_state(const struct intel_context *ce,
> const struct intel_engine_cs *engine)
> {
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 1276da059dc6..9d842e327aa1 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -7,6 +7,7 @@
> #include <linux/prime_numbers.h>
>
> #include "gem/i915_gem_pm.h"
> +#include "gt/intel_engine_heartbeat.h"
> #include "gt/intel_reset.h"
>
> #include "i915_selftest.h"
> @@ -1016,6 +1017,277 @@ static int live_nopreempt(void *arg)
> goto err_client_b;
> }
>
> +struct live_preempt_cancel {
> + struct intel_engine_cs *engine;
> + struct preempt_client a, b;
> +};
> +
> +static int __cancel_active0(struct live_preempt_cancel *arg)
> +{
> + struct i915_request *rq;
> + struct igt_live_test t;
> + int err;
> +
> + /* Preempt cancel of ELSP0 */
> + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
> +
> + if (igt_live_test_begin(&t, arg->engine->i915,
> + __func__, arg->engine->name))
> + return -EIO;
> +
> + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
> + rq = spinner_create_request(&arg->a.spin,
> + arg->a.ctx, arg->engine,
> + MI_ARB_CHECK);
> + if (IS_ERR(rq))
> + return PTR_ERR(rq);
> +
> + i915_request_get(rq);
> + i915_request_add(rq);
> + if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
> + err = -EIO;
> + goto out;
> + }
> +
> + i915_gem_context_set_banned(arg->a.ctx);
> + err = intel_engine_pulse(arg->engine);
> + if (err)
> + goto out;
> +
> + if (i915_request_wait(rq, 0, HZ / 5) < 0) {
> + err = -EIO;
> + goto out;
> + }
> +
> + if (rq->fence.error != -EIO) {
> + pr_err("Cancelled inflight0 request did not report -EIO\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> +out:
> + i915_request_put(rq);
> + if (igt_live_test_end(&t))
> + err = -EIO;
> + return err;
> +}
> +
> +static int __cancel_active1(struct live_preempt_cancel *arg)
> +{
> + struct i915_request *rq[2] = {};
> + struct igt_live_test t;
> + int err;
> +
> + /* Preempt cancel of ELSP1 */
> + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
> +
> + if (igt_live_test_begin(&t, arg->engine->i915,
> + __func__, arg->engine->name))
> + return -EIO;
> +
> + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
> + rq[0] = spinner_create_request(&arg->a.spin,
> + arg->a.ctx, arg->engine,
> + MI_NOOP); /* no preemption */
> + if (IS_ERR(rq[0]))
> + return PTR_ERR(rq[0]);
> +
> + i915_request_get(rq[0]);
> + i915_request_add(rq[0]);
> + if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
> + err = -EIO;
> + goto out;
> + }
> +
> + clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags);
> + rq[1] = spinner_create_request(&arg->b.spin,
> + arg->b.ctx, arg->engine,
> + MI_ARB_CHECK);
> + if (IS_ERR(rq[1])) {
> + err = PTR_ERR(rq[1]);
> + goto out;
> + }
> +
> + i915_request_get(rq[1]);
> + err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
> + i915_request_add(rq[1]);
> + if (err)
> + goto out;
> +
> + i915_gem_context_set_banned(arg->b.ctx);
> + err = intel_engine_pulse(arg->engine);
> + if (err)
> + goto out;
> +
> + igt_spinner_end(&arg->a.spin);
> + if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
> + err = -EIO;
> + goto out;
> + }
> +
> + if (rq[0]->fence.error != 0) {
> + pr_err("Normal inflight0 request did not complete\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> + if (rq[1]->fence.error != -EIO) {
> + pr_err("Cancelled inflight1 request did not report -EIO\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> +out:
> + i915_request_put(rq[1]);
> + i915_request_put(rq[0]);
> + if (igt_live_test_end(&t))
> + err = -EIO;
> + return err;
> +}
> +
> +static int __cancel_queued(struct live_preempt_cancel *arg)
> +{
> + struct i915_request *rq[3] = {};
> + struct igt_live_test t;
> + int err;
> +
> + /* Full ELSP and one in the wings */
> + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
> +
> + if (igt_live_test_begin(&t, arg->engine->i915,
> + __func__, arg->engine->name))
> + return -EIO;
> +
> + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
> + rq[0] = spinner_create_request(&arg->a.spin,
> + arg->a.ctx, arg->engine,
> + MI_ARB_CHECK);
> + if (IS_ERR(rq[0]))
> + return PTR_ERR(rq[0]);
> +
> + i915_request_get(rq[0]);
> + i915_request_add(rq[0]);
> + if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
> + err = -EIO;
> + goto out;
> + }
> +
> + clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags);
> + rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
> + if (IS_ERR(rq[1])) {
> + err = PTR_ERR(rq[1]);
> + goto out;
> + }
> +
> + i915_request_get(rq[1]);
> + err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
> + i915_request_add(rq[1]);
> + if (err)
> + goto out;
> +
> + rq[2] = spinner_create_request(&arg->b.spin,
> + arg->a.ctx, arg->engine,
> + MI_ARB_CHECK);
> + if (IS_ERR(rq[2])) {
> + err = PTR_ERR(rq[2]);
> + goto out;
> + }
> +
> + i915_request_get(rq[2]);
> + err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
> + i915_request_add(rq[2]);
> + if (err)
> + goto out;
> +
> + i915_gem_context_set_banned(arg->a.ctx);
> + err = intel_engine_pulse(arg->engine);
> + if (err)
> + goto out;
> +
> + if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
> + err = -EIO;
> + goto out;
> + }
> +
> + if (rq[0]->fence.error != -EIO) {
> + pr_err("Cancelled inflight0 request did not report -EIO\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> + if (rq[1]->fence.error != 0) {
> + pr_err("Normal inflight1 request did not complete\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> + if (rq[2]->fence.error != -EIO) {
> + pr_err("Cancelled queued request did not report -EIO\n");
> + err = -EINVAL;
> + goto out;
> + }
> +
> +out:
> + i915_request_put(rq[2]);
> + i915_request_put(rq[1]);
> + i915_request_put(rq[0]);
> + if (igt_live_test_end(&t))
> + err = -EIO;
> + return err;
> +}
> +
> +static int live_preempt_cancel(void *arg)
> +{
> + struct drm_i915_private *i915 = arg;
> + struct live_preempt_cancel data;
> + enum intel_engine_id id;
> + int err = -ENOMEM;
> +
> + /*
> + * To cancel an inflight context, we need to first remove it from the
> + * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
> + */
> +
> + if (!HAS_LOGICAL_RING_PREEMPTION(i915))
> + return 0;
> +
> + if (preempt_client_init(i915, &data.a))
> + return -ENOMEM;
> + if (preempt_client_init(i915, &data.b))
> + goto err_client_a;
> +
> + for_each_engine(data.engine, i915, id) {
> + if (!intel_engine_has_preemption(data.engine))
> + continue;
> +
> + err = __cancel_active0(&data);
> + if (err)
> + goto err_wedged;
> +
> + err = __cancel_active1(&data);
> + if (err)
> + goto err_wedged;
> +
> + err = __cancel_queued(&data);
> + if (err)
> + goto err_wedged;
> + }
> +
> + err = 0;
> +err_client_b:
> + preempt_client_fini(&data.b);
> +err_client_a:
> + preempt_client_fini(&data.a);
> + return err;
> +
> +err_wedged:
> + GEM_TRACE_DUMP();
> + igt_spinner_end(&data.b.spin);
> + igt_spinner_end(&data.a.spin);
> + intel_gt_set_wedged(&i915->gt);
> + goto err_client_b;
> +}
> +
> static int live_suppress_self_preempt(void *arg)
> {
> struct drm_i915_private *i915 = arg;
> @@ -2452,6 +2724,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
> SUBTEST(live_preempt),
> SUBTEST(live_late_preempt),
> SUBTEST(live_nopreempt),
> + SUBTEST(live_preempt_cancel),
> SUBTEST(live_suppress_self_preempt),
> SUBTEST(live_suppress_wait_preempt),
> SUBTEST(live_chain_preempt),
>