[Intel-gfx] [PATCH 1/2] drm/i915: Unwind incomplete legacy context switches
Mika Kuoppala
mika.kuoppala at linux.intel.com
Tue Nov 21 16:25:23 UTC 2017
Chris Wilson <chris at chris-wilson.co.uk> writes:
> The legacy context switch for ringbuffer submission is multistaged,
> where each of those stages may fail. However, we were updating global
> state after some stages, and so we had to force the incomplete request
> to be submitted because we could not unwind. Save the global state
> before performing the switches, and so enable us to unwind back to the
> previous global state should any phase fail. We then must cancel the
> request instead of submitting it should the construction fail.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_context.c | 168 ++++++++++----------------------
> drivers/gpu/drm/i915/i915_gem_request.c | 18 ++--
> drivers/gpu/drm/i915/intel_ringbuffer.c | 1 +
> drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
> 4 files changed, 62 insertions(+), 126 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 6ca56e482d79..f63bec08cc85 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -507,6 +507,7 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
>
> for_each_engine(engine, dev_priv, id) {
> engine->legacy_active_context = NULL;
> + engine->legacy_active_ppgtt = NULL;
>
> if (!engine->last_retired_context)
> continue;
> @@ -681,68 +682,48 @@ static int remap_l3(struct drm_i915_gem_request *req, int slice)
> return 0;
> }
>
> -static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
> - struct intel_engine_cs *engine,
> - struct i915_gem_context *to)
> -{
> - if (to->remap_slice)
> - return false;
> -
> - if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
> - return false;
> -
> - return to == engine->legacy_active_context;
> -}
> -
> -static bool
> -needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt, struct intel_engine_cs *engine)
> -{
> - struct i915_gem_context *from = engine->legacy_active_context;
> -
> - if (!ppgtt)
> - return false;
> -
> - /* Always load the ppgtt on first use */
> - if (!from)
> - return true;
> -
> - /* Same context without new entries, skip */
> - if ((!from->ppgtt || from->ppgtt == ppgtt) &&
> - !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
> - return false;
> -
> - if (engine->id != RCS)
> - return true;
> -
> - return true;
> -}
> -
> -static int do_rcs_switch(struct drm_i915_gem_request *req)
> +/**
> + * i915_switch_context() - perform a GPU context switch.
> + * @rq: request for which we'll execute the context switch
> + *
> + * The context life cycle is simple. The context refcount is incremented and
> + * decremented by 1 and create and destroy. If the context is in use by the GPU,
s/and/on/
> + * it will have a refcount > 1. This allows us to destroy the context abstract
> + * object while letting the normal object tracking destroy the backing BO.
> + *
> + * This function should not be used in execlists mode. Instead the context is
> + * switched by writing to the ELSP and requests keep a reference to their
> + * context.
> + */
> +int i915_switch_context(struct drm_i915_gem_request *rq)
> {
> - struct i915_gem_context *to = req->ctx;
> - struct intel_engine_cs *engine = req->engine;
> - struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
> - struct i915_gem_context *from = engine->legacy_active_context;
> - u32 hw_flags;
> + struct intel_engine_cs *engine = rq->engine;
> + struct i915_gem_context *to = rq->ctx;
> + struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
> + struct i915_gem_context *saved_ctx = engine->legacy_active_context;
> + struct i915_hw_ppgtt *saved_mm = engine->legacy_active_ppgtt;
saved as in the context of this function, or saved as in saved by hardware?
Just trying to get behind the naming here.
I am not at the bottom of this patch yet, but at this point
new_[mm|ctx] and current_[mm|ctx] feel more natural.
> + u32 hw_flags = 0;
> int ret, i;
>
> - GEM_BUG_ON(engine->id != RCS);
> -
> - if (skip_rcs_switch(ppgtt, engine, to))
> - return 0;
> + lockdep_assert_held(&rq->i915->drm.struct_mutex);
> + GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
>
> - if (needs_pd_load_pre(ppgtt, engine)) {
> - /* Older GENs and non render rings still want the load first,
> - * "PP_DCLV followed by PP_DIR_BASE register through Load
> - * Register Immediate commands in Ring Buffer before submitting
> - * a context."*/
> + if (ppgtt != saved_mm ||
> + (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)) {
> trace_switch_mm(engine, to);
> - ret = ppgtt->switch_mm(ppgtt, req);
> + ret = ppgtt->switch_mm(ppgtt, rq);
> if (ret)
> - return ret;
> + goto err;
> +
> + ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
> + engine->legacy_active_ppgtt = ppgtt;
> + hw_flags = MI_FORCE_RESTORE;
> }
>
> - if (i915_gem_context_is_kernel(to))
> + if (to->engine[engine->id].state &&
> + (to != saved_ctx || hw_flags & MI_FORCE_RESTORE)) {
> + GEM_BUG_ON(engine->id != RCS);
> +
> /*
> * The kernel context(s) is treated as pure scratch and is not
> * expected to retain any state (as we sacrifice it during
> @@ -750,78 +731,37 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
> * as nothing actually executes using the kernel context; it
> * is purely used for flushing user contexts.
> */
> - hw_flags = MI_RESTORE_INHIBIT;
> - else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
> - hw_flags = MI_FORCE_RESTORE;
> - else
> - hw_flags = 0;
> + if (i915_gem_context_is_kernel(to))
> + hw_flags = MI_RESTORE_INHIBIT;
>
> - if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
> - ret = mi_set_context(req, hw_flags);
> + ret = mi_set_context(rq, hw_flags);
> if (ret)
> - return ret;
> + goto err_mm;
>
> engine->legacy_active_context = to;
> }
>
> - if (ppgtt)
> - ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
> -
> - for (i = 0; i < MAX_L3_SLICES; i++) {
> - if (!(to->remap_slice & (1<<i)))
> - continue;
> -
> - ret = remap_l3(req, i);
> - if (ret)
> - return ret;
> -
> - to->remap_slice &= ~(1<<i);
> - }
> -
> - return 0;
> -}
> -
> -/**
> - * i915_switch_context() - perform a GPU context switch.
> - * @req: request for which we'll execute the context switch
> - *
> - * The context life cycle is simple. The context refcount is incremented and
> - * decremented by 1 and create and destroy. If the context is in use by the GPU,
> - * it will have a refcount > 1. This allows us to destroy the context abstract
> - * object while letting the normal object tracking destroy the backing BO.
> - *
> - * This function should not be used in execlists mode. Instead the context is
> - * switched by writing to the ELSP and requests keep a reference to their
> - * context.
> - */
> -int i915_switch_context(struct drm_i915_gem_request *req)
> -{
> - struct intel_engine_cs *engine = req->engine;
> -
> - lockdep_assert_held(&req->i915->drm.struct_mutex);
> - GEM_BUG_ON(HAS_EXECLISTS(req->i915));
> + if (to->remap_slice) {
> + for (i = 0; i < MAX_L3_SLICES; i++) {
> + if (!(to->remap_slice & BIT(i)))
> + continue;
>
> - if (!req->ctx->engine[engine->id].state) {
> - struct i915_gem_context *to = req->ctx;
> - struct i915_hw_ppgtt *ppgtt =
> - to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
> -
> - if (needs_pd_load_pre(ppgtt, engine)) {
> - int ret;
> -
> - trace_switch_mm(engine, to);
> - ret = ppgtt->switch_mm(ppgtt, req);
> + ret = remap_l3(rq, i);
> if (ret)
> - return ret;
> -
> - ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
> + goto err_ctx;
> }
>
> - engine->legacy_active_context = to;
> - return 0;
> + to->remap_slice = 0;
Because we have unwind, we can just remap everything again if an error
happens?
-Mika
> }
>
> - return do_rcs_switch(req);
> + return 0;
> +
> +err_ctx:
> + engine->legacy_active_context = saved_ctx;
> +err_mm:
> + engine->legacy_active_ppgtt = saved_mm;
> +err:
> + return ret;
> }
>
> static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 86e2346357cf..2749e4735563 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -718,25 +718,19 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
> /* Unconditionally invalidate GPU caches and TLBs. */
> ret = engine->emit_flush(req, EMIT_INVALIDATE);
> if (ret)
> - goto err_ctx;
> + goto err_unwind;
>
> ret = engine->request_alloc(req);
> - if (ret) {
> - /*
> - * Past the point-of-no-return. Since we may have updated
> - * global state after partially completing the request alloc,
> - * we need to commit any commands so far emitted in the
> - * request to the HW.
> - */
> - __i915_add_request(req, false);
> - return ERR_PTR(ret);
> - }
> + if (ret)
> + goto err_unwind;
>
> /* Check that we didn't interrupt ourselves with a new request */
> GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
> return req;
>
> -err_ctx:
> +err_unwind:
> + req->ring->emit = req->head;
> +
> /* Make sure we didn't add ourselves to external state before freeing */
> GEM_BUG_ON(!list_empty(&req->active_list));
> GEM_BUG_ON(!list_empty(&req->priotree.signalers_list));
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index bfa11a84e476..a904b0353bec 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -591,6 +591,7 @@ static void reset_ring_common(struct intel_engine_cs *engine,
> request->ring->head = request->postfix;
> } else {
> engine->legacy_active_context = NULL;
> + engine->legacy_active_ppgtt = NULL;
> }
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index d16e32adf19a..cbc9c36f675e 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -494,6 +494,7 @@ struct intel_engine_cs {
> * stream (ring).
> */
> struct i915_gem_context *legacy_active_context;
> + struct i915_hw_ppgtt *legacy_active_ppgtt;
>
> /* status_notifier: list of callbacks for context-switch changes */
> struct atomic_notifier_head context_status_notifier;
> --
> 2.15.0
More information about the Intel-gfx
mailing list