[Intel-gfx] [PATCH 1/2] drm/i915: Unwind incomplete legacy context switches
Mika Kuoppala
mika.kuoppala at linux.intel.com
Tue Nov 21 16:25:23 UTC 2017
Chris Wilson <chris at chris-wilson.co.uk> writes:
> The legacy context switch for ringbuffer submission is multistaged,
> where each of those stages may fail. However, we were updating global
> state after some stages, and so we had to force the incomplete request
> to be submitted because we could not unwind. Save the global state
> before performing the switches, and so enable us to unwind back to the
> previous global state should any phase fail. We then must cancel the
> request instead of submitting it should the construction fail.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_context.c | 168 ++++++++++----------------------
> drivers/gpu/drm/i915/i915_gem_request.c | 18 ++--
> drivers/gpu/drm/i915/intel_ringbuffer.c | 1 +
> drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
> 4 files changed, 62 insertions(+), 126 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 6ca56e482d79..f63bec08cc85 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -507,6 +507,7 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
>
> for_each_engine(engine, dev_priv, id) {
> engine->legacy_active_context = NULL;
> + engine->legacy_active_ppgtt = NULL;
>
> if (!engine->last_retired_context)
> continue;
> @@ -681,68 +682,48 @@ static int remap_l3(struct drm_i915_gem_request *req, int slice)
> return 0;
> }
>
> -static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
> - struct intel_engine_cs *engine,
> - struct i915_gem_context *to)
> -{
> - if (to->remap_slice)
> - return false;
> -
> - if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
> - return false;
> -
> - return to == engine->legacy_active_context;
> -}
> -
> -static bool
> -needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt, struct intel_engine_cs *engine)
> -{
> - struct i915_gem_context *from = engine->legacy_active_context;
> -
> - if (!ppgtt)
> - return false;
> -
> - /* Always load the ppgtt on first use */
> - if (!from)
> - return true;
> -
> - /* Same context without new entries, skip */
> - if ((!from->ppgtt || from->ppgtt == ppgtt) &&
> - !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
> - return false;
> -
> - if (engine->id != RCS)
> - return true;
> -
> - return true;
> -}
> -
> -static int do_rcs_switch(struct drm_i915_gem_request *req)
> +/**
> + * i915_switch_context() - perform a GPU context switch.
> + * @rq: request for which we'll execute the context switch
> + *
> + * The context life cycle is simple. The context refcount is incremented and
> + * decremented by 1 and create and destroy. If the context is in use by the GPU,
s/and/on/
> + * it will have a refcount > 1. This allows us to destroy the context abstract
> + * object while letting the normal object tracking destroy the backing BO.
> + *
> + * This function should not be used in execlists mode. Instead the context is
> + * switched by writing to the ELSP and requests keep a reference to their
> + * context.
> + */
> +int i915_switch_context(struct drm_i915_gem_request *rq)
> {
> - struct i915_gem_context *to = req->ctx;
> - struct intel_engine_cs *engine = req->engine;
> - struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
> - struct i915_gem_context *from = engine->legacy_active_context;
> - u32 hw_flags;
> + struct intel_engine_cs *engine = rq->engine;
> + struct i915_gem_context *to = rq->ctx;
> + struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
> + struct i915_gem_context *saved_ctx = engine->legacy_active_context;
> + struct i915_hw_ppgtt *saved_mm = engine->legacy_active_ppgtt;
saved as in the context of this function, or saved as in saved by hardware?
Just trying to get behind the naming here.
I am not at the bottom of this patch yet, but at this point
new_[mm|ctx] and current_[mm|ctx] feel more natural.
> + u32 hw_flags = 0;
> int ret, i;
>
> - GEM_BUG_ON(engine->id != RCS);
> -
> - if (skip_rcs_switch(ppgtt, engine, to))
> - return 0;
> + lockdep_assert_held(&rq->i915->drm.struct_mutex);
> + GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
>
> - if (needs_pd_load_pre(ppgtt, engine)) {
> - /* Older GENs and non render rings still want the load first,
> - * "PP_DCLV followed by PP_DIR_BASE register through Load
> - * Register Immediate commands in Ring Buffer before submitting
> - * a context."*/
> + if (ppgtt != saved_mm ||
> + (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)) {
> trace_switch_mm(engine, to);
> - ret = ppgtt->switch_mm(ppgtt, req);
> + ret = ppgtt->switch_mm(ppgtt, rq);
> if (ret)
> - return ret;
> + goto err;
> +
> + ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
> + engine->legacy_active_ppgtt = ppgtt;
> + hw_flags = MI_FORCE_RESTORE;
> }
>
> - if (i915_gem_context_is_kernel(to))
> + if (to->engine[engine->id].state &&
> + (to != saved_ctx || hw_flags & MI_FORCE_RESTORE)) {
> + GEM_BUG_ON(engine->id != RCS);
> +
> /*
> * The kernel context(s) is treated as pure scratch and is not
> * expected to retain any state (as we sacrifice it during
> @@ -750,78 +731,37 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
> * as nothing actually executes using the kernel context; it
> * is purely used for flushing user contexts.
> */
> - hw_flags = MI_RESTORE_INHIBIT;
> - else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
> - hw_flags = MI_FORCE_RESTORE;
> - else
> - hw_flags = 0;
> + if (i915_gem_context_is_kernel(to))
> + hw_flags = MI_RESTORE_INHIBIT;
>
> - if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
> - ret = mi_set_context(req, hw_flags);
> + ret = mi_set_context(rq, hw_flags);
> if (ret)
> - return ret;
> + goto err_mm;
>
> engine->legacy_active_context = to;
> }
>
> - if (ppgtt)
> - ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
> -
> - for (i = 0; i < MAX_L3_SLICES; i++) {
> - if (!(to->remap_slice & (1<<i)))
> - continue;
> -
> - ret = remap_l3(req, i);
> - if (ret)
> - return ret;
> -
> - to->remap_slice &= ~(1<<i);
> - }
> -
> - return 0;
> -}
> -
> -/**
> - * i915_switch_context() - perform a GPU context switch.
> - * @req: request for which we'll execute the context switch
> - *
> - * The context life cycle is simple. The context refcount is incremented and
> - * decremented by 1 and create and destroy. If the context is in use by the GPU,
> - * it will have a refcount > 1. This allows us to destroy the context abstract
> - * object while letting the normal object tracking destroy the backing BO.
> - *
> - * This function should not be used in execlists mode. Instead the context is
> - * switched by writing to the ELSP and requests keep a reference to their
> - * context.
> - */
> -int i915_switch_context(struct drm_i915_gem_request *req)
> -{
> - struct intel_engine_cs *engine = req->engine;
> -
> - lockdep_assert_held(&req->i915->drm.struct_mutex);
> - GEM_BUG_ON(HAS_EXECLISTS(req->i915));
> + if (to->remap_slice) {
> + for (i = 0; i < MAX_L3_SLICES; i++) {
> + if (!(to->remap_slice & BIT(i)))
> + continue;
>
> - if (!req->ctx->engine[engine->id].state) {
> - struct i915_gem_context *to = req->ctx;
> - struct i915_hw_ppgtt *ppgtt =
> - to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
> -
> - if (needs_pd_load_pre(ppgtt, engine)) {
> - int ret;
> -
> - trace_switch_mm(engine, to);
> - ret = ppgtt->switch_mm(ppgtt, req);
> + ret = remap_l3(rq, i);
> if (ret)
> - return ret;
> -
> - ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
> + goto err_ctx;
> }
>
> - engine->legacy_active_context = to;
> - return 0;
> + to->remap_slice = 0;
Because we have unwind, we can just remap everything again if an error
happens?
-Mika
> }
>
> - return do_rcs_switch(req);
> + return 0;
> +
> +err_ctx:
> + engine->legacy_active_context = saved_ctx;
> +err_mm:
> + engine->legacy_active_ppgtt = saved_mm;
> +err:
> + return ret;
> }
>
> static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 86e2346357cf..2749e4735563 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -718,25 +718,19 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
> /* Unconditionally invalidate GPU caches and TLBs. */
> ret = engine->emit_flush(req, EMIT_INVALIDATE);
> if (ret)
> - goto err_ctx;
> + goto err_unwind;
>
> ret = engine->request_alloc(req);
> - if (ret) {
> - /*
> - * Past the point-of-no-return. Since we may have updated
> - * global state after partially completing the request alloc,
> - * we need to commit any commands so far emitted in the
> - * request to the HW.
> - */
> - __i915_add_request(req, false);
> - return ERR_PTR(ret);
> - }
> + if (ret)
> + goto err_unwind;
>
> /* Check that we didn't interrupt ourselves with a new request */
> GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
> return req;
>
> -err_ctx:
> +err_unwind:
> + req->ring->emit = req->head;
> +
> /* Make sure we didn't add ourselves to external state before freeing */
> GEM_BUG_ON(!list_empty(&req->active_list));
> GEM_BUG_ON(!list_empty(&req->priotree.signalers_list));
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index bfa11a84e476..a904b0353bec 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -591,6 +591,7 @@ static void reset_ring_common(struct intel_engine_cs *engine,
> request->ring->head = request->postfix;
> } else {
> engine->legacy_active_context = NULL;
> + engine->legacy_active_ppgtt = NULL;
> }
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index d16e32adf19a..cbc9c36f675e 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -494,6 +494,7 @@ struct intel_engine_cs {
> * stream (ring).
> */
> struct i915_gem_context *legacy_active_context;
> + struct i915_hw_ppgtt *legacy_active_ppgtt;
>
> /* status_notifier: list of callbacks for context-switch changes */
> struct atomic_notifier_head context_status_notifier;
> --
> 2.15.0
More information about the Intel-gfx
mailing list