[Intel-gfx] [PATCH] drm/i915/gt: Confirm the context survives execution

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Thu Oct 15 15:15:53 UTC 2020


On 14/10/2020 09:43, Chris Wilson wrote:
> Repeat our sanity checks from before execution to after execution. One
> expects that if we were to see these, the GPU would already be on fire,
> but the timing may be informative.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_pm.c | 37 +++++++++++++++--------
>   drivers/gpu/drm/i915/gt/intel_lrc.c       | 12 ++++++--
>   2 files changed, 34 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index f7b2e07e2229..c5376790a6b9 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -17,6 +17,25 @@
>   #include "intel_ring.h"
>   #include "shmem_utils.h"
>   
> +static void dbg_poison_ce(struct intel_context *ce)
> +{
> +	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> +		return;
> +
> +	if (ce->state) {
> +		struct drm_i915_gem_object *obj = ce->state->obj;
> +		int type = i915_coherent_map_type(ce->engine->i915);
> +		void *map;
> +
> +		map = i915_gem_object_pin_map(obj, type);
> +		if (!IS_ERR(map)) {
> +			memset(map, CONTEXT_REDZONE, obj->base.size);
> +			i915_gem_object_flush_map(obj);
> +			i915_gem_object_unpin_map(obj);
> +		}
> +	}
> +}
> +
>   static int __engine_unpark(struct intel_wakeref *wf)
>   {
>   	struct intel_engine_cs *engine =
> @@ -32,20 +51,14 @@ static int __engine_unpark(struct intel_wakeref *wf)
>   	if (ce) {
>   		GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));
>   
> +		/* Flush all pending HW writes before we touch the context */
> +		while (unlikely(intel_context_inflight(ce)))
> +			intel_engine_flush_submission(ce->engine);
> +
>   		/* First poison the image to verify we never fully trust it */
> -		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
> -			struct drm_i915_gem_object *obj = ce->state->obj;
> -			int type = i915_coherent_map_type(engine->i915);
> -			void *map;
> -
> -			map = i915_gem_object_pin_map(obj, type);
> -			if (!IS_ERR(map)) {
> -				memset(map, CONTEXT_REDZONE, obj->base.size);
> -				i915_gem_object_flush_map(obj);
> -				i915_gem_object_unpin_map(obj);
> -			}
> -		}
> +		dbg_poison_ce(ce);
>   
> +		/* Scrub the context image after our loss of control */
>   		ce->ops->reset(ce);
>   	}
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 287537089c77..6170f6874f52 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1216,7 +1216,8 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
>   
>   static void
>   execlists_check_context(const struct intel_context *ce,
> -			const struct intel_engine_cs *engine)
> +			const struct intel_engine_cs *engine,
> +			const char *when)
>   {
>   	const struct intel_ring *ring = ce->ring;
>   	u32 *regs = ce->lrc_reg_state;
> @@ -1251,7 +1252,7 @@ execlists_check_context(const struct intel_context *ce,
>   		valid = false;
>   	}
>   
> -	WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
> +	WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
>   }
>   
>   static void restore_default_state(struct intel_context *ce,
> @@ -1347,7 +1348,7 @@ __execlists_schedule_in(struct i915_request *rq)
>   		reset_active(rq, engine);
>   
>   	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> -		execlists_check_context(ce, engine);
> +		execlists_check_context(ce, engine, "before");
>   
>   	if (ce->tag) {
>   		/* Use a fixed tag for OA and friends */
> @@ -1418,6 +1419,9 @@ __execlists_schedule_out(struct i915_request *rq,
>   	 * refrain from doing non-trivial work here.
>   	 */
>   
> +	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> +		execlists_check_context(ce, engine, "after");
> +
>   	/*
>   	 * If we have just completed this context, the engine may now be
>   	 * idle and we want to re-enter powersaving.
> @@ -4078,6 +4082,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
>   
>   static void execlists_sanitize(struct intel_engine_cs *engine)
>   {
> +	GEM_BUG_ON(execlists_active(&engine->execlists));
> +
>   	/*
>   	 * Poison residual state on resume, in case the suspend didn't!
>   	 *
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko


More information about the Intel-gfx mailing list