[Intel-gfx] [PATCH v2] drm/i915/gt: Register the migrate contexts with their engines

Daniel Vetter daniel at ffwll.ch
Thu Aug 26 12:44:31 UTC 2021


On Thu, Aug 26, 2021 at 12:45:14PM +0200, Thomas Hellström wrote:
> Pinned contexts, like the migrate contexts need reset after resume
> since their context image may have been lost. Also the GuC needs to
> register pinned contexts.
> 
> Add a list to struct intel_engine_cs where we add all pinned contexts on
> creation, and traverse that list at resume time to reset the pinned
> contexts.
> 
> This fixes the kms_pipe_crc_basic at suspend-read-crc-pipe-a selftest for now,
> but proper LMEM backup / restore is needed for full suspend functionality.
> However, note that even with full LMEM backup / restore it may be
> desirable to keep the reset since backing up the migrate context images
> must happen using memcpy() after the migrate context has become inactive,
> and for performance- and other reasons we want to avoid memcpy() from
> LMEM.
> 
> Also traverse the list at guc_init_lrc_mapping() calling
> guc_kernel_context_pin() for the pinned contexts, like is already done
> for the kernel context.
> 
> v2:
> - Don't reset the contexts on each __engine_unpark() but rather at
>   resume time (Chris Wilson).
> 
> Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
> Cc: Matthew Auld <matthew.auld at intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
> Cc: Brost Matthew <matthew.brost at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

I guess it got lost, but I few weeks ago I stumbled over this and wondered
why we're even setting up a separate context or at least why a separate vm
compared to the gt->vm we have already?

Even on chips with bazillions of copy engines the plan is that we only
reserve a single one for kernel migrations, so there's not really a need
for quite this much generality I think. Maybe check with Jon Bloomfield on
this.

Iirc I had also a few other questions on simplifying this area.
-Daniel


> ---
>  drivers/gpu/drm/i915/gt/intel_context_types.h |  8 +++++++
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  4 ++++
>  drivers/gpu/drm/i915/gt/intel_engine_pm.c     | 23 +++++++++++++++++++
>  drivers/gpu/drm/i915/gt/intel_engine_pm.h     |  2 ++
>  drivers/gpu/drm/i915/gt/intel_engine_types.h  |  7 ++++++
>  drivers/gpu/drm/i915/gt/intel_gt_pm.c         |  3 +++
>  drivers/gpu/drm/i915/gt/mock_engine.c         |  1 +
>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 10 +++++---
>  8 files changed, 55 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index e54351a170e2..a63631ea0ec4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -152,6 +152,14 @@ struct intel_context {
>  	/** sseu: Control eu/slice partitioning */
>  	struct intel_sseu sseu;
>  
> +	/**
> +	 * pinned_contexts_link: List link for the engine's pinned contexts.
> +	 * This is only used if this is a perma-pinned kernel context and
> +	 * the list is assumed to only be manipulated during driver load
> +	 * or unload time so no mutex protection currently.
> +	 */
> +	struct list_head pinned_contexts_link;
> +
>  	u8 wa_bb_page; /* if set, page num reserved for context workarounds */
>  
>  	struct {
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 332efea696a5..c606a4714904 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -320,6 +320,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
>  
>  	BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);
>  
> +	INIT_LIST_HEAD(&engine->pinned_contexts_list);
>  	engine->id = id;
>  	engine->legacy_idx = INVALID_ENGINE;
>  	engine->mask = BIT(id);
> @@ -875,6 +876,8 @@ intel_engine_create_pinned_context(struct intel_engine_cs *engine,
>  		return ERR_PTR(err);
>  	}
>  
> +	list_add_tail(&ce->pinned_contexts_link, &engine->pinned_contexts_list);
> +
>  	/*
>  	 * Give our perma-pinned kernel timelines a separate lockdep class,
>  	 * so that we can use them from within the normal user timelines
> @@ -897,6 +900,7 @@ void intel_engine_destroy_pinned_context(struct intel_context *ce)
>  	list_del(&ce->timeline->engine_link);
>  	mutex_unlock(&hwsp->vm->mutex);
>  
> +	list_del(&ce->pinned_contexts_link);
>  	intel_context_unpin(ce);
>  	intel_context_put(ce);
>  }
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index 1f07ac4e0672..dacd62773735 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -298,6 +298,29 @@ void intel_engine_init__pm(struct intel_engine_cs *engine)
>  	intel_engine_init_heartbeat(engine);
>  }
>  
> +/**
> + * intel_engine_reset_pinned_contexts - Reset the pinned contexts of
> + * an engine.
> + * @engine: The engine whose pinned contexts we want to reset.
> + *
> + * Typically the pinned context LMEM images lose or get their content
> + * corrupted on suspend. This function resets their images.
> + */
> +void intel_engine_reset_pinned_contexts(struct intel_engine_cs *engine)
> +{
> +	struct intel_context *ce;
> +
> +	list_for_each_entry(ce, &engine->pinned_contexts_list,
> +			    pinned_contexts_link) {
> +		/* kernel context gets reset at __engine_unpark() */
> +		if (ce == engine->kernel_context)
> +			continue;
> +
> +		dbg_poison_ce(ce);
> +		ce->ops->reset(ce);
> +	}
> +}
> +
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>  #include "selftest_engine_pm.c"
>  #endif
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> index 70ea46d6cfb0..8520c595f5e1 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> @@ -69,4 +69,6 @@ intel_engine_create_kernel_request(struct intel_engine_cs *engine)
>  
>  void intel_engine_init__pm(struct intel_engine_cs *engine);
>  
> +void intel_engine_reset_pinned_contexts(struct intel_engine_cs *engine);
> +
>  #endif /* INTEL_ENGINE_PM_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index bfbfe53c23dd..5ae1207c363b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -307,6 +307,13 @@ struct intel_engine_cs {
>  
>  	struct intel_context *kernel_context; /* pinned */
>  
> +	/**
> +	 * pinned_contexts_list: List of pinned contexts. This list is only
> +	 * assumed to be manipulated during driver load- or unload time and
> +	 * does therefore not have any additional protection.
> +	 */
> +	struct list_head pinned_contexts_list;
> +
>  	intel_engine_mask_t saturated; /* submitting semaphores too late? */
>  
>  	struct {
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> index dea8e2479897..c9bae2ef92df 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -192,6 +192,9 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
>  
>  	intel_rps_sanitize(&gt->rps);
>  
> +	for_each_engine(engine, gt, id)
> +		intel_engine_reset_pinned_contexts(engine);
> +
>  	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
>  	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
>  }
> diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
> index 2c1af030310c..8a14982a9691 100644
> --- a/drivers/gpu/drm/i915/gt/mock_engine.c
> +++ b/drivers/gpu/drm/i915/gt/mock_engine.c
> @@ -376,6 +376,7 @@ int mock_engine_init(struct intel_engine_cs *engine)
>  {
>  	struct intel_context *ce;
>  
> +	INIT_LIST_HEAD(&engine->pinned_contexts_list);
>  	engine->sched_engine = i915_sched_engine_create(ENGINE_MOCK);
>  	if (!engine->sched_engine)
>  		return -ENOMEM;
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 87d8dc8f51b9..55709206b95e 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -2385,9 +2385,13 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc)
>  	 * and even it did this code would be run again.
>  	 */
>  
> -	for_each_engine(engine, gt, id)
> -		if (engine->kernel_context)
> -			guc_kernel_context_pin(guc, engine->kernel_context);
> +	for_each_engine(engine, gt, id) {
> +		struct intel_context *ce;
> +
> +		list_for_each_entry(ce, &engine->pinned_contexts_list,
> +				    pinned_contexts_link)
> +			guc_kernel_context_pin(guc, ce);
> +	}
>  }
>  
>  static void guc_release(struct intel_engine_cs *engine)
> -- 
> 2.31.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


More information about the Intel-gfx mailing list