[PATCH 02/26] drm/i915/guc: Take GT PM ref when deregistering context

Fri Oct 8 18:23:38 UTC 2021

On Wed, Oct 06, 2021 at 08:37:03PM -0700, John Harrison wrote:
> On 10/4/2021 15:06, Matthew Brost wrote:
> > Taking a PM reference to prevent intel_gt_wait_for_idle from short
> > circuiting while a deregister context H2G is in flight. To do this must
> > issue the deregister H2G from a worker as context can be destroyed from
> > an atomic context and taking GT PM ref blows up. Previously we took a
> > runtime PM from this atomic context which worked but will stop working
> > once runtime pm autosuspend in enabled.
> > 
> > So this patch is two fold, stop intel_gt_wait_for_idle from short
> > circuting and fix runtime pm autosuspend.
> > 
> > v2:
> >   (John Harrison)
> >    - Split structure changes out in different patch
> >   (Tvrtko)
> >    - Don't drop lock in deregister_destroyed_contexts
> > 
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> >   drivers/gpu/drm/i915/gt/intel_context.c       |   2 +
> >   drivers/gpu/drm/i915/gt/intel_context_types.h |   7 +
> >   drivers/gpu/drm/i915/gt/intel_engine_pm.h     |   5 +
> >   drivers/gpu/drm/i915/gt/intel_gt_pm.h         |   4 +
> >   drivers/gpu/drm/i915/gt/uc/intel_guc.h        |  11 ++
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 146 +++++++++++-------
> >   6 files changed, 121 insertions(+), 54 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> > index e9a0cad5c34d..1076066f41e0 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> > @@ -399,6 +399,8 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
> >   	ce->guc_id.id = GUC_INVALID_LRC_ID;
> >   	INIT_LIST_HEAD(&ce->guc_id.link);
> > +	INIT_LIST_HEAD(&ce->destroyed_link);
> > +
> >   	/*
> >   	 * Initialize fence to be complete as this is expected to be complete
> >   	 * unless there is a pending schedule disable outstanding.
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > index e7e3984aab78..4613d027cbc3 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > @@ -213,6 +213,13 @@ struct intel_context {
> >   		struct list_head link;
> >   	} guc_id;
> > +	/**
> > +	 * @destroyed_link: link in guc->submission_state.destroyed_contexts, in
> > +	 * list when context is pending to be destroyed (deregistered with the
> > +	 * GuC), protected by guc->submission_state.lock
> > +	 */
> > +	struct list_head destroyed_link;
> > +
> >   #ifdef CONFIG_DRM_I915_SELFTEST
> >   	/**
> >   	 * @drop_schedule_enable: Force drop of schedule enable G2H for selftest
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> > index 8520c595f5e1..6fdeae668e6e 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> > @@ -16,6 +16,11 @@ intel_engine_pm_is_awake(const struct intel_engine_cs *engine)
> >   	return intel_wakeref_is_active(&engine->wakeref);
> >   }
> > +static inline void __intel_engine_pm_get(struct intel_engine_cs *engine)
> > +{
> > +	__intel_wakeref_get(&engine->wakeref);
> > +}
> > +
> >   static inline void intel_engine_pm_get(struct intel_engine_cs *engine)
> >   {
> >   	intel_wakeref_get(&engine->wakeref);
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> > index d0588d8aaa44..05de6c1af25b 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> > @@ -41,6 +41,10 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt)
> >   	intel_wakeref_put_async(&gt->wakeref);
> >   }
> > +#define with_intel_gt_pm(gt, tmp) \
> > +	for (tmp = 1, intel_gt_pm_get(gt); tmp; \
> > +	     intel_gt_pm_put(gt), tmp = 0)
> > +
> >   static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
> >   {
> >   	return intel_wakeref_wait_for_idle(&gt->wakeref);
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > index 65b5e8eeef96..25a598e2b6e8 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> > @@ -84,6 +84,17 @@ struct intel_guc {
> >   		 * refs
> >   		 */
> >   		struct list_head guc_id_list;
> > +		/**
> > +		 * @destroyed_contexts: list of contexts waiting to be destroyed
> > +		 * (deregistered with the GuC)
> > +		 */
> > +		struct list_head destroyed_contexts;
> > +		/**
> > +		 * @destroyed_worker: worker to deregister contexts, need as we
> > +		 * need to take a GT PM reference and can't from destroy
> > +		 * function as it might be in an atomic context (no sleeping)
> > +		 */
> > +		struct work_struct destroyed_worker;
> >   	} submission_state;
> >   	/**
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index ad5c18119d92..17da2fea1bff 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -90,8 +90,8 @@
> >    * used for all of GuC submission but that could change in the future.
> >    *
> >    * guc->submission_state.lock
> > - * Protects guc_id allocation for the given GuC, i.e. only one context can be
> > - * doing guc_id allocation operations at a time for each GuC in the system.
> > + * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
> > + * list.
> Feels like this should not be removing explanations, only adding to them.
> The patch itself is only adding new features not removing them. Either the
> details about id allocation are not worth mentioning and should not have
> been added in the previous patch. Or they are and should be kept rather than
> removed in this patch. Either way works for me. The comment was valid
> information but does maybe count as obvious from the guc_id member (and
> friends) are within a per GuC instance structure.
> 
> >    *
> >    * ce->guc_state.lock
> >    * Protects everything under ce->guc_state. Ensures that a context is in the
> > @@ -719,6 +719,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
> >   			if (deregister)
> >   				guc_signal_context_fence(ce);
> >   			if (destroyed) {
> > +				intel_gt_pm_put_async(guc_to_gt(guc));
> >   				release_guc_id(guc, ce);
> >   				__guc_context_destroy(ce);
> >   			}
> > @@ -797,6 +798,8 @@ static void guc_flush_submissions(struct intel_guc *guc)
> >   	spin_unlock_irqrestore(&sched_engine->lock, flags);
> >   }
> > +static void guc_flush_destroyed_contexts(struct intel_guc *guc);
> > +
> >   void intel_guc_submission_reset_prepare(struct intel_guc *guc)
> >   {
> >   	int i;
> > @@ -815,6 +818,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
> >   	spin_unlock_irq(&guc_to_gt(guc)->irq_lock);
> >   	guc_flush_submissions(guc);
> > +	guc_flush_destroyed_contexts(guc);
> >   	/*
> >   	 * Handle any outstanding G2Hs before reset. Call IRQ handler directly
> > @@ -1126,6 +1130,8 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
> >   	intel_gt_unpark_heartbeats(guc_to_gt(guc));
> >   }
> > +static void destroyed_worker_func(struct work_struct *w);
> > +
> >   /*
> >    * Set up the memory resources to be shared with the GuC (via the GGTT)
> >    * at firmware loading time.
> > @@ -1151,6 +1157,9 @@ int intel_guc_submission_init(struct intel_guc *guc)
> >   	spin_lock_init(&guc->submission_state.lock);
> >   	INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
> >   	ida_init(&guc->submission_state.guc_ids);
> > +	INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
> > +	INIT_WORK(&guc->submission_state.destroyed_worker,
> > +		  destroyed_worker_func);
> >   	return 0;
> >   }
> > @@ -1161,6 +1170,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
> >   		return;
> >   	guc_lrc_desc_pool_destroy(guc);
> > +	guc_flush_destroyed_contexts(guc);
> Seems like these lines should be reversed. We should destroy the higher
> level constructs before the lower level ones that they could be built on.
> 

Missed a few comments. Sure, will reverse.

> >   	i915_sched_engine_put(guc->sched_engine);
> >   }
> > @@ -1859,11 +1869,30 @@ static void guc_context_sched_disable(struct intel_context *ce)
> >   static inline void guc_lrc_desc_unpin(struct intel_context *ce)
> >   {
> >   	struct intel_guc *guc = ce_to_guc(ce);
> > +	struct intel_gt *gt = guc_to_gt(guc);
> > +	unsigned long flags;
> > +	bool disabled;
> > +	GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
> >   	GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id));
> >   	GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
> >   	GEM_BUG_ON(context_enabled(ce));
> > +	/* Seal race with Reset */
> > +	spin_lock_irqsave(&ce->guc_state.lock, flags);
> > +	disabled = submission_disabled(guc);
> > +	if (likely(!disabled)) {
> > +		__intel_gt_pm_get(gt);
> > +		set_context_destroyed(ce);
> > +		clr_context_registered(ce);
> > +	}
> > +	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
> > +	if (unlikely(disabled)) {
> > +		release_guc_id(guc, ce);
> > +		__guc_context_destroy(ce);
> > +		return;
> > +	}
> > +
> >   	deregister_context(ce, ce->guc_id.id);
> >   }
> > @@ -1891,78 +1920,86 @@ static void __guc_context_destroy(struct intel_context *ce)
> >   	}
> >   }
> > +static void guc_flush_destroyed_contexts(struct intel_guc *guc)
> > +{
> > +	struct intel_context *ce, *cn;
> > +	unsigned long flags;
> > +
> > +	GEM_BUG_ON(!submission_disabled(guc) &&
> > +		   guc_submission_initialized(guc));
> > +
> > +	spin_lock_irqsave(&guc->submission_state.lock, flags);
> > +	list_for_each_entry_safe(ce, cn,
> > +				 &guc->submission_state.destroyed_contexts,
> > +				 destroyed_link) {
> > +		list_del_init(&ce->destroyed_link);
> > +		__release_guc_id(guc, ce);
> > +		__guc_context_destroy(ce);
> > +	}
> > +	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
> > +}
> > +
> > +static void deregister_destroyed_contexts(struct intel_guc *guc)
> > +{
> > +	struct intel_context *ce, *cn;
> > +	unsigned long flags;
> > +
> > +	spin_lock_irqsave(&guc->submission_state.lock, flags);
> > +	list_for_each_entry_safe(ce, cn,
> > +				 &guc->submission_state.destroyed_contexts,
> > +				 destroyed_link) {
> > +		list_del_init(&ce->destroyed_link);
> > +		guc_lrc_desc_unpin(ce);
> > +	}
> > +	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
> > +}
> > +
> > +static void destroyed_worker_func(struct work_struct *w)
> > +{
> > +	struct intel_guc *guc = container_of(w, struct intel_guc,
> > +					     submission_state.destroyed_worker);
> > +	struct intel_gt *gt = guc_to_gt(guc);
> > +	int tmp;
> > +
> > +	with_intel_gt_pm(gt, tmp)
> > +		deregister_destroyed_contexts(guc);
> > +}
> > +
> >   static void guc_context_destroy(struct kref *kref)
> >   {
> >   	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
> > -	struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
> >   	struct intel_guc *guc = ce_to_guc(ce);
> > -	intel_wakeref_t wakeref;
> >   	unsigned long flags;
> > -	bool disabled;
> > +	bool destroy;
> >   	/*
> >   	 * If the guc_id is invalid this context has been stolen and we can free
> >   	 * it immediately. Also can be freed immediately if the context is not
> >   	 * registered with the GuC or the GuC is in the middle of a reset.
> >   	 */
> > -	if (context_guc_id_invalid(ce)) {
> > -		__guc_context_destroy(ce);
> > -		return;
> > -	} else if (submission_disabled(guc) ||
> > -		   !lrc_desc_registered(guc, ce->guc_id.id)) {
> > -		release_guc_id(guc, ce);
> > -		__guc_context_destroy(ce);
> > -		return;
> > -	}
> > -
> > -	/*
> > -	 * We have to acquire the context spinlock and check guc_id again, if it
> > -	 * is valid it hasn't been stolen and needs to be deregistered. We
> > -	 * delete this context from the list of unpinned guc_id available to
> > -	 * steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB
> > -	 * returns indicating this context has been deregistered the guc_id is
> > -	 * returned to the pool of available guc_id.
> > -	 */
> >   	spin_lock_irqsave(&guc->submission_state.lock, flags);
> > -	if (context_guc_id_invalid(ce)) {
> > -		spin_unlock_irqrestore(&guc->submission_state.lock, flags);
> > -		__guc_context_destroy(ce);
> > -		return;
> > +	destroy = submission_disabled(guc) || context_guc_id_invalid(ce) ||
> > +		!lrc_desc_registered(guc, ce->guc_id.id);
> > +	if (likely(!destroy)) {
> > +		if (!list_empty(&ce->guc_id.link))
> > +			list_del_init(&ce->guc_id.link);
> > +		list_add_tail(&ce->destroyed_link,
> > +			      &guc->submission_state.destroyed_contexts);
> > +	} else {
> > +		__release_guc_id(guc, ce);
> 'destroy' can be true if the guc_id is invalid. Is it good to call release
> on an invalid id?
> 

__release_guc_id protects again that, it is harmless to call.

Matt

> John.
> 
> >   	}
> > -
> > -	if (!list_empty(&ce->guc_id.link))
> > -		list_del_init(&ce->guc_id.link);
> >   	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
> > -
> > -	/* Seal race with Reset */
> > -	spin_lock_irqsave(&ce->guc_state.lock, flags);
> > -	disabled = submission_disabled(guc);
> > -	if (likely(!disabled)) {
> > -		set_context_destroyed(ce);
> > -		clr_context_registered(ce);
> > -	}
> > -	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
> > -	if (unlikely(disabled)) {
> > -		release_guc_id(guc, ce);
> > +	if (unlikely(destroy)) {
> >   		__guc_context_destroy(ce);
> >   		return;
> >   	}
> >   	/*
> > -	 * We defer GuC context deregistration until the context is destroyed
> > -	 * in order to save on CTBs. With this optimization ideally we only need
> > -	 * 1 CTB to register the context during the first pin and 1 CTB to
> > -	 * deregister the context when the context is destroyed. Without this
> > -	 * optimization, a CTB would be needed every pin & unpin.
> > -	 *
> > -	 * XXX: Need to acqiure the runtime wakeref as this can be triggered
> > -	 * from context_free_worker when runtime wakeref is not held.
> > -	 * guc_lrc_desc_unpin requires the runtime as a GuC register is written
> > -	 * in H2G CTB to deregister the context. A future patch may defer this
> > -	 * H2G CTB if the runtime wakeref is zero.
> > +	 * We use a worker to issue the H2G to deregister the context as we can
> > +	 * take the GT PM for the first time which isn't allowed from an atomic
> > +	 * context.
> >   	 */
> > -	with_intel_runtime_pm(runtime_pm, wakeref)
> > -		guc_lrc_desc_unpin(ce);
> > +	queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker);
> >   }
> >   static int guc_context_alloc(struct intel_context *ce)
> > @@ -2798,6 +2835,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
> >   		intel_context_put(ce);
> >   	} else if (context_destroyed(ce)) {
> >   		/* Context has been destroyed */
> > +		intel_gt_pm_put_async(guc_to_gt(guc));
> >   		release_guc_id(guc, ce);
> >   		__guc_context_destroy(ce);
> >   	}
>