[Intel-gfx] [PATCH 02/25] drm/i915/guc: Take GT PM ref when deregistering context
John Harrison
john.c.harrison at intel.com
Wed Oct 13 23:59:04 UTC 2021
On 10/13/2021 13:42, Matthew Brost wrote:
> Taking a PM reference to prevent intel_gt_wait_for_idle from short
> circuiting while a deregister context H2G is in flight. To do this must
> issue the deregister H2G from a worker as context can be destroyed from
> an atomic context and taking GT PM ref blows up. Previously we took a
> runtime PM from this atomic context which worked but will stop working
> once runtime pm autosuspend in enabled.
>
> So this patch is two fold, stop intel_gt_wait_for_idle from short
> circuting and fix runtime pm autosuspend.
>
> v2:
> (John Harrison)
> - Split structure changes out in different patch
> (Tvrtko)
> - Don't drop lock in deregister_destroyed_contexts
> v3:
> (John Harrison)
> - Flush destroyed contexts before destroying context reg pool
>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
Reviewed-by: John Harrison <John.C.Harrison at Intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_context.c | 2 +
> drivers/gpu/drm/i915/gt/intel_context_types.h | 7 +
> drivers/gpu/drm/i915/gt/intel_engine_pm.h | 5 +
> drivers/gpu/drm/i915/gt/intel_gt_pm.h | 4 +
> drivers/gpu/drm/i915/gt/uc/intel_guc.h | 11 ++
> .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 146 +++++++++++-------
> 6 files changed, 121 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 35babd02ddfe..d008ef8623ce 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -400,6 +400,8 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
> ce->guc_id.id = GUC_INVALID_LRC_ID;
> INIT_LIST_HEAD(&ce->guc_id.link);
>
> + INIT_LIST_HEAD(&ce->destroyed_link);
> +
> /*
> * Initialize fence to be complete as this is expected to be complete
> * unless there is a pending schedule disable outstanding.
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index e7e3984aab78..4613d027cbc3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -213,6 +213,13 @@ struct intel_context {
> struct list_head link;
> } guc_id;
>
> + /**
> + * @destroyed_link: link in guc->submission_state.destroyed_contexts, in
> + * list when context is pending to be destroyed (deregistered with the
> + * GuC), protected by guc->submission_state.lock
> + */
> + struct list_head destroyed_link;
> +
> #ifdef CONFIG_DRM_I915_SELFTEST
> /**
> * @drop_schedule_enable: Force drop of schedule enable G2H for selftest
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> index 8520c595f5e1..6fdeae668e6e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> @@ -16,6 +16,11 @@ intel_engine_pm_is_awake(const struct intel_engine_cs *engine)
> return intel_wakeref_is_active(&engine->wakeref);
> }
>
> +static inline void __intel_engine_pm_get(struct intel_engine_cs *engine)
> +{
> + __intel_wakeref_get(&engine->wakeref);
> +}
> +
> static inline void intel_engine_pm_get(struct intel_engine_cs *engine)
> {
> intel_wakeref_get(&engine->wakeref);
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> index d0588d8aaa44..05de6c1af25b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> @@ -41,6 +41,10 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt)
> intel_wakeref_put_async(>->wakeref);
> }
>
> +#define with_intel_gt_pm(gt, tmp) \
> + for (tmp = 1, intel_gt_pm_get(gt); tmp; \
> + intel_gt_pm_put(gt), tmp = 0)
> +
> static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
> {
> return intel_wakeref_wait_for_idle(>->wakeref);
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> index 82e248c2290c..74f071a0b6d5 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> @@ -90,6 +90,17 @@ struct intel_guc {
> * refs
> */
> struct list_head guc_id_list;
> + /**
> + * @destroyed_contexts: list of contexts waiting to be destroyed
> + * (deregistered with the GuC)
> + */
> + struct list_head destroyed_contexts;
> + /**
> + * @destroyed_worker: worker to deregister contexts, need as we
> + * need to take a GT PM reference and can't from destroy
> + * function as it might be in an atomic context (no sleeping)
> + */
> + struct work_struct destroyed_worker;
> } submission_state;
>
> /**
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index b2646b088c7f..d2ce47b5541e 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -90,8 +90,8 @@
> * used for all of GuC submission but that could change in the future.
> *
> * guc->submission_state.lock
> - * Protects guc_id allocation for the given GuC, i.e. only one context can be
> - * doing guc_id allocation operations at a time for each GuC in the system.
> + * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
> + * list.
> *
> * ce->guc_state.lock
> * Protects everything under ce->guc_state. Ensures that a context is in the
> @@ -719,6 +719,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
> if (deregister)
> guc_signal_context_fence(ce);
> if (destroyed) {
> + intel_gt_pm_put_async(guc_to_gt(guc));
> release_guc_id(guc, ce);
> __guc_context_destroy(ce);
> }
> @@ -797,6 +798,8 @@ static void guc_flush_submissions(struct intel_guc *guc)
> spin_unlock_irqrestore(&sched_engine->lock, flags);
> }
>
> +static void guc_flush_destroyed_contexts(struct intel_guc *guc);
> +
> void intel_guc_submission_reset_prepare(struct intel_guc *guc)
> {
> int i;
> @@ -815,6 +818,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
> spin_unlock_irq(&guc_to_gt(guc)->irq_lock);
>
> guc_flush_submissions(guc);
> + guc_flush_destroyed_contexts(guc);
>
> /*
> * Handle any outstanding G2Hs before reset. Call IRQ handler directly
> @@ -1126,6 +1130,8 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
> intel_gt_unpark_heartbeats(guc_to_gt(guc));
> }
>
> +static void destroyed_worker_func(struct work_struct *w);
> +
> /*
> * Set up the memory resources to be shared with the GuC (via the GGTT)
> * at firmware loading time.
> @@ -1151,6 +1157,9 @@ int intel_guc_submission_init(struct intel_guc *guc)
> spin_lock_init(&guc->submission_state.lock);
> INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
> ida_init(&guc->submission_state.guc_ids);
> + INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
> + INIT_WORK(&guc->submission_state.destroyed_worker,
> + destroyed_worker_func);
>
> return 0;
> }
> @@ -1160,6 +1169,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
> if (!guc->lrc_desc_pool)
> return;
>
> + guc_flush_destroyed_contexts(guc);
> guc_lrc_desc_pool_destroy(guc);
> i915_sched_engine_put(guc->sched_engine);
> }
> @@ -1859,11 +1869,30 @@ static void guc_context_sched_disable(struct intel_context *ce)
> static inline void guc_lrc_desc_unpin(struct intel_context *ce)
> {
> struct intel_guc *guc = ce_to_guc(ce);
> + struct intel_gt *gt = guc_to_gt(guc);
> + unsigned long flags;
> + bool disabled;
>
> + GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
> GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id));
> GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
> GEM_BUG_ON(context_enabled(ce));
>
> + /* Seal race with Reset */
> + spin_lock_irqsave(&ce->guc_state.lock, flags);
> + disabled = submission_disabled(guc);
> + if (likely(!disabled)) {
> + __intel_gt_pm_get(gt);
> + set_context_destroyed(ce);
> + clr_context_registered(ce);
> + }
> + spin_unlock_irqrestore(&ce->guc_state.lock, flags);
> + if (unlikely(disabled)) {
> + release_guc_id(guc, ce);
> + __guc_context_destroy(ce);
> + return;
> + }
> +
> deregister_context(ce, ce->guc_id.id);
> }
>
> @@ -1891,78 +1920,86 @@ static void __guc_context_destroy(struct intel_context *ce)
> }
> }
>
> +static void guc_flush_destroyed_contexts(struct intel_guc *guc)
> +{
> + struct intel_context *ce, *cn;
> + unsigned long flags;
> +
> + GEM_BUG_ON(!submission_disabled(guc) &&
> + guc_submission_initialized(guc));
> +
> + spin_lock_irqsave(&guc->submission_state.lock, flags);
> + list_for_each_entry_safe(ce, cn,
> + &guc->submission_state.destroyed_contexts,
> + destroyed_link) {
> + list_del_init(&ce->destroyed_link);
> + __release_guc_id(guc, ce);
> + __guc_context_destroy(ce);
> + }
> + spin_unlock_irqrestore(&guc->submission_state.lock, flags);
> +}
> +
> +static void deregister_destroyed_contexts(struct intel_guc *guc)
> +{
> + struct intel_context *ce, *cn;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&guc->submission_state.lock, flags);
> + list_for_each_entry_safe(ce, cn,
> + &guc->submission_state.destroyed_contexts,
> + destroyed_link) {
> + list_del_init(&ce->destroyed_link);
> + guc_lrc_desc_unpin(ce);
> + }
> + spin_unlock_irqrestore(&guc->submission_state.lock, flags);
> +}
> +
> +static void destroyed_worker_func(struct work_struct *w)
> +{
> + struct intel_guc *guc = container_of(w, struct intel_guc,
> + submission_state.destroyed_worker);
> + struct intel_gt *gt = guc_to_gt(guc);
> + int tmp;
> +
> + with_intel_gt_pm(gt, tmp)
> + deregister_destroyed_contexts(guc);
> +}
> +
> static void guc_context_destroy(struct kref *kref)
> {
> struct intel_context *ce = container_of(kref, typeof(*ce), ref);
> - struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
> struct intel_guc *guc = ce_to_guc(ce);
> - intel_wakeref_t wakeref;
> unsigned long flags;
> - bool disabled;
> + bool destroy;
>
> /*
> * If the guc_id is invalid this context has been stolen and we can free
> * it immediately. Also can be freed immediately if the context is not
> * registered with the GuC or the GuC is in the middle of a reset.
> */
> - if (context_guc_id_invalid(ce)) {
> - __guc_context_destroy(ce);
> - return;
> - } else if (submission_disabled(guc) ||
> - !lrc_desc_registered(guc, ce->guc_id.id)) {
> - release_guc_id(guc, ce);
> - __guc_context_destroy(ce);
> - return;
> - }
> -
> - /*
> - * We have to acquire the context spinlock and check guc_id again, if it
> - * is valid it hasn't been stolen and needs to be deregistered. We
> - * delete this context from the list of unpinned guc_id available to
> - * steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB
> - * returns indicating this context has been deregistered the guc_id is
> - * returned to the pool of available guc_id.
> - */
> spin_lock_irqsave(&guc->submission_state.lock, flags);
> - if (context_guc_id_invalid(ce)) {
> - spin_unlock_irqrestore(&guc->submission_state.lock, flags);
> - __guc_context_destroy(ce);
> - return;
> + destroy = submission_disabled(guc) || context_guc_id_invalid(ce) ||
> + !lrc_desc_registered(guc, ce->guc_id.id);
> + if (likely(!destroy)) {
> + if (!list_empty(&ce->guc_id.link))
> + list_del_init(&ce->guc_id.link);
> + list_add_tail(&ce->destroyed_link,
> + &guc->submission_state.destroyed_contexts);
> + } else {
> + __release_guc_id(guc, ce);
> }
> -
> - if (!list_empty(&ce->guc_id.link))
> - list_del_init(&ce->guc_id.link);
> spin_unlock_irqrestore(&guc->submission_state.lock, flags);
> -
> - /* Seal race with Reset */
> - spin_lock_irqsave(&ce->guc_state.lock, flags);
> - disabled = submission_disabled(guc);
> - if (likely(!disabled)) {
> - set_context_destroyed(ce);
> - clr_context_registered(ce);
> - }
> - spin_unlock_irqrestore(&ce->guc_state.lock, flags);
> - if (unlikely(disabled)) {
> - release_guc_id(guc, ce);
> + if (unlikely(destroy)) {
> __guc_context_destroy(ce);
> return;
> }
>
> /*
> - * We defer GuC context deregistration until the context is destroyed
> - * in order to save on CTBs. With this optimization ideally we only need
> - * 1 CTB to register the context during the first pin and 1 CTB to
> - * deregister the context when the context is destroyed. Without this
> - * optimization, a CTB would be needed every pin & unpin.
> - *
> - * XXX: Need to acqiure the runtime wakeref as this can be triggered
> - * from context_free_worker when runtime wakeref is not held.
> - * guc_lrc_desc_unpin requires the runtime as a GuC register is written
> - * in H2G CTB to deregister the context. A future patch may defer this
> - * H2G CTB if the runtime wakeref is zero.
> + * We use a worker to issue the H2G to deregister the context as we can
> + * take the GT PM for the first time which isn't allowed from an atomic
> + * context.
> */
> - with_intel_runtime_pm(runtime_pm, wakeref)
> - guc_lrc_desc_unpin(ce);
> + queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker);
> }
>
> static int guc_context_alloc(struct intel_context *ce)
> @@ -2798,6 +2835,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
> intel_context_put(ce);
> } else if (context_destroyed(ce)) {
> /* Context has been destroyed */
> + intel_gt_pm_put_async(guc_to_gt(guc));
> release_guc_id(guc, ce);
> __guc_context_destroy(ce);
> }
More information about the Intel-gfx
mailing list