[Intel-gfx] [PATCH v3 2/3] drm/i915/guc: Close deregister-context race against CT-loss
Rodrigo Vivi
rodrigo.vivi at intel.com
Thu Sep 14 15:34:51 UTC 2023
On Sat, Sep 09, 2023 at 08:58:45PM -0700, Alan Previn wrote:
> If we are at the end of suspend or very early in resume
> its possible an async fence signal could lead us to the
> execution of the context destruction worker (after the
> prior worker flush).
>
> Even if checking that the CT is enabled before calling
> destroyed_worker_func, guc_lrc_desc_unpin may still fail
> because in corner cases, as we traverse the GuC's
> context-destroy list, the CT could get disabled in the mid
> of it right before calling the GuC's CT send function.
>
> We've witnessed this race condition once every ~6000-8000
> suspend-resume cycles while ensuring workloads that render
> something onscreen is continuously started just before
> we suspend (and the workload is small enough to complete
> and trigger the queued engine/context free-up either very
> late in suspend or very early in resume).
>
> In such a case, we need to unroll the unpin process because
> guc-lrc-unpin takes a gt wakeref which only gets released in
> the G2H IRQ reply that never comes through in this corner
> case. That will cascade into a kernel hang later at the tail
> end of suspend in this function:
>
> intel_wakeref_wait_for_idle(>->wakeref)
> (called by) - intel_gt_pm_wait_for_idle
> (called by) - wait_for_suspend
>
> Doing this unroll and keeping the context in the GuC's
> destroy-list will allow the context to get picked up on
> the next destroy worker invocation or purged as part of a
> major GuC sanitization or reset flow.
>
> Signed-off-by: Alan Previn <alan.previn.teres.alexis at intel.com>
> Tested-by: Mousumi Jana <mousumi.jana at intel.com>
> ---
> .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 76 ++++++++++++++++---
> 1 file changed, 65 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 0ed44637bca0..db7df1217350 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -235,6 +235,13 @@ set_context_destroyed(struct intel_context *ce)
> ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
> }
>
> +static inline void
> +clr_context_destroyed(struct intel_context *ce)
> +{
> + lockdep_assert_held(&ce->guc_state.lock);
> + ce->guc_state.sched_state &= ~SCHED_STATE_DESTROYED;
> +}
> +
> static inline bool context_pending_disable(struct intel_context *ce)
> {
> return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
> @@ -612,6 +619,8 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
> u32 g2h_len_dw,
> bool loop)
> {
> + int ret;
> +
> /*
> * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
> * so we don't handle the case where we don't get a reply because we
> @@ -622,7 +631,11 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
> if (g2h_len_dw)
> atomic_inc(&guc->outstanding_submission_g2h);
>
> - return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
> + ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
> + if (ret)
> + atomic_dec(&guc->outstanding_submission_g2h);
> +
> + return ret;
> }
>
> int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
> @@ -3173,12 +3186,13 @@ static void guc_context_close(struct intel_context *ce)
> spin_unlock_irqrestore(&ce->guc_state.lock, flags);
> }
>
> -static inline void guc_lrc_desc_unpin(struct intel_context *ce)
> +static inline int guc_lrc_desc_unpin(struct intel_context *ce)
> {
> struct intel_guc *guc = ce_to_guc(ce);
> struct intel_gt *gt = guc_to_gt(guc);
> unsigned long flags;
> bool disabled;
> + int ret;
>
> GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
> GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id));
> @@ -3188,19 +3202,33 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
> /* Seal race with Reset */
> spin_lock_irqsave(&ce->guc_state.lock, flags);
> disabled = submission_disabled(guc);
> - if (likely(!disabled)) {
> - __intel_gt_pm_get(gt);
> - set_context_destroyed(ce);
> - clr_context_registered(ce);
> - }
> - spin_unlock_irqrestore(&ce->guc_state.lock, flags);
> if (unlikely(disabled)) {
> + spin_unlock_irqrestore(&ce->guc_state.lock, flags);
> release_guc_id(guc, ce);
> __guc_context_destroy(ce);
> - return;
> + return 0;
> }
>
> - deregister_context(ce, ce->guc_id.id);
> + /* GuC is active, lets destroy this context,
> + * but at this point we can still be racing with
> + * suspend, so we undo everything if the H2G fails
> + */
> +
> + /* Change context state to destroyed and get gt-pm */
> + __intel_gt_pm_get(gt);
> + set_context_destroyed(ce);
> + clr_context_registered(ce);
> +
> + ret = deregister_context(ce, ce->guc_id.id);
> + if (ret) {
> + /* Undo the state change and put gt-pm if that failed */
> + set_context_registered(ce);
> + clr_context_destroyed(ce);
> + intel_gt_pm_put(gt);
This is a might_sleep inside a spin_lock! Are you 100% confident no WARN
was seeing during the tests indicated in the commit msg?
> + }
> + spin_unlock_irqrestore(&ce->guc_state.lock, flags);
> +
> + return 0;
If you are always returning 0, there's no pointing in s/void/int...
> }
>
> static void __guc_context_destroy(struct intel_context *ce)
> @@ -3268,7 +3296,22 @@ static void deregister_destroyed_contexts(struct intel_guc *guc)
> if (!ce)
> break;
>
> - guc_lrc_desc_unpin(ce);
> + if (guc_lrc_desc_unpin(ce)) {
> + /*
> + * This means GuC's CT link severed mid-way which could happen
> + * in suspend-resume corner cases. In this case, put the
> + * context back into the destroyed_contexts list which will
> + * get picked up on the next context deregistration event or
> + * purged in a GuC sanitization event (reset/unload/wedged/...).
> + */
> + spin_lock_irqsave(&guc->submission_state.lock, flags);
> + list_add_tail(&ce->destroyed_link,
> + &guc->submission_state.destroyed_contexts);
> + spin_unlock_irqrestore(&guc->submission_state.lock, flags);
> + /* Bail now since the list might never be emptied if h2gs fail */
> + break;
> + }
> +
> }
> }
>
> @@ -3279,6 +3322,17 @@ static void destroyed_worker_func(struct work_struct *w)
> struct intel_gt *gt = guc_to_gt(guc);
> int tmp;
>
> + /*
> + * In rare cases we can get here via async context-free fence-signals that
> + * come very late in suspend flow or very early in resume flows. In these
> + * cases, GuC won't be ready but just skipping it here is fine as these
> + * pending-destroy-contexts get destroyed totally at GuC reset time at the
> + * end of suspend.. OR.. this worker can be picked up later on the next
> + * context destruction trigger after resume-completes
who is triggering the work queue again?
I mean, I'm wondering if it is necessary to re-schedule it from inside...
but also wonder if this is safe anyway...
> + */
> + if (!intel_guc_is_ready(guc))
> + return;
> +
> with_intel_gt_pm(gt, tmp)
> deregister_destroyed_contexts(guc);
> }
> --
> 2.39.0
>
More information about the Intel-gfx
mailing list