[Intel-gfx] [PATCH v1 1/1] drm/i915/gt: Dont wait forever when idling in suspend

Rodrigo Vivi rodrigo.vivi at intel.com
Mon Nov 27 20:36:28 UTC 2023


On Tue, Nov 14, 2023 at 08:22:27AM -0800, Alan Previn wrote:
> When suspending, add a timeout when calling
> intel_gt_pm_wait_for_idle else if we have a leaked
> wakeref (which would be indicative of a bug elsewhere
> in the driver), driver will at exit the suspend-resume
> cycle, after the kernel detects the held reference and
> prints a message to abort suspending instead of hanging
> in the kernel forever which then requires serial connection
> or ramoops dump to debug further.
> 
> Signed-off-by: Alan Previn <alan.previn.teres.alexis at intel.com>
> Reviewed-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Tested-by: Mousumi Jana <mousumi.jana at intel.com>

could you please rebase this on top of recent drm-tip and resend?
I got a conflict while trying to apply on drm-intel-gt-next.

As I had stated in another thread, I believe we should go further
and block the suspend and have a clean return to normal operation.
But anyway, I agree this is already a good and necessary improvement
because being in the dark if some bad case like this happens is
the worst thing. So this patch is already an improvement anyway.

again:
Reviewed-by: Rodrigo Vivi <rodrigo.vivi at intel.com>

> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c |  2 +-
>  drivers/gpu/drm/i915/gt/intel_gt_pm.c     |  7 ++++++-
>  drivers/gpu/drm/i915/gt/intel_gt_pm.h     |  7 ++++++-
>  drivers/gpu/drm/i915/intel_wakeref.c      | 14 ++++++++++----
>  drivers/gpu/drm/i915/intel_wakeref.h      |  6 ++++--
>  5 files changed, 27 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 40687806d22a..ffef963037f2 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -686,7 +686,7 @@ void intel_engines_release(struct intel_gt *gt)
>  		if (!engine->release)
>  			continue;
>  
> -		intel_wakeref_wait_for_idle(&engine->wakeref);
> +		intel_wakeref_wait_for_idle(&engine->wakeref, 0);
>  		GEM_BUG_ON(intel_engine_pm_is_awake(engine));
>  
>  		engine->release(engine);
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> index f5899d503e23..25cb39ba9fdf 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -306,6 +306,8 @@ int intel_gt_resume(struct intel_gt *gt)
>  
>  static void wait_for_suspend(struct intel_gt *gt)
>  {
> +	int final_timeout_ms = (I915_GT_SUSPEND_IDLE_TIMEOUT * 10);
> +
>  	if (!intel_gt_pm_is_awake(gt))
>  		return;
>  
> @@ -318,7 +320,10 @@ static void wait_for_suspend(struct intel_gt *gt)
>  		intel_gt_retire_requests(gt);
>  	}
>  
> -	intel_gt_pm_wait_for_idle(gt);
> +	/* we are suspending, so we shouldn't be waiting forever */
> +	if (intel_gt_pm_wait_timeout_for_idle(gt, final_timeout_ms) == -ETIMEDOUT)
> +		gt_warn(gt, "bailing from %s after %d milisec timeout\n",
> +			__func__, final_timeout_ms);
>  }
>  
>  void intel_gt_suspend_prepare(struct intel_gt *gt)
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> index b1eeb5b33918..1757ca4c3077 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> @@ -68,7 +68,12 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
>  
>  static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
>  {
> -	return intel_wakeref_wait_for_idle(&gt->wakeref);
> +	return intel_wakeref_wait_for_idle(&gt->wakeref, 0);
> +}
> +
> +static inline int intel_gt_pm_wait_timeout_for_idle(struct intel_gt *gt, int timeout_ms)
> +{
> +	return intel_wakeref_wait_for_idle(&gt->wakeref, timeout_ms);
>  }
>  
>  void intel_gt_pm_init_early(struct intel_gt *gt);
> diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
> index 623a69089386..f2611c65246b 100644
> --- a/drivers/gpu/drm/i915/intel_wakeref.c
> +++ b/drivers/gpu/drm/i915/intel_wakeref.c
> @@ -113,14 +113,20 @@ void __intel_wakeref_init(struct intel_wakeref *wf,
>  			 "wakeref.work", &key->work, 0);
>  }
>  
> -int intel_wakeref_wait_for_idle(struct intel_wakeref *wf)
> +int intel_wakeref_wait_for_idle(struct intel_wakeref *wf, int timeout_ms)
>  {
> -	int err;
> +	int err = 0;
>  
>  	might_sleep();
>  
> -	err = wait_var_event_killable(&wf->wakeref,
> -				      !intel_wakeref_is_active(wf));
> +	if (!timeout_ms)
> +		err = wait_var_event_killable(&wf->wakeref,
> +					      !intel_wakeref_is_active(wf));
> +	else if (wait_var_event_timeout(&wf->wakeref,
> +					!intel_wakeref_is_active(wf),
> +					msecs_to_jiffies(timeout_ms)) < 1)
> +		err = -ETIMEDOUT;
> +
>  	if (err)
>  		return err;
>  
> diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
> index ec881b097368..302694a780d2 100644
> --- a/drivers/gpu/drm/i915/intel_wakeref.h
> +++ b/drivers/gpu/drm/i915/intel_wakeref.h
> @@ -251,15 +251,17 @@ __intel_wakeref_defer_park(struct intel_wakeref *wf)
>  /**
>   * intel_wakeref_wait_for_idle: Wait until the wakeref is idle
>   * @wf: the wakeref
> + * @timeout_ms: Timeout in ms, 0 means never timeout.
>   *
>   * Wait for the earlier asynchronous release of the wakeref. Note
>   * this will wait for any third party as well, so make sure you only wait
>   * when you have control over the wakeref and trust no one else is acquiring
>   * it.
>   *
> - * Return: 0 on success, error code if killed.
> + * Returns 0 on success, -ETIMEDOUT upon a timeout, or the unlikely
> + * error propagation from wait_var_event_killable if timeout_ms is 0.
>   */
> -int intel_wakeref_wait_for_idle(struct intel_wakeref *wf);
> +int intel_wakeref_wait_for_idle(struct intel_wakeref *wf, int timeout_ms);
>  
>  struct intel_wakeref_auto {
>  	struct drm_i915_private *i915;
> 
> base-commit: 3d1e36691e73b3946b4a9ca8132a34f0319ff984
> -- 
> 2.39.0
> 


More information about the dri-devel mailing list