[Intel-gfx] [PATCH] drm/i915/mtl: Extend Wa_14017073508 in suspend/resume flow

Gupta, Anshuman anshuman.gupta at intel.com
Mon Mar 6 08:33:04 UTC 2023



> -----Original Message-----
> From: Nilawar, Badal <badal.nilawar at intel.com>
> Sent: Saturday, March 4, 2023 9:48 PM
> To: intel-gfx at lists.freedesktop.org
> Cc: Gupta, Anshuman <anshuman.gupta at intel.com>; Ewins, Jon
> <jon.ewins at intel.com>; Belgaumkar, Vinay <vinay.belgaumkar at intel.com>;
> Vivi, Rodrigo <rodrigo.vivi at intel.com>
> Subject: [PATCH] drm/i915/mtl: Extend Wa_14017073508 in suspend/resume
> flow
> 
> During suspend resume flow it was seen that lots forcewakes are taken and
> released for media. So to avoid HANG due to MC6 entry/exit while accessing
> media applied Wa_14017073508 in suspend/resume flow.
We need to fix such gaps in runtime PM as well, but that will make this WA dirty somewhat similar to other RC6 related WA on other platforms. 
I suggest to disable the MC6 rather on A step rather than having this form of WA. This WA is not needed on A step.
@rodrigo and @matt what is you opinion here ?

However some comment her to improve this patch.

> 
> Signed-off-by: Badal Nilawar <badal.nilawar at intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  6 ++++-
>  drivers/gpu/drm/i915/gt/intel_gt_pm.c    | 32 ++++++++++++++++--------
>  drivers/gpu/drm/i915/gt/intel_gt_pm.h    |  3 +++
>  drivers/gpu/drm/i915/gt/intel_gt_types.h |  1 +
>  drivers/gpu/drm/i915/i915_driver.c       | 32 ++++++++++++++++++++++++
>  5 files changed, 62 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> index 0d812f4d787d..60deac41104d 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> @@ -160,8 +160,12 @@ void i915_gem_suspend_late(struct
> drm_i915_private *i915)
>  	 * machine in an unusable condition.
>  	 */
> 
> -	for_each_gt(gt, i915, i)
> +	for_each_gt(gt, i915, i) {
> +		/* Wa_14017073508: mtl */
> +		mtl_media_busy(gt);
Let's add  these busy/idle in intel_gt_suspend_late()
>  		intel_gt_suspend_late(gt);
> +		mtl_media_idle(gt);
> +	}
> 
>  	spin_lock_irqsave(&i915->mm.obj_lock, flags);
>  	for (phase = phases; *phase; phase++) { diff --git
> a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> index cef3d6f5c34e..1f3bf1cf0421 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -26,24 +26,34 @@
> 
>  #define I915_GT_SUSPEND_IDLE_TIMEOUT (HZ / 2)
> 
> -static void mtl_media_busy(struct intel_gt *gt)
> +void mtl_media_busy(struct intel_gt *gt)
>  {
>  	/* Wa_14017073508: mtl */
> -	if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) &&
> +	if (gt && IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)
> &&
>  	    gt->type == GT_MEDIA)
Let's have this in a separate patch , I believe that is already floated and ready to merge. 
> -		snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE,
> -				  PCODE_MBOX_GT_STATE_MEDIA_BUSY,
> -
> PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0);
> +		if (!atomic_inc_not_zero(&gt->media_wakeref)) {
> +			drm_err(&gt->i915->drm, "Media busy\n");
This should not be Error message.
> +			snb_pcode_write_p(gt->uncore,
> PCODE_MBOX_GT_STATE,
> +
> PCODE_MBOX_GT_STATE_MEDIA_BUSY,
> +
> PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0);
> +			atomic_inc(&gt->media_wakeref);
> +		}
>  }
> 
> -static void mtl_media_idle(struct intel_gt *gt)
> +void mtl_media_idle(struct intel_gt *gt)
>  {
>  	/* Wa_14017073508: mtl */
> -	if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) &&
> -	    gt->type == GT_MEDIA)
> -		snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE,
> -
> PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY,
> -
> PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0);
> +	if (gt && IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)
> &&
> +	    gt->type == GT_MEDIA) {
> +		WARN_ON(atomic_read(&gt->media_wakeref) <= 0);
> +		if (!atomic_add_unless(&gt->media_wakeref, -1, 1)) {
> +			drm_err(&gt->i915->drm, "Media idle\n");
Same here this can't be error message.
Thanks,
Anshuman Gupta.
> +			snb_pcode_write_p(gt->uncore,
> PCODE_MBOX_GT_STATE,
> +
> PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY,
> +
> PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0);
> +			atomic_dec(&gt->media_wakeref);
> +		}
> +	}
>  }
> 
>  static void user_forcewake(struct intel_gt *gt, bool suspend) diff --git
> a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> index 6c9a46452364..43ffabf9babe 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> @@ -89,4 +89,7 @@ static inline bool is_mock_gt(const struct intel_gt *gt)
>  	return I915_SELFTEST_ONLY(gt->awake == -ENODEV);  }
> 
> +void mtl_media_busy(struct intel_gt *gt); void mtl_media_idle(struct
> +intel_gt *gt);
> +
>  #endif /* INTEL_GT_PM_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> index f08c2556aa25..321ccfef4028 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -145,6 +145,7 @@ struct intel_gt {
> 
>  	struct intel_wakeref wakeref;
>  	atomic_t user_wakeref;
> +	atomic_t media_wakeref;
> 
>  	struct list_head closed_vma;
>  	spinlock_t closed_lock; /* guards the list of closed_vma */ diff --git
> a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
> index 8bc76dede332..c7625ea28022 100644
> --- a/drivers/gpu/drm/i915/i915_driver.c
> +++ b/drivers/gpu/drm/i915/i915_driver.c
> @@ -327,6 +327,8 @@ static int i915_driver_mmio_probe(struct
> drm_i915_private *dev_priv)
>  	intel_gmch_bar_setup(dev_priv);
>  	intel_device_info_runtime_init(dev_priv);
> 
> +	/* Wa_14017073508: mtl */
> +	mtl_media_busy(dev_priv->media_gt);
>  	for_each_gt(gt, dev_priv, i) {
>  		ret = intel_gt_init_mmio(gt);
>  		if (ret)
> @@ -335,10 +337,14 @@ static int i915_driver_mmio_probe(struct
> drm_i915_private *dev_priv)
> 
>  	/* As early as possible, scrub existing GPU state before clobbering */
>  	sanitize_gpu(dev_priv);
> +	/* Wa_14017073508: mtl */
> +	mtl_media_idle(dev_priv->media_gt);
> 
>  	return 0;
> 
>  err_uncore:
> +	/* Wa_14017073508: mtl */
> +	mtl_media_idle(dev_priv->media_gt);
>  	intel_gmch_bar_teardown(dev_priv);
> 
>  	return ret;
> @@ -781,6 +787,12 @@ int i915_driver_probe(struct pci_dev *pdev, const
> struct pci_device_id *ent)
>  	if (ret < 0)
>  		goto out_tiles_cleanup;
> 
> +	/* Wa_14017073508: mtl */
> +	if (i915->media_gt) {
> +		atomic_set(&i915->media_gt->media_wakeref, 0);
> +		mtl_media_busy(i915->media_gt);
> +	}
> +
>  	ret = i915_driver_hw_probe(i915);
>  	if (ret < 0)
>  		goto out_cleanup_mmio;
> @@ -838,6 +850,8 @@ int i915_driver_probe(struct pci_dev *pdev, const
> struct pci_device_id *ent)
>  	i915_gem_drain_freed_objects(i915);
>  	i915_ggtt_driver_late_release(i915);
>  out_cleanup_mmio:
> +	/* Wa_14017073508: mtl */
> +	mtl_media_idle(i915->media_gt);
>  	i915_driver_mmio_release(i915);
>  out_tiles_cleanup:
>  	intel_gt_release_all(i915);
> @@ -1063,6 +1077,9 @@ static int i915_drm_suspend(struct drm_device
> *dev)
> 
>  	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
> 
> +	/* Wa_14017073508: mtl */
> +	mtl_media_busy(dev_priv->media_gt);
> +
>  	/* We do a lot of poking in a lot of registers, make sure they work
>  	 * properly. */
>  	intel_power_domains_disable(dev_priv);
> @@ -1097,6 +1114,9 @@ static int i915_drm_suspend(struct drm_device
> *dev)
> 
>  	intel_dmc_suspend(dev_priv);
> 
> +	/* Wa_14017073508: mtl */
> +	mtl_media_busy(dev_priv->media_gt);
> +
>  	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
> 
>  	i915_gem_drain_freed_objects(dev_priv);
> @@ -1197,6 +1217,9 @@ static int i915_drm_resume(struct drm_device
> *dev)
> 
>  	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
> 
> +	/* Wa_14017073508: mtl */
> +	mtl_media_busy(dev_priv->media_gt);
> +
>  	ret = i915_pcode_init(dev_priv);
>  	if (ret)
>  		return ret;
> @@ -1260,6 +1283,9 @@ static int i915_drm_resume(struct drm_device
> *dev)
> 
>  	intel_gvt_resume(dev_priv);
> 
> +	/* Wa_14017073508: mtl */
> +	mtl_media_idle(dev_priv->media_gt);
> +
>  	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
> 
>  	return 0;
> @@ -1319,6 +1345,9 @@ static int i915_drm_resume_early(struct
> drm_device *dev)
> 
>  	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
> 
> +	/* Wa_14017073508: mtl */
> +	mtl_media_busy(dev_priv->media_gt);
> +
>  	ret = vlv_resume_prepare(dev_priv, false);
>  	if (ret)
>  		drm_err(&dev_priv->drm,
> @@ -1333,6 +1362,9 @@ static int i915_drm_resume_early(struct
> drm_device *dev)
> 
>  	intel_power_domains_resume(dev_priv);
> 
> +	/* Wa_14017073508: mtl */
> +	mtl_media_idle(dev_priv->media_gt);
> +
>  	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
> 
>  	return ret;
> --
> 2.25.1



More information about the Intel-gfx mailing list