[Intel-gfx] [PATCH 11/11] drm/i915 : Add D3COLD OFF support

Tue Jun 21 13:15:00 UTC 2022


> -----Original Message-----
> From: Tangudu, Tilak <tilak.tangudu at intel.com>
> Sent: Tuesday, June 21, 2022 6:05 PM
> To: intel-gfx at lists.freedesktop.org; Ewins, Jon <jon.ewins at intel.com>; Vivi,
> Rodrigo <rodrigo.vivi at intel.com>; Belgaumkar, Vinay
> <vinay.belgaumkar at intel.com>; Wilson, Chris P <chris.p.wilson at intel.com>;
> Dixit, Ashutosh <ashutosh.dixit at intel.com>; Nilawar, Badal
> <badal.nilawar at intel.com>; Gupta, Anshuman <anshuman.gupta at intel.com>;
> Tangudu, Tilak <tilak.tangudu at intel.com>; Roper, Matthew D
> <matthew.d.roper at intel.com>; Gupta, saurabhg <saurabhg.gupta at intel.com>;
> Iddamsetty, Aravind <aravind.iddamsetty at intel.com>; Sundaresan, Sujaritha
> <sujaritha.sundaresan at intel.com>
> Subject: [PATCH 11/11] drm/i915 : Add D3COLD OFF support
> 
> Added lmem deep suspend/resume, which covers lmem eviction and added
> GT/GUC deep suspend/resume using i915_gem_backup_suspend,
> i915_gem_suspend_late and i915_gem_resume.
> 
> Added HAS_D3COLD_OFF feature macro to use for D3COLD OFF feature
We don't need such Macro HAS_D3COLD_OFF , checking bridge pme capability from D3Cold is sufficient,
As it is a pci feature. You can refer below patch.
https://patchwork.freedesktop.org/patch/489746/?series=104128&rev=3

> 
> Signed-off-by: Tilak Tangudu <tilak.tangudu at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_driver.c       | 96 ++++++++++++++++++------
>  drivers/gpu/drm/i915/i915_drv.h          |  1 +
>  drivers/gpu/drm/i915/i915_pci.c          |  1 +
>  drivers/gpu/drm/i915/intel_device_info.h |  1 +
>  4 files changed, 76 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_driver.c
> b/drivers/gpu/drm/i915/i915_driver.c
> index 669365c2958c..1ca45d933a4a 100644
> --- a/drivers/gpu/drm/i915/i915_driver.c
> +++ b/drivers/gpu/drm/i915/i915_driver.c
> @@ -1609,9 +1609,21 @@ static int intel_runtime_suspend(struct device *kdev)
>  	 * We are safe here against re-faults, since the fault handler takes
>  	 * an RPM reference.
>  	 */
> -	i915_gem_runtime_suspend(dev_priv);
> +	if (HAS_D3COLD_OFF(dev_priv)) {
> +		i915_gem_backup_suspend(dev_priv);
> +		i915_ggtt_suspend(to_gt(dev_priv)->ggtt);
> +		i915_gem_suspend_late(dev_priv);
> +	} else {
> +		i915_gem_runtime_suspend(dev_priv);
> +		intel_gt_runtime_suspend(to_gt(dev_priv));
> 
> -	intel_gt_runtime_suspend(to_gt(dev_priv));
> +		/*
> +		 * FIXME: Temporary hammer to avoid freezing the machine on
> our DGFX
> +		 * This should be totally removed when we handle the pci states
> properly
> +		 * on runtime PM and on s2idle cases.
> +		 */
> +		pci_d3cold_disable(pdev);
> +	}
> 
>  	intel_runtime_pm_disable_interrupts(dev_priv);
> 
> @@ -1641,12 +1653,6 @@ static int intel_runtime_suspend(struct device *kdev)
>  		drm_err(&dev_priv->drm,
>  			"Unclaimed access detected prior to suspending\n");
> 
> -	/*
> -	 * FIXME: Temporary hammer to avoid freezing the machine on our
> DGFX
> -	 * This should be totally removed when we handle the pci states
> properly
> -	 * on runtime PM and on s2idle cases.
> -	 */
> -	pci_d3cold_disable(pdev);
>  	rpm->suspended = true;
> 
>  	/*
> @@ -1662,14 +1668,18 @@ static int intel_runtime_suspend(struct device
> *kdev)
>  		 */
>  		intel_opregion_notify_adapter(dev_priv, PCI_D3hot);
>  	} else {
> -		/*
> -		 * current versions of firmware which depend on this opregion
> -		 * notification have repurposed the D1 definition to mean
> -		 * "runtime suspended" vs. what you would normally expect
> (D3)
> -		 * to distinguish it from notifications that might be sent via
> -		 * the suspend path.
> -		 */
> -		intel_opregion_notify_adapter(dev_priv, PCI_D1);
> +		if (HAS_D3COLD_OFF(dev_priv)) {
> +			intel_opregion_suspend(dev_priv, PCI_D3cold);
This is not needed, this opregion mbox is already depreciated.
> +		} else {
> +			/*
> +			 * current versions of firmware which depend on this
> opregion
> +			 * notification have repurposed the D1 definition to
> mean
> +			 * "runtime suspended" vs. what you would normally
> expect (D3)
> +			 * to distinguish it from notifications that might be sent
> via
> +			 * the suspend path.
> +			 */
> +			intel_opregion_notify_adapter(dev_priv, PCI_D1);
> +		}
>  	}
> 
>  	assert_forcewakes_inactive(&dev_priv->uncore);
> @@ -1677,6 +1687,12 @@ static int intel_runtime_suspend(struct device *kdev)
>  	if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv))
>  		intel_hpd_poll_enable(dev_priv);
> 
> +	if (HAS_D3COLD_OFF(dev_priv)) {
> +		i915_save_pci_state(pdev);
PCI core do save/restore state , do we need this ?
> +		pci_disable_device(pdev);
> +		pci_set_power_state(pdev, PCI_D3cold);
We don't need to set the pci state as well,  as though pci config state, it can set up to max d3hot.
> +	}
> +
>  	drm_dbg(&dev_priv->drm, "Device suspended\n");
>  	return 0;
>  }
> @@ -1696,9 +1712,28 @@ static int intel_runtime_resume(struct device *kdev)
>  	drm_WARN_ON_ONCE(&dev_priv->drm, atomic_read(&rpm-
> >wakeref_count));
>  	disable_rpm_wakeref_asserts(rpm);
> 
> -	intel_opregion_notify_adapter(dev_priv, PCI_D0);
> +	if (HAS_D3COLD_OFF(dev_priv)) {
> +		ret = pci_set_power_state(pdev, PCI_D0);
> +		if (ret) {
> +			drm_err(&dev_priv->drm,
> +				"failed to set PCI D0 power state (%d)\n", ret);
> +			return ret;
> +		}
> +
> +		i915_load_pci_state(pdev);
> +
> +		ret = pci_enable_device(pdev);
> +		if (ret)
> +			return ret;
> +		pci_set_master(pdev);
> +		intel_opregion_resume(dev_priv);
> +	} else {
> +		pci_d3cold_enable(pdev);
> +		intel_opregion_notify_adapter(dev_priv, PCI_D0);
> +	}
> +
>  	rpm->suspended = false;
> -	pci_d3cold_enable(pdev);
> +
>  	if (intel_uncore_unclaimed_mmio(&dev_priv->uncore))
>  		drm_dbg(&dev_priv->drm,
>  			"Unclaimed access during suspend, bios?\n"); @@ -
> 1711,12 +1746,27 @@ static int intel_runtime_resume(struct device *kdev)
> 
>  	intel_runtime_pm_enable_interrupts(dev_priv);
> 
> -	/*
> -	 * No point of rolling back things in case of an error, as the best
> -	 * we can do is to hope that things will still work (and disable RPM).
> -	 */
> -	intel_gt_runtime_resume(to_gt(dev_priv));
> +	if (HAS_D3COLD_OFF(dev_priv)) {
> +		ret = i915_pcode_init(dev_priv);
> +		if (ret)
> +			return ret;
> 
> +		sanitize_gpu(dev_priv);
> +		ret = i915_ggtt_enable_hw(dev_priv);
> +		if (ret)
> +			drm_err(&dev_priv->drm, "failed to re-enable
> GGTT\n");
> +
> +		i915_ggtt_resume(to_gt(dev_priv)->ggtt);
> +
> +		i915_gem_resume(dev_priv);
> +
> +	} else {
> +		/*
> +		 * No point of rolling back things in case of an error, as the best
> +		 * we can do is to hope that things will still work (and disable
> RPM).
> +		 */
> +		intel_gt_runtime_resume(to_gt(dev_priv));
> +	}
>  	/*
>  	 * On VLV/CHV display interrupts are part of the display
>  	 * power well, so hpd is reinitialized from there. For diff --git
> a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index
> ec8c7a2af673..633d20c2372a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1300,6 +1300,7 @@ IS_SUBPLATFORM(const struct drm_i915_private
> *i915,
> 
>  #define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i))
> #define HAS_LMEM(i915) HAS_REGION(i915, REGION_LMEM)
> +#define HAS_D3COLD_OFF(i915) (INTEL_INFO(dev_priv)->has_d3cold_off)
> 
>  /*
>   * Platform has the dedicated compression control state for each lmem surfaces
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 5e51fc29bb8b..749ccb14fd6f 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -1064,6 +1064,7 @@ static const struct intel_device_info xehpsdv_info = {
>  	.has_guc_deprivilege = 1, \
>  	.has_heci_pxp = 1, \
>  	.needs_compact_pt = 1, \
> +	.has_d3cold_off = 1, \
>  	.has_media_ratio_mode = 1, \
>  	.platform_engine_mask = \
>  		BIT(RCS0) | BIT(BCS0) | \
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h
> b/drivers/gpu/drm/i915/intel_device_info.h
> index 08341174ee0a..495c12d65c3e 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -140,6 +140,7 @@ enum intel_ppgtt_type {
>  	/* Keep has_* in alphabetical order */ \
>  	func(has_64bit_reloc); \
>  	func(has_64k_pages); \
> +	func(has_d3cold_off); \
>  	func(needs_compact_pt); \
>  	func(gpu_reset_clobbers_display); \
>  	func(has_reset_engine); \
> --
> 2.25.1