[Intel-gfx] [PATCH] drm/i915/mtl: Extend Wa_14017073508 in suspend/resume flow

Nilawar, Badal badal.nilawar at intel.com
Tue Mar 7 13:44:13 UTC 2023



On 07-03-2023 01:50, Rodrigo Vivi wrote:
> On Mon, Mar 06, 2023 at 08:33:04AM +0000, Gupta, Anshuman wrote:
>>
>>
>>> -----Original Message-----
>>> From: Nilawar, Badal <badal.nilawar at intel.com>
>>> Sent: Saturday, March 4, 2023 9:48 PM
>>> To: intel-gfx at lists.freedesktop.org
>>> Cc: Gupta, Anshuman <anshuman.gupta at intel.com>; Ewins, Jon
>>> <jon.ewins at intel.com>; Belgaumkar, Vinay <vinay.belgaumkar at intel.com>;
>>> Vivi, Rodrigo <rodrigo.vivi at intel.com>
>>> Subject: [PATCH] drm/i915/mtl: Extend Wa_14017073508 in suspend/resume
>>> flow
>>>
>>> During suspend resume flow it was seen that lots forcewakes are taken and
>>> released for media. So to avoid HANG due to MC6 entry/exit while accessing
>>> media applied Wa_14017073508 in suspend/resume flow.
>> We need to fix such gaps in runtime PM as well, but that will make this WA dirty somewhat similar to other RC6 related WA on other platforms.
>> I suggest to disable the MC6 rather on A step rather than having this form of WA. This WA is not needed on A step.
>> @rodrigo and @matt what is you opinion here ?
> 
> My opinion since day-0 is that we should simply disable MC6 on A0 parts
> and move on with this feature only on B0. Since the beginning it was
> pretty clear that this workaround would get out of control and become
> the same as the PVC-RC6 one.
Thanks for your opinion Rodrigo. I will disable MC6 and send the patch.

Regards,
Badal Nilawar
> 
>>
>> However some comment her to improve this patch.
>>
>>>
>>> Signed-off-by: Badal Nilawar <badal.nilawar at intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  6 ++++-
>>>   drivers/gpu/drm/i915/gt/intel_gt_pm.c    | 32 ++++++++++++++++--------
>>>   drivers/gpu/drm/i915/gt/intel_gt_pm.h    |  3 +++
>>>   drivers/gpu/drm/i915/gt/intel_gt_types.h |  1 +
>>>   drivers/gpu/drm/i915/i915_driver.c       | 32 ++++++++++++++++++++++++
>>>   5 files changed, 62 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>>> b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>>> index 0d812f4d787d..60deac41104d 100644
>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>>> @@ -160,8 +160,12 @@ void i915_gem_suspend_late(struct
>>> drm_i915_private *i915)
>>>   	 * machine in an unusable condition.
>>>   	 */
>>>
>>> -	for_each_gt(gt, i915, i)
>>> +	for_each_gt(gt, i915, i) {
>>> +		/* Wa_14017073508: mtl */
>>> +		mtl_media_busy(gt);
>> Let's add  these busy/idle in intel_gt_suspend_late()
>>>   		intel_gt_suspend_late(gt);
>>> +		mtl_media_idle(gt);
>>> +	}
>>>
>>>   	spin_lock_irqsave(&i915->mm.obj_lock, flags);
>>>   	for (phase = phases; *phase; phase++) { diff --git
>>> a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
>>> b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
>>> index cef3d6f5c34e..1f3bf1cf0421 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
>>> @@ -26,24 +26,34 @@
>>>
>>>   #define I915_GT_SUSPEND_IDLE_TIMEOUT (HZ / 2)
>>>
>>> -static void mtl_media_busy(struct intel_gt *gt)
>>> +void mtl_media_busy(struct intel_gt *gt)
>>>   {
>>>   	/* Wa_14017073508: mtl */
>>> -	if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) &&
>>> +	if (gt && IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)
>>> &&
>>>   	    gt->type == GT_MEDIA)
>> Let's have this in a separate patch , I believe that is already floated and ready to merge.
>>> -		snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE,
>>> -				  PCODE_MBOX_GT_STATE_MEDIA_BUSY,
>>> -
>>> PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0);
>>> +		if (!atomic_inc_not_zero(&gt->media_wakeref)) {
>>> +			drm_err(&gt->i915->drm, "Media busy\n");
>> This should not be Error message.
>>> +			snb_pcode_write_p(gt->uncore,
>>> PCODE_MBOX_GT_STATE,
>>> +
>>> PCODE_MBOX_GT_STATE_MEDIA_BUSY,
>>> +
>>> PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0);
>>> +			atomic_inc(&gt->media_wakeref);
>>> +		}
>>>   }
>>>
>>> -static void mtl_media_idle(struct intel_gt *gt)
>>> +void mtl_media_idle(struct intel_gt *gt)
>>>   {
>>>   	/* Wa_14017073508: mtl */
>>> -	if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) &&
>>> -	    gt->type == GT_MEDIA)
>>> -		snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE,
>>> -
>>> PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY,
>>> -
>>> PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0);
>>> +	if (gt && IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)
>>> &&
>>> +	    gt->type == GT_MEDIA) {
>>> +		WARN_ON(atomic_read(&gt->media_wakeref) <= 0);
>>> +		if (!atomic_add_unless(&gt->media_wakeref, -1, 1)) {
>>> +			drm_err(&gt->i915->drm, "Media idle\n");
>> Same here this can't be error message.
>> Thanks,
>> Anshuman Gupta.
>>> +			snb_pcode_write_p(gt->uncore,
>>> PCODE_MBOX_GT_STATE,
>>> +
>>> PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY,
>>> +
>>> PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0);
>>> +			atomic_dec(&gt->media_wakeref);
>>> +		}
>>> +	}
>>>   }
>>>
>>>   static void user_forcewake(struct intel_gt *gt, bool suspend) diff --git
>>> a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
>>> b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
>>> index 6c9a46452364..43ffabf9babe 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
>>> @@ -89,4 +89,7 @@ static inline bool is_mock_gt(const struct intel_gt *gt)
>>>   	return I915_SELFTEST_ONLY(gt->awake == -ENODEV);  }
>>>
>>> +void mtl_media_busy(struct intel_gt *gt); void mtl_media_idle(struct
>>> +intel_gt *gt);
>>> +
>>>   #endif /* INTEL_GT_PM_H */
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h
>>> b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>>> index f08c2556aa25..321ccfef4028 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>>> @@ -145,6 +145,7 @@ struct intel_gt {
>>>
>>>   	struct intel_wakeref wakeref;
>>>   	atomic_t user_wakeref;
>>> +	atomic_t media_wakeref;
>>>
>>>   	struct list_head closed_vma;
>>>   	spinlock_t closed_lock; /* guards the list of closed_vma */ diff --git
>>> a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
>>> index 8bc76dede332..c7625ea28022 100644
>>> --- a/drivers/gpu/drm/i915/i915_driver.c
>>> +++ b/drivers/gpu/drm/i915/i915_driver.c
>>> @@ -327,6 +327,8 @@ static int i915_driver_mmio_probe(struct
>>> drm_i915_private *dev_priv)
>>>   	intel_gmch_bar_setup(dev_priv);
>>>   	intel_device_info_runtime_init(dev_priv);
>>>
>>> +	/* Wa_14017073508: mtl */
>>> +	mtl_media_busy(dev_priv->media_gt);
>>>   	for_each_gt(gt, dev_priv, i) {
>>>   		ret = intel_gt_init_mmio(gt);
>>>   		if (ret)
>>> @@ -335,10 +337,14 @@ static int i915_driver_mmio_probe(struct
>>> drm_i915_private *dev_priv)
>>>
>>>   	/* As early as possible, scrub existing GPU state before clobbering */
>>>   	sanitize_gpu(dev_priv);
>>> +	/* Wa_14017073508: mtl */
>>> +	mtl_media_idle(dev_priv->media_gt);
>>>
>>>   	return 0;
>>>
>>>   err_uncore:
>>> +	/* Wa_14017073508: mtl */
>>> +	mtl_media_idle(dev_priv->media_gt);
>>>   	intel_gmch_bar_teardown(dev_priv);
>>>
>>>   	return ret;
>>> @@ -781,6 +787,12 @@ int i915_driver_probe(struct pci_dev *pdev, const
>>> struct pci_device_id *ent)
>>>   	if (ret < 0)
>>>   		goto out_tiles_cleanup;
>>>
>>> +	/* Wa_14017073508: mtl */
>>> +	if (i915->media_gt) {
>>> +		atomic_set(&i915->media_gt->media_wakeref, 0);
>>> +		mtl_media_busy(i915->media_gt);
>>> +	}
>>> +
>>>   	ret = i915_driver_hw_probe(i915);
>>>   	if (ret < 0)
>>>   		goto out_cleanup_mmio;
>>> @@ -838,6 +850,8 @@ int i915_driver_probe(struct pci_dev *pdev, const
>>> struct pci_device_id *ent)
>>>   	i915_gem_drain_freed_objects(i915);
>>>   	i915_ggtt_driver_late_release(i915);
>>>   out_cleanup_mmio:
>>> +	/* Wa_14017073508: mtl */
>>> +	mtl_media_idle(i915->media_gt);
>>>   	i915_driver_mmio_release(i915);
>>>   out_tiles_cleanup:
>>>   	intel_gt_release_all(i915);
>>> @@ -1063,6 +1077,9 @@ static int i915_drm_suspend(struct drm_device
>>> *dev)
>>>
>>>   	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>>>
>>> +	/* Wa_14017073508: mtl */
>>> +	mtl_media_busy(dev_priv->media_gt);
>>> +
>>>   	/* We do a lot of poking in a lot of registers, make sure they work
>>>   	 * properly. */
>>>   	intel_power_domains_disable(dev_priv);
>>> @@ -1097,6 +1114,9 @@ static int i915_drm_suspend(struct drm_device
>>> *dev)
>>>
>>>   	intel_dmc_suspend(dev_priv);
>>>
>>> +	/* Wa_14017073508: mtl */
>>> +	mtl_media_busy(dev_priv->media_gt);
>>> +
>>>   	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>>>
>>>   	i915_gem_drain_freed_objects(dev_priv);
>>> @@ -1197,6 +1217,9 @@ static int i915_drm_resume(struct drm_device
>>> *dev)
>>>
>>>   	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>>>
>>> +	/* Wa_14017073508: mtl */
>>> +	mtl_media_busy(dev_priv->media_gt);
>>> +
>>>   	ret = i915_pcode_init(dev_priv);
>>>   	if (ret)
>>>   		return ret;
>>> @@ -1260,6 +1283,9 @@ static int i915_drm_resume(struct drm_device
>>> *dev)
>>>
>>>   	intel_gvt_resume(dev_priv);
>>>
>>> +	/* Wa_14017073508: mtl */
>>> +	mtl_media_idle(dev_priv->media_gt);
>>> +
>>>   	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>>>
>>>   	return 0;
>>> @@ -1319,6 +1345,9 @@ static int i915_drm_resume_early(struct
>>> drm_device *dev)
>>>
>>>   	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>>>
>>> +	/* Wa_14017073508: mtl */
>>> +	mtl_media_busy(dev_priv->media_gt);
>>> +
>>>   	ret = vlv_resume_prepare(dev_priv, false);
>>>   	if (ret)
>>>   		drm_err(&dev_priv->drm,
>>> @@ -1333,6 +1362,9 @@ static int i915_drm_resume_early(struct
>>> drm_device *dev)
>>>
>>>   	intel_power_domains_resume(dev_priv);
>>>
>>> +	/* Wa_14017073508: mtl */
>>> +	mtl_media_idle(dev_priv->media_gt);
>>> +
>>>   	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>>>
>>>   	return ret;
>>> --
>>> 2.25.1
>>


More information about the Intel-gfx mailing list