[Intel-gfx] [PATCH] drm/i915/mtl: Extend Wa_14017073508 in suspend/resume flow
Nilawar, Badal
badal.nilawar at intel.com
Tue Mar 7 13:44:13 UTC 2023
On 07-03-2023 01:50, Rodrigo Vivi wrote:
> On Mon, Mar 06, 2023 at 08:33:04AM +0000, Gupta, Anshuman wrote:
>>
>>
>>> -----Original Message-----
>>> From: Nilawar, Badal <badal.nilawar at intel.com>
>>> Sent: Saturday, March 4, 2023 9:48 PM
>>> To: intel-gfx at lists.freedesktop.org
>>> Cc: Gupta, Anshuman <anshuman.gupta at intel.com>; Ewins, Jon
>>> <jon.ewins at intel.com>; Belgaumkar, Vinay <vinay.belgaumkar at intel.com>;
>>> Vivi, Rodrigo <rodrigo.vivi at intel.com>
>>> Subject: [PATCH] drm/i915/mtl: Extend Wa_14017073508 in suspend/resume
>>> flow
>>>
>>> During suspend resume flow it was seen that lots forcewakes are taken and
>>> released for media. So to avoid HANG due to MC6 entry/exit while accessing
>>> media applied Wa_14017073508 in suspend/resume flow.
>> We need to fix such gaps in runtime PM as well, but that will make this WA dirty somewhat similar to other RC6 related WA on other platforms.
>> I suggest to disable the MC6 rather on A step rather than having this form of WA. This WA is not needed on A step.
>> @rodrigo and @matt what is you opinion here ?
>
> My opinion since day-0 is that we should simply disable MC6 on A0 parts
> and move on with this feature only on B0. Since the beginning it was
> pretty clear that this workaround would get out of control and become
> the same as the PVC-RC6 one.
Thanks for your opinion Rodrigo. I will disable MC6 and send the patch.
Regards,
Badal Nilawar
>
>>
>> However some comment her to improve this patch.
>>
>>>
>>> Signed-off-by: Badal Nilawar <badal.nilawar at intel.com>
>>> ---
>>> drivers/gpu/drm/i915/gem/i915_gem_pm.c | 6 ++++-
>>> drivers/gpu/drm/i915/gt/intel_gt_pm.c | 32 ++++++++++++++++--------
>>> drivers/gpu/drm/i915/gt/intel_gt_pm.h | 3 +++
>>> drivers/gpu/drm/i915/gt/intel_gt_types.h | 1 +
>>> drivers/gpu/drm/i915/i915_driver.c | 32 ++++++++++++++++++++++++
>>> 5 files changed, 62 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>>> b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>>> index 0d812f4d787d..60deac41104d 100644
>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>>> @@ -160,8 +160,12 @@ void i915_gem_suspend_late(struct
>>> drm_i915_private *i915)
>>> * machine in an unusable condition.
>>> */
>>>
>>> - for_each_gt(gt, i915, i)
>>> + for_each_gt(gt, i915, i) {
>>> + /* Wa_14017073508: mtl */
>>> + mtl_media_busy(gt);
>> Let's add these busy/idle in intel_gt_suspend_late()
>>> intel_gt_suspend_late(gt);
>>> + mtl_media_idle(gt);
>>> + }
>>>
>>> spin_lock_irqsave(&i915->mm.obj_lock, flags);
>>> for (phase = phases; *phase; phase++) { diff --git
>>> a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
>>> b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
>>> index cef3d6f5c34e..1f3bf1cf0421 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
>>> @@ -26,24 +26,34 @@
>>>
>>> #define I915_GT_SUSPEND_IDLE_TIMEOUT (HZ / 2)
>>>
>>> -static void mtl_media_busy(struct intel_gt *gt)
>>> +void mtl_media_busy(struct intel_gt *gt)
>>> {
>>> /* Wa_14017073508: mtl */
>>> - if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) &&
>>> + if (gt && IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)
>>> &&
>>> gt->type == GT_MEDIA)
>> Let's have this in a separate patch , I believe that is already floated and ready to merge.
>>> - snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE,
>>> - PCODE_MBOX_GT_STATE_MEDIA_BUSY,
>>> -
>>> PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0);
>>> + if (!atomic_inc_not_zero(>->media_wakeref)) {
>>> + drm_err(>->i915->drm, "Media busy\n");
>> This should not be Error message.
>>> + snb_pcode_write_p(gt->uncore,
>>> PCODE_MBOX_GT_STATE,
>>> +
>>> PCODE_MBOX_GT_STATE_MEDIA_BUSY,
>>> +
>>> PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0);
>>> + atomic_inc(>->media_wakeref);
>>> + }
>>> }
>>>
>>> -static void mtl_media_idle(struct intel_gt *gt)
>>> +void mtl_media_idle(struct intel_gt *gt)
>>> {
>>> /* Wa_14017073508: mtl */
>>> - if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) &&
>>> - gt->type == GT_MEDIA)
>>> - snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE,
>>> -
>>> PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY,
>>> -
>>> PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0);
>>> + if (gt && IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)
>>> &&
>>> + gt->type == GT_MEDIA) {
>>> + WARN_ON(atomic_read(>->media_wakeref) <= 0);
>>> + if (!atomic_add_unless(>->media_wakeref, -1, 1)) {
>>> + drm_err(>->i915->drm, "Media idle\n");
>> Same here this can't be error message.
>> Thanks,
>> Anshuman Gupta.
>>> + snb_pcode_write_p(gt->uncore,
>>> PCODE_MBOX_GT_STATE,
>>> +
>>> PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY,
>>> +
>>> PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0);
>>> + atomic_dec(>->media_wakeref);
>>> + }
>>> + }
>>> }
>>>
>>> static void user_forcewake(struct intel_gt *gt, bool suspend) diff --git
>>> a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
>>> b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
>>> index 6c9a46452364..43ffabf9babe 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
>>> @@ -89,4 +89,7 @@ static inline bool is_mock_gt(const struct intel_gt *gt)
>>> return I915_SELFTEST_ONLY(gt->awake == -ENODEV); }
>>>
>>> +void mtl_media_busy(struct intel_gt *gt); void mtl_media_idle(struct
>>> +intel_gt *gt);
>>> +
>>> #endif /* INTEL_GT_PM_H */
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h
>>> b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>>> index f08c2556aa25..321ccfef4028 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>>> @@ -145,6 +145,7 @@ struct intel_gt {
>>>
>>> struct intel_wakeref wakeref;
>>> atomic_t user_wakeref;
>>> + atomic_t media_wakeref;
>>>
>>> struct list_head closed_vma;
>>> spinlock_t closed_lock; /* guards the list of closed_vma */ diff --git
>>> a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
>>> index 8bc76dede332..c7625ea28022 100644
>>> --- a/drivers/gpu/drm/i915/i915_driver.c
>>> +++ b/drivers/gpu/drm/i915/i915_driver.c
>>> @@ -327,6 +327,8 @@ static int i915_driver_mmio_probe(struct
>>> drm_i915_private *dev_priv)
>>> intel_gmch_bar_setup(dev_priv);
>>> intel_device_info_runtime_init(dev_priv);
>>>
>>> + /* Wa_14017073508: mtl */
>>> + mtl_media_busy(dev_priv->media_gt);
>>> for_each_gt(gt, dev_priv, i) {
>>> ret = intel_gt_init_mmio(gt);
>>> if (ret)
>>> @@ -335,10 +337,14 @@ static int i915_driver_mmio_probe(struct
>>> drm_i915_private *dev_priv)
>>>
>>> /* As early as possible, scrub existing GPU state before clobbering */
>>> sanitize_gpu(dev_priv);
>>> + /* Wa_14017073508: mtl */
>>> + mtl_media_idle(dev_priv->media_gt);
>>>
>>> return 0;
>>>
>>> err_uncore:
>>> + /* Wa_14017073508: mtl */
>>> + mtl_media_idle(dev_priv->media_gt);
>>> intel_gmch_bar_teardown(dev_priv);
>>>
>>> return ret;
>>> @@ -781,6 +787,12 @@ int i915_driver_probe(struct pci_dev *pdev, const
>>> struct pci_device_id *ent)
>>> if (ret < 0)
>>> goto out_tiles_cleanup;
>>>
>>> + /* Wa_14017073508: mtl */
>>> + if (i915->media_gt) {
>>> + atomic_set(&i915->media_gt->media_wakeref, 0);
>>> + mtl_media_busy(i915->media_gt);
>>> + }
>>> +
>>> ret = i915_driver_hw_probe(i915);
>>> if (ret < 0)
>>> goto out_cleanup_mmio;
>>> @@ -838,6 +850,8 @@ int i915_driver_probe(struct pci_dev *pdev, const
>>> struct pci_device_id *ent)
>>> i915_gem_drain_freed_objects(i915);
>>> i915_ggtt_driver_late_release(i915);
>>> out_cleanup_mmio:
>>> + /* Wa_14017073508: mtl */
>>> + mtl_media_idle(i915->media_gt);
>>> i915_driver_mmio_release(i915);
>>> out_tiles_cleanup:
>>> intel_gt_release_all(i915);
>>> @@ -1063,6 +1077,9 @@ static int i915_drm_suspend(struct drm_device
>>> *dev)
>>>
>>> disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>>>
>>> + /* Wa_14017073508: mtl */
>>> + mtl_media_busy(dev_priv->media_gt);
>>> +
>>> /* We do a lot of poking in a lot of registers, make sure they work
>>> * properly. */
>>> intel_power_domains_disable(dev_priv);
>>> @@ -1097,6 +1114,9 @@ static int i915_drm_suspend(struct drm_device
>>> *dev)
>>>
>>> intel_dmc_suspend(dev_priv);
>>>
>>> + /* Wa_14017073508: mtl */
>>> + mtl_media_busy(dev_priv->media_gt);
>>> +
>>> enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>>>
>>> i915_gem_drain_freed_objects(dev_priv);
>>> @@ -1197,6 +1217,9 @@ static int i915_drm_resume(struct drm_device
>>> *dev)
>>>
>>> disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>>>
>>> + /* Wa_14017073508: mtl */
>>> + mtl_media_busy(dev_priv->media_gt);
>>> +
>>> ret = i915_pcode_init(dev_priv);
>>> if (ret)
>>> return ret;
>>> @@ -1260,6 +1283,9 @@ static int i915_drm_resume(struct drm_device
>>> *dev)
>>>
>>> intel_gvt_resume(dev_priv);
>>>
>>> + /* Wa_14017073508: mtl */
>>> + mtl_media_idle(dev_priv->media_gt);
>>> +
>>> enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>>>
>>> return 0;
>>> @@ -1319,6 +1345,9 @@ static int i915_drm_resume_early(struct
>>> drm_device *dev)
>>>
>>> disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>>>
>>> + /* Wa_14017073508: mtl */
>>> + mtl_media_busy(dev_priv->media_gt);
>>> +
>>> ret = vlv_resume_prepare(dev_priv, false);
>>> if (ret)
>>> drm_err(&dev_priv->drm,
>>> @@ -1333,6 +1362,9 @@ static int i915_drm_resume_early(struct
>>> drm_device *dev)
>>>
>>> intel_power_domains_resume(dev_priv);
>>>
>>> + /* Wa_14017073508: mtl */
>>> + mtl_media_idle(dev_priv->media_gt);
>>> +
>>> enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>>>
>>> return ret;
>>> --
>>> 2.25.1
>>
More information about the Intel-gfx
mailing list