[PATCH 2/2] drm/amdgpu: fix pm notifier handling
Alex Deucher
alexdeucher at gmail.com
Fri May 2 12:56:25 UTC 2025
On Thu, May 1, 2025 at 5:19 PM Mario Limonciello
<mario.limonciello at amd.com> wrote:
>
> On 5/1/2025 3:09 PM, Alex Deucher wrote:
> > Set the s3/s0ix and s4 flags in the pm notifier so that we can skip
> > the resource evictions properly in pm prepare based on whether
> > we are suspending or hibernating. Drop the eviction as processes
> > are not frozen at this time, we we can end up getting stuck trying
> > to evict VRAM while applications continue to submit work which
> > causes the buffers to get pulled back into VRAM.
> >
> > Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4178
> > Fixes: 2965e6355dcd ("drm/amd: Add Suspend/Hibernate notification callback support")
> > Cc: Mario Limonciello <mario.limonciello at amd.com>
> > Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
>
> Rather than removing the callbacks (and thus introducing the "other"
> bugs with memory pressure), I've sent an alternative series. LMK what
> you think of this instead.
>
> https://lore.kernel.org/amd-gfx/20250501211734.2434369-1-superm1@kernel.org/T/#m6cdc075af911868667ea6fc849bcd196477d6c20
Series looks good to me, but I think a variant of this patch (with the
evictions still in place) still makes sense so that we can properly
set the suspend and hibernate flags in amdgpu so that we can optimize
the evictions for suspend on APUs. Will respin.
Alex
>
> > ---
> > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 25 +++++++++++-----------
> > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 22 ++-----------------
> > 2 files changed, 15 insertions(+), 32 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index 71d95f16067a4..d232e4a26d7bf 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -4974,28 +4974,29 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
> > * @data: data
> > *
> > * This function is called when the system is about to suspend or hibernate.
> > - * It is used to evict resources from the device before the system goes to
> > - * sleep while there is still access to swap.
> > + * It is used to set the appropriate flags so that eviction can be optimized
> > + * in the pm prepare callback.
> > */
> > static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
> > void *data)
> > {
> > struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb);
> > - int r;
> >
> > switch (mode) {
> > case PM_HIBERNATION_PREPARE:
> > adev->in_s4 = true;
> > - fallthrough;
> > + break;
> > + case PM_POST_HIBERNATION:
> > + adev->in_s4 = false;
> > + break;
> > case PM_SUSPEND_PREPARE:
> > - r = amdgpu_device_evict_resources(adev);
> > - /*
> > - * This is considered non-fatal at this time because
> > - * amdgpu_device_prepare() will also fatally evict resources.
> > - * See https://gitlab.freedesktop.org/drm/amd/-/issues/3781
> > - */
> > - if (r)
> > - drm_warn(adev_to_drm(adev), "Failed to evict resources, freeze active processes if problems occur: %d\n", r);
> > + if (amdgpu_acpi_is_s0ix_active(adev))
> > + adev->in_s0ix = true;
> > + else if (amdgpu_acpi_is_s3_active(adev))
> > + adev->in_s3 = true;
> > + break;
> > + case PM_POST_SUSPEND:
> > + adev->in_s0ix = adev->in_s3 = false;
> > break;
> > }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index cec041a556013..6599fb6313220 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > @@ -2572,10 +2572,6 @@ static int amdgpu_pmops_suspend(struct device *dev)
> > struct drm_device *drm_dev = dev_get_drvdata(dev);
> > struct amdgpu_device *adev = drm_to_adev(drm_dev);
> >
> > - if (amdgpu_acpi_is_s0ix_active(adev))
> > - adev->in_s0ix = true;
> > - else if (amdgpu_acpi_is_s3_active(adev))
> > - adev->in_s3 = true;
> > if (!adev->in_s0ix && !adev->in_s3) {
> > /* don't allow going deep first time followed by s2idle the next time */
> > if (adev->last_suspend_state != PM_SUSPEND_ON &&
> > @@ -2608,7 +2604,6 @@ static int amdgpu_pmops_resume(struct device *dev)
> > {
> > struct drm_device *drm_dev = dev_get_drvdata(dev);
> > struct amdgpu_device *adev = drm_to_adev(drm_dev);
> > - int r;
> >
> > if (!adev->in_s0ix && !adev->in_s3)
> > return 0;
> > @@ -2617,12 +2612,7 @@ static int amdgpu_pmops_resume(struct device *dev)
> > if (!pci_device_is_present(adev->pdev))
> > adev->no_hw_access = true;
> >
> > - r = amdgpu_device_resume(drm_dev, true);
> > - if (amdgpu_acpi_is_s0ix_active(adev))
> > - adev->in_s0ix = false;
> > - else
> > - adev->in_s3 = false;
> > - return r;
> > + return amdgpu_device_resume(drm_dev, true);
> > }
> >
> > static int amdgpu_pmops_freeze(struct device *dev)
> > @@ -2643,13 +2633,8 @@ static int amdgpu_pmops_freeze(struct device *dev)
> > static int amdgpu_pmops_thaw(struct device *dev)
> > {
> > struct drm_device *drm_dev = dev_get_drvdata(dev);
> > - struct amdgpu_device *adev = drm_to_adev(drm_dev);
> > - int r;
> >
> > - r = amdgpu_device_resume(drm_dev, true);
> > - adev->in_s4 = false;
> > -
> > - return r;
> > + return amdgpu_device_resume(drm_dev, true);
> > }
> >
> > static int amdgpu_pmops_poweroff(struct device *dev)
> > @@ -2662,9 +2647,6 @@ static int amdgpu_pmops_poweroff(struct device *dev)
> > static int amdgpu_pmops_restore(struct device *dev)
> > {
> > struct drm_device *drm_dev = dev_get_drvdata(dev);
> > - struct amdgpu_device *adev = drm_to_adev(drm_dev);
> > -
> > - adev->in_s4 = false;
> >
> > return amdgpu_device_resume(drm_dev, true);
> > }
>
More information about the amd-gfx
mailing list