[PATCH v2 2/2] drm/amd: Stop evicting resources on APUs in suspend
Alex Deucher
alexdeucher at gmail.com
Wed Feb 7 22:40:00 UTC 2024
On Wed, Feb 7, 2024 at 5:36 PM Mario Limonciello
<mario.limonciello at amd.com> wrote:
>
> On 2/7/2024 16:34, Alex Deucher wrote:
> > On Wed, Feb 7, 2024 at 3:48 PM Mario Limonciello
> > <mario.limonciello at amd.com> wrote:
> >>
> >> commit 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback")
> >> intentionally moved the eviction of resources to earlier in the suspend
> >> process, but this introduced a subtle change that it occurs before adev->in_s0ix
> >> or adev->in_s3 are set. This meant that APUs actually started to evict
> >> resources at suspend time as well.
> >>
> >> Add a new `in_prepare` flag that is set for the life of the prepare() callback
> >> to return the old code flow. Drop the existing call to return 1 in this case because
> >> the suspend() callback looks for the flags too.
> >>
> >> Also, introduce a new amdgpu_device_freeze() function to call at S4 and evict
> >> resources in this callback so that APUs will still get resources evicted.
> >>
> >> Reported-by: Jürg Billeter <j at bitron.ch>
> >> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3132#note_2271038
> >> Fixes: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback")
> >> Signed-off-by: Mario Limonciello <mario.limonciello at amd.com>
> >> ---
> >> v1->v2:
> >> * Add and use new in_prepare member
> >> ---
> >> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +-
> >> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++++++++++++++++++++--
> >> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 21 ++--------
> >> 3 files changed, 48 insertions(+), 23 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> index 5d5be3e20687..f9db09a9017a 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> @@ -1075,7 +1075,8 @@ struct amdgpu_device {
> >> u8 reset_magic[AMDGPU_RESET_MAGIC_NUM];
> >>
> >> /* s3/s4 mask */
> >> - bool in_suspend;
> >> + bool in_prepare;
> >> + bool in_suspend;
> >> bool in_s3;
> >> bool in_s4;
> >> bool in_s0ix;
> >> @@ -1462,6 +1463,7 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
> >> int amdgpu_device_prepare(struct drm_device *dev);
> >> int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
> >> int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
> >> +int amdgpu_device_freeze(struct drm_device *drm_dev);
> >> u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc);
> >> int amdgpu_enable_vblank_kms(struct drm_crtc *crtc);
> >> void amdgpu_disable_vblank_kms(struct drm_crtc *crtc);
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> index 2bc460cb993d..0a337fcd89b4 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> @@ -4492,7 +4492,7 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
> >> int ret;
> >>
> >> /* No need to evict vram on APUs for suspend to ram or s2idle */
> >> - if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
> >> + if ((adev->in_prepare) && (adev->flags & AMD_IS_APU))
> >
> > Could probably simplify this to:
> > if ((!adev->in_s4) && (adev->flags & AMD_IS_APU))
> >
> > Then you could drop the in_prepare variable.
> >
> >> return 0;
> >>
> >> ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
> >> @@ -4521,10 +4521,12 @@ int amdgpu_device_prepare(struct drm_device *dev)
> >> if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
> >> return 0;
> >>
> >> + adev->in_prepare = true;
> >> +
> >> /* Evict the majority of BOs before starting suspend sequence */
> >> r = amdgpu_device_evict_resources(adev);
> >> if (r)
> >> - return r;
> >> + goto unprepare;
> >>
> >> for (i = 0; i < adev->num_ip_blocks; i++) {
> >> if (!adev->ip_blocks[i].status.valid)
> >> @@ -4533,10 +4535,46 @@ int amdgpu_device_prepare(struct drm_device *dev)
> >> continue;
> >> r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
> >> if (r)
> >> - return r;
> >> + goto unprepare;
> >> }
> >>
> >> - return 0;
> >> +unprepare:
> >> + adev->in_prepare = FALSE;
> >> +
> >> + return r;
> >> +}
> >> +
> >> +/**
> >> + * amdgpu_device_freeze - run S4 sequence
> >> + *
> >> + * @dev: drm dev pointer
> >> + *
> >> + * Prepare to put the hw in the S4 state (all asics).
> >> + * Returns 0 for success or an error on failure.
> >> + * Called at driver freeze.
> >> + */
> >> +int amdgpu_device_freeze(struct drm_device *drm_dev)
> >> +{
> >> + struct amdgpu_device *adev = drm_to_adev(drm_dev);
> >> + int r;
> >> +
> >> + adev->in_s4 = true;
> >> +
> >> + r = amdgpu_device_evict_resources(adev);
> >
> > Won't this be too late to allocate memory? Doesn't this need to
> > happen in prepare() even for S4?
>
> Hmm; possibly. I'll swap it back with your other suggestion.
I think we need to know at prepare time what state we are targeting.
I think that would allow us to clean up a lot of the in_s3, in_s4,
in_s0x checks. Something like adev->target_pm_state maybe?
Alex
>
> Thanks
> >
> > Alex
> >
> >> + if (r)
> >> + goto cleanup;
> >> +
> >> + r = amdgpu_device_suspend(drm_dev, true);
> >> + if (r)
> >> + goto cleanup;
> >> +
> >> + if (amdgpu_acpi_should_gpu_reset(adev))
> >> + r = amdgpu_asic_reset(adev);
> >> +
> >> +cleanup:
> >> + adev->in_s4 = false;
> >> +
> >> + return r;
> >> }
> >>
> >> /**
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >> index b74f68a15802..fc9caa14c9d6 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >> @@ -2456,6 +2456,7 @@ static int amdgpu_pmops_prepare(struct device *dev)
> >> {
> >> struct drm_device *drm_dev = dev_get_drvdata(dev);
> >> struct amdgpu_device *adev = drm_to_adev(drm_dev);
> >> + int r;
> >>
> >> /* Return a positive number here so
> >> * DPM_FLAG_SMART_SUSPEND works properly
> >> @@ -2464,13 +2465,6 @@ static int amdgpu_pmops_prepare(struct device *dev)
> >> pm_runtime_suspended(dev))
> >> return 1;
> >>
> >> - /* if we will not support s3 or s2i for the device
> >> - * then skip suspend
> >> - */
> >> - if (!amdgpu_acpi_is_s0ix_active(adev) &&
> >> - !amdgpu_acpi_is_s3_active(adev))
> >> - return 1;
> >> -
> >> return amdgpu_device_prepare(drm_dev);
> >> }
> >>
> >> @@ -2488,6 +2482,7 @@ static int amdgpu_pmops_suspend(struct device *dev)
> >> adev->in_s0ix = true;
> >> else if (amdgpu_acpi_is_s3_active(adev))
> >> adev->in_s3 = true;
> >> +
> >> if (!adev->in_s0ix && !adev->in_s3)
> >> return 0;
> >> return amdgpu_device_suspend(drm_dev, true);
> >> @@ -2528,18 +2523,8 @@ static int amdgpu_pmops_resume(struct device *dev)
> >> static int amdgpu_pmops_freeze(struct device *dev)
> >> {
> >> struct drm_device *drm_dev = dev_get_drvdata(dev);
> >> - struct amdgpu_device *adev = drm_to_adev(drm_dev);
> >> - int r;
> >> -
> >> - adev->in_s4 = true;
> >> - r = amdgpu_device_suspend(drm_dev, true);
> >> - adev->in_s4 = false;
> >> - if (r)
> >> - return r;
> >>
> >> - if (amdgpu_acpi_should_gpu_reset(adev))
> >> - return amdgpu_asic_reset(adev);
> >> - return 0;
> >> + return amdgpu_device_freeze(drm_dev);
> >> }
> >>
> >> static int amdgpu_pmops_thaw(struct device *dev)
> >> --
> >> 2.34.1
> >>
>
More information about the amd-gfx
mailing list