[PATCH v2 2/2] drm/amd: Stop evicting resources on APUs in suspend

Mario Limonciello mario.limonciello at amd.com
Wed Feb 7 22:36:07 UTC 2024


On 2/7/2024 16:34, Alex Deucher wrote:
> On Wed, Feb 7, 2024 at 3:48 PM Mario Limonciello
> <mario.limonciello at amd.com> wrote:
>>
>> commit 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback")
>> intentionally moved the eviction of resources to earlier in the suspend
>> process, but this introduced a subtle change that it occurs before adev->in_s0ix
>> or adev->in_s3 are set. This meant that APUs actually started to evict
>> resources at suspend time as well.
>>
>> Add a new `in_prepare` flag that is set for the life of the prepare() callback
>> to return the old code flow. Drop the existing call to return 1 in this case because
>> the suspend() callback looks for the flags too.
>>
>> Also, introduce a new amdgpu_device_freeze() function to call at S4 and evict
>> resources in this callback so that APUs will still get resources evicted.
>>
>> Reported-by: Jürg Billeter <j at bitron.ch>
>> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3132#note_2271038
>> Fixes: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback")
>> Signed-off-by: Mario Limonciello <mario.limonciello at amd.com>
>> ---
>> v1->v2:
>>   * Add and use new in_prepare member
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  4 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++++++++++++++++++++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 21 ++--------
>>   3 files changed, 48 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 5d5be3e20687..f9db09a9017a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -1075,7 +1075,8 @@ struct amdgpu_device {
>>          u8                              reset_magic[AMDGPU_RESET_MAGIC_NUM];
>>
>>          /* s3/s4 mask */
>> -       bool                            in_suspend;
>> +       bool                            in_prepare;
>> +       bool                            in_suspend;
>>          bool                            in_s3;
>>          bool                            in_s4;
>>          bool                            in_s0ix;
>> @@ -1462,6 +1463,7 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
>>   int amdgpu_device_prepare(struct drm_device *dev);
>>   int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
>>   int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
>> +int amdgpu_device_freeze(struct drm_device *drm_dev);
>>   u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc);
>>   int amdgpu_enable_vblank_kms(struct drm_crtc *crtc);
>>   void amdgpu_disable_vblank_kms(struct drm_crtc *crtc);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 2bc460cb993d..0a337fcd89b4 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -4492,7 +4492,7 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
>>          int ret;
>>
>>          /* No need to evict vram on APUs for suspend to ram or s2idle */
>> -       if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
>> +       if ((adev->in_prepare) && (adev->flags & AMD_IS_APU))
> 
> Could probably simplify this to:
> if ((!adev->in_s4) && (adev->flags & AMD_IS_APU))
> 
> Then you could drop the in_prepare variable.
> 
>>                  return 0;
>>
>>          ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
>> @@ -4521,10 +4521,12 @@ int amdgpu_device_prepare(struct drm_device *dev)
>>          if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
>>                  return 0;
>>
>> +       adev->in_prepare = true;
>> +
>>          /* Evict the majority of BOs before starting suspend sequence */
>>          r = amdgpu_device_evict_resources(adev);
>>          if (r)
>> -               return r;
>> +               goto unprepare;
>>
>>          for (i = 0; i < adev->num_ip_blocks; i++) {
>>                  if (!adev->ip_blocks[i].status.valid)
>> @@ -4533,10 +4535,46 @@ int amdgpu_device_prepare(struct drm_device *dev)
>>                          continue;
>>                  r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
>>                  if (r)
>> -                       return r;
>> +                       goto unprepare;
>>          }
>>
>> -       return 0;
>> +unprepare:
>> +       adev->in_prepare = FALSE;
>> +
>> +       return r;
>> +}
>> +
>> +/**
>> + * amdgpu_device_freeze - run S4 sequence
>> + *
>> + * @dev: drm dev pointer
>> + *
>> + * Prepare to put the hw in the S4 state (all asics).
>> + * Returns 0 for success or an error on failure.
>> + * Called at driver freeze.
>> + */
>> +int amdgpu_device_freeze(struct drm_device *drm_dev)
>> +{
>> +       struct amdgpu_device *adev = drm_to_adev(drm_dev);
>> +       int r;
>> +
>> +       adev->in_s4 = true;
>> +
>> +       r = amdgpu_device_evict_resources(adev);
> 
> Won't this be too late to allocate memory?  Doesn't this need to
> happen in prepare() even for S4?

Hmm; possibly.  I'll swap it back with your other suggestion.

Thanks
> 
> Alex
> 
>> +       if (r)
>> +               goto cleanup;
>> +
>> +       r = amdgpu_device_suspend(drm_dev, true);
>> +       if (r)
>> +               goto cleanup;
>> +
>> +       if (amdgpu_acpi_should_gpu_reset(adev))
>> +               r = amdgpu_asic_reset(adev);
>> +
>> +cleanup:
>> +       adev->in_s4 = false;
>> +
>> +       return r;
>>   }
>>
>>   /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index b74f68a15802..fc9caa14c9d6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -2456,6 +2456,7 @@ static int amdgpu_pmops_prepare(struct device *dev)
>>   {
>>          struct drm_device *drm_dev = dev_get_drvdata(dev);
>>          struct amdgpu_device *adev = drm_to_adev(drm_dev);
>> +       int r;
>>
>>          /* Return a positive number here so
>>           * DPM_FLAG_SMART_SUSPEND works properly
>> @@ -2464,13 +2465,6 @@ static int amdgpu_pmops_prepare(struct device *dev)
>>              pm_runtime_suspended(dev))
>>                  return 1;
>>
>> -       /* if we will not support s3 or s2i for the device
>> -        *  then skip suspend
>> -        */
>> -       if (!amdgpu_acpi_is_s0ix_active(adev) &&
>> -           !amdgpu_acpi_is_s3_active(adev))
>> -               return 1;
>> -
>>          return amdgpu_device_prepare(drm_dev);
>>   }
>>
>> @@ -2488,6 +2482,7 @@ static int amdgpu_pmops_suspend(struct device *dev)
>>                  adev->in_s0ix = true;
>>          else if (amdgpu_acpi_is_s3_active(adev))
>>                  adev->in_s3 = true;
>> +
>>          if (!adev->in_s0ix && !adev->in_s3)
>>                  return 0;
>>          return amdgpu_device_suspend(drm_dev, true);
>> @@ -2528,18 +2523,8 @@ static int amdgpu_pmops_resume(struct device *dev)
>>   static int amdgpu_pmops_freeze(struct device *dev)
>>   {
>>          struct drm_device *drm_dev = dev_get_drvdata(dev);
>> -       struct amdgpu_device *adev = drm_to_adev(drm_dev);
>> -       int r;
>> -
>> -       adev->in_s4 = true;
>> -       r = amdgpu_device_suspend(drm_dev, true);
>> -       adev->in_s4 = false;
>> -       if (r)
>> -               return r;
>>
>> -       if (amdgpu_acpi_should_gpu_reset(adev))
>> -               return amdgpu_asic_reset(adev);
>> -       return 0;
>> +       return amdgpu_device_freeze(drm_dev);
>>   }
>>
>>   static int amdgpu_pmops_thaw(struct device *dev)
>> --
>> 2.34.1
>>



More information about the amd-gfx mailing list