[PATCH] amdgpu/pm/legacy: fix suspend/resume issues
Lazar, Lijo
lijo.lazar at amd.com
Mon Feb 17 09:56:49 UTC 2025
On 2/12/2025 9:41 PM, Alex Deucher wrote:
> From: "chr[]" <chris at rudorff.com>
>
> resume and irq handler happily races in set_power_state()
>
> * amdgpu_legacy_dpm_compute_clocks() needs lock
> * protect irq work handler
> * fix dpm_enabled usage
>
> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2524
> Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in amdgpu_dpm.c")
> Signed-off-by: chr[] <chris at rudorff.com>
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> ---
> drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c | 19 ++++++++++++++----
> .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c | 2 ++
> drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 20 +++++++++++++++----
> 3 files changed, 33 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
> index 67a8e22b1126d..db0d3d4dba685 100644
> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
> @@ -3042,6 +3042,7 @@ static int kv_dpm_hw_init(struct amdgpu_ip_block *ip_block)
> if (!amdgpu_dpm)
> return 0;
>
> + mutex_lock(&adev->pm.mutex);
> kv_dpm_setup_asic(adev);
> ret = kv_dpm_enable(adev);
> if (ret)
> @@ -3049,6 +3050,8 @@ static int kv_dpm_hw_init(struct amdgpu_ip_block *ip_block)
> else
> adev->pm.dpm_enabled = true;
> amdgpu_legacy_dpm_compute_clocks(adev);
> + mutex_unlock(&adev->pm.mutex);
> +
> return ret;
> }
>
> @@ -3067,10 +3070,13 @@ static int kv_dpm_suspend(struct amdgpu_ip_block *ip_block)
> struct amdgpu_device *adev = ip_block->adev;
>
> if (adev->pm.dpm_enabled) {
> + mutex_lock(&adev->pm.mutex);
> + adev->pm.dpm_enabled = false;
> /* disable dpm */
> kv_dpm_disable(adev);
> /* reset the power state */
> adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps;
> + mutex_unlock(&adev->pm.mutex);
> }
> return 0;
> }
> @@ -3080,18 +3086,23 @@ static int kv_dpm_resume(struct amdgpu_ip_block *ip_block)
> int ret;
> struct amdgpu_device *adev = ip_block->adev;
>
> - if (adev->pm.dpm_enabled) {
> + if (!amdgpu_dpm)
> + return 0;
> +
> + if (!adev->pm.dpm_enabled) {
> + mutex_lock(&adev->pm.mutex);
> /* asic init will reset to the boot state */
> kv_dpm_setup_asic(adev);
> ret = kv_dpm_enable(adev);
> if (ret)
> adev->pm.dpm_enabled = false;
Need braces here as well since else part is with braces.
> - else
> + else {
> adev->pm.dpm_enabled = true;
> - if (adev->pm.dpm_enabled)
> amdgpu_legacy_dpm_compute_clocks(adev);
> + }
> + mutex_unlock(&adev->pm.mutex);
> }
> - return 0;
> + return ret;
Possibility of returning unintialized value.
> }
>
> static bool kv_dpm_is_idle(void *handle)
> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> index e861355ebd75b..e06c25c945007 100644
> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> @@ -1012,6 +1012,7 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work)
> if (!adev->pm.dpm_enabled)
> return;
>
> + mutex_lock(&adev->pm.mutex);
I think taking this lock should happen before the dpm_enabled variable
check. Otherwise, it could happen that part of this sequence might still
go through after a kv/si_dpm_suspend.
> if (!pp_funcs->read_sensor(adev->powerplay.pp_handle,
> AMDGPU_PP_SENSOR_GPU_TEMP,
> (void *)&temp,
> @@ -1033,4 +1034,5 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work)
> adev->pm.dpm.state = dpm_state;
>
> amdgpu_legacy_dpm_compute_clocks(adev->powerplay.pp_handle);
> + mutex_unlock(&adev->pm.mutex);
> }
> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> index a87dcf0974bc1..c40ec4f74afb0 100644
> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> @@ -7786,6 +7786,7 @@ static int si_dpm_hw_init(struct amdgpu_ip_block *ip_block)
> if (!amdgpu_dpm)
> return 0;
>
> + mutex_lock(&adev->pm.mutex);
> si_dpm_setup_asic(adev);
> ret = si_dpm_enable(adev);
> if (ret)
> @@ -7793,6 +7794,7 @@ static int si_dpm_hw_init(struct amdgpu_ip_block *ip_block)
> else
> adev->pm.dpm_enabled = true;
> amdgpu_legacy_dpm_compute_clocks(adev);
> + mutex_unlock(&adev->pm.mutex);
> return ret;
> }
>
> @@ -7811,11 +7813,15 @@ static int si_dpm_suspend(struct amdgpu_ip_block *ip_block)
> struct amdgpu_device *adev = ip_block->adev;
>
> if (adev->pm.dpm_enabled) {
> + mutex_lock(&adev->pm.mutex);
> + adev->pm.dpm_enabled = false;
> /* disable dpm */
> si_dpm_disable(adev);
> /* reset the power state */
> adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps;
> + mutex_unlock(&adev->pm.mutex);
> }
> +
> return 0;
> }
>
> @@ -7824,18 +7830,24 @@ static int si_dpm_resume(struct amdgpu_ip_block *ip_block)
> int ret;
> struct amdgpu_device *adev = ip_block->adev;
>
> - if (adev->pm.dpm_enabled) {
> + if (!amdgpu_dpm)
> + return 0;
> +
> + if (!adev->pm.dpm_enabled) {
> /* asic init will reset to the boot state */
> + mutex_lock(&adev->pm.mutex);
> si_dpm_setup_asic(adev);
> ret = si_dpm_enable(adev);
> if (ret)
> adev->pm.dpm_enabled = false;
Same 'braces' comment.
> - else
> + else {
> adev->pm.dpm_enabled = true;
> - if (adev->pm.dpm_enabled)
> amdgpu_legacy_dpm_compute_clocks(adev);
> + }
> + mutex_unlock(&adev->pm.mutex);
> }
> - return 0;
> +
> + return ret;
Possibility of returning unintialized value.
Thanks,
Lijo
> }
>
> static bool si_dpm_is_idle(void *handle)
More information about the amd-gfx
mailing list