[AMDGPU/PM/legacy] PATCH SI/CIK: suspend / resume gpu hang [issue 2524]
Alexandre Demers
alexandre.f.demers at gmail.com
Wed Feb 5 19:12:40 UTC 2025
A patch that would fix the GPU hangs occurring during SI/CIK pm
suspend/resume cycles has been lying dormant for more than a year.
See: https://gitlab.freedesktop.org/drm/amd/-/issues/2524
I'm not the original author of the patch; however, that person has not
responded in the months since Alex Deucher suggested that he send the
patch to the amd-gfx mailing list for review.
This patch handles the mutex locks/unlocks needed to prevent the
suspend/resume hangs. It mimics what was included in
commit 3712e7a494596b26861f4dc9b81676d1d0272eaf
Author: Evan Quan <evan.quan at amd.com>
Date: Tue Nov 16 14:30:20 2021 +0800
drm/amd/pm: unified lock protections in amdgpu_dpm.c
While you could add my "Reviewed-by", I have not tested it, and it may
be possible to narrow the locks/unlocks to cover fewer calls.
I'm willing to test it on Pitcairn and Tahiti for any regression I
could find if requested.
Alexandre Demers
----
From e62461803e84c181d6d237e27a215b788d72fa41 Mon Sep 17 00:00:00 2001
From: "chr[]" <chris at socke>
Date: Sun, 23 Apr 2023 06:13:47 +0200
Subject: [PATCH] amdgpu: fix suspend/resume issues
resume and irq handler happily races in set_power_state()
* amdgpu_legacy_dpm_compute_clocks() needs lock
* protect irq work handler
* fix dpm_enabled usage
---
drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c | 19 ++++++++++++++----
.../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c | 2 ++
drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 20 +++++++++++++++----
3 files changed, 33 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
index f5e08b60f66e..e260224b6152 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
@@ -3056,6 +3056,7 @@ static int kv_dpm_hw_init(void *handle)
if (!amdgpu_dpm)
return 0;
+ mutex_lock(&adev->pm.mutex);
kv_dpm_setup_asic(adev);
ret = kv_dpm_enable(adev);
if (ret)
@@ -3063,6 +3064,8 @@ static int kv_dpm_hw_init(void *handle)
else
adev->pm.dpm_enabled = true;
amdgpu_legacy_dpm_compute_clocks(adev);
+ mutex_unlock(&adev->pm.mutex);
+
return ret;
}
@@ -3081,10 +3084,13 @@ static int kv_dpm_suspend(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->pm.dpm_enabled) {
+ mutex_lock(&adev->pm.mutex);
+ adev->pm.dpm_enabled = false;
/* disable dpm */
kv_dpm_disable(adev);
/* reset the power state */
adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps =
adev->pm.dpm.boot_ps;
+ mutex_unlock(&adev->pm.mutex);
}
return 0;
}
@@ -3094,18 +3100,23 @@ static int kv_dpm_resume(void *handle)
int ret;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (adev->pm.dpm_enabled) {
+ if (!amdgpu_dpm)
+ return 0;
+
+ if (!adev->pm.dpm_enabled) {
+ mutex_lock(&adev->pm.mutex);
/* asic init will reset to the boot state */
kv_dpm_setup_asic(adev);
ret = kv_dpm_enable(adev);
if (ret)
adev->pm.dpm_enabled = false;
- else
+ else {
adev->pm.dpm_enabled = true;
- if (adev->pm.dpm_enabled)
amdgpu_legacy_dpm_compute_clocks(adev);
+ }
+ mutex_unlock(&adev->pm.mutex);
}
- return 0;
+ return ret;
}
static bool kv_dpm_is_idle(void *handle)
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
index d3fe149d8476..665c218d9003 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
@@ -1047,6 +1047,7 @@ void amdgpu_dpm_thermal_work_handler(struct
work_struct *work)
if (!adev->pm.dpm_enabled)
return;
+ mutex_lock(&adev->pm.mutex);
if (!pp_funcs->read_sensor(adev->powerplay.pp_handle,
AMDGPU_PP_SENSOR_GPU_TEMP,
(void *)&temp,
@@ -1068,4 +1069,5 @@ void amdgpu_dpm_thermal_work_handler(struct
work_struct *work)
adev->pm.dpm.state = dpm_state;
amdgpu_legacy_dpm_compute_clocks(adev->powerplay.pp_handle);
+ mutex_unlock(&adev->pm.mutex);
}
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
index 49c398ec0aaf..15084872975b 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
@@ -7797,6 +7797,7 @@ static int si_dpm_hw_init(void *handle)
if (!amdgpu_dpm)
return 0;
+ mutex_lock(&adev->pm.mutex);
si_dpm_setup_asic(adev);
ret = si_dpm_enable(adev);
if (ret)
@@ -7804,6 +7805,7 @@ static int si_dpm_hw_init(void *handle)
else
adev->pm.dpm_enabled = true;
amdgpu_legacy_dpm_compute_clocks(adev);
+ mutex_unlock(&adev->pm.mutex);
return ret;
}
@@ -7822,11 +7824,15 @@ static int si_dpm_suspend(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->pm.dpm_enabled) {
+ mutex_lock(&adev->pm.mutex);
+ adev->pm.dpm_enabled = false;
/* disable dpm */
si_dpm_disable(adev);
/* reset the power state */
adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps =
adev->pm.dpm.boot_ps;
+ mutex_unlock(&adev->pm.mutex);
}
+
return 0;
}
@@ -7835,18 +7841,24 @@ static int si_dpm_resume(void *handle)
int ret;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (adev->pm.dpm_enabled) {
+ if (!amdgpu_dpm)
+ return 0;
+
+ if (!adev->pm.dpm_enabled) {
/* asic init will reset to the boot state */
+ mutex_lock(&adev->pm.mutex);
si_dpm_setup_asic(adev);
ret = si_dpm_enable(adev);
if (ret)
adev->pm.dpm_enabled = false;
- else
+ else {
adev->pm.dpm_enabled = true;
- if (adev->pm.dpm_enabled)
amdgpu_legacy_dpm_compute_clocks(adev);
+ }
+ mutex_unlock(&adev->pm.mutex);
}
- return 0;
+
+ return ret;
}
static bool si_dpm_is_idle(void *handle)
--
2.30.2
More information about the amd-gfx
mailing list