[PATCH 2/2] drm/amdgpu: adjust aca init/fini sequence to match gpu reset
Zhang, Hawking
Hawking.Zhang at amd.com
Wed Jan 24 06:35:17 UTC 2024
[AMD Official Use Only - General]
Series is
Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
Regards,
Hawking
-----Original Message-----
From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Yang Wang
Sent: Wednesday, January 24, 2024 13:59
To: amd-gfx at lists.freedesktop.org
Cc: Zhou1, Tao <Tao.Zhou1 at amd.com>; Wang, Yang(Kevin) <KevinYang.Wang at amd.com>; Zhang, Hawking <Hawking.Zhang at amd.com>
Subject: [PATCH 2/2] drm/amdgpu: adjust aca init/fini sequence to match gpu reset
- move the aca init/fini functions into ras init/fini to adapt to the gpu
reset sequence.
- add new function amdgpu_aca_reset()
Signed-off-by: Yang Wang <kevinyang.wang at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 7 +++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ------
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 15 +++++++++++++--
4 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
index 52a0ea2f0ebf..40c1d5c4a9d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -688,6 +688,13 @@ void amdgpu_aca_fini(struct amdgpu_device *adev)
aca_manager_fini(&aca->mgr);
}
+int amdgpu_aca_reset(struct amdgpu_device *adev)
+{
+ amdgpu_aca_fini(adev);
+
+ return amdgpu_aca_init(adev);
+}
+
void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs)
{
struct amdgpu_aca *aca = &adev->aca;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
index 6e9a35eda683..2da50e095883 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
@@ -185,6 +185,7 @@ struct aca_info {
int amdgpu_aca_init(struct amdgpu_device *adev);
void amdgpu_aca_fini(struct amdgpu_device *adev);
+int amdgpu_aca_reset(struct amdgpu_device *adev);
void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs);
bool amdgpu_aca_is_enabled(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 56d9dfa61290..dac73f8fbda4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4039,10 +4039,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
amdgpu_device_get_pcie_info(adev);
- r = amdgpu_aca_init(adev);
- if (r)
- return r;
-
r = amdgpu_device_get_job_timeout_settings(adev);
if (r) {
dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
@@ -4437,8 +4433,6 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
amdgpu_reset_fini(adev);
- amdgpu_aca_fini(adev);
-
/* free i2c buses */
if (!amdgpu_device_has_dc_support(adev))
amdgpu_i2c_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 5b519dc4df01..f7c6ea60316d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3348,10 +3348,18 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
return 0;
- if (amdgpu_aca_is_enabled(adev))
+ if (amdgpu_aca_is_enabled(adev)) {
+ if (amdgpu_in_reset(adev))
+ r = amdgpu_aca_reset(adev);
+ else
+ r = amdgpu_aca_init(adev);
+ if (r)
+ return r;
+
amdgpu_ras_set_aca_debug_mode(adev, false);
- else
+ } else {
amdgpu_ras_set_mca_debug_mode(adev, false);
+ }
list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
obj = node->ras_obj;
@@ -3420,6 +3428,9 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
amdgpu_ras_fs_fini(adev);
amdgpu_ras_interrupt_remove_all(adev);
+ if (amdgpu_aca_is_enabled(adev))
+ amdgpu_aca_fini(adev);
+
WARN(AMDGPU_RAS_GET_FEATURES(con->features), "Feature mask is not cleared");
if (AMDGPU_RAS_GET_FEATURES(con->features))
--
2.34.1
More information about the amd-gfx
mailing list