[PATCH 2/2] drm/amdgpu: adjust aca init/fini sequence to match gpu reset

Zhang, Hawking Hawking.Zhang at amd.com
Wed Jan 24 06:35:17 UTC 2024


[AMD Official Use Only - General]

Series is

Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>

Regards,
Hawking
-----Original Message-----
From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Yang Wang
Sent: Wednesday, January 24, 2024 13:59
To: amd-gfx at lists.freedesktop.org
Cc: Zhou1, Tao <Tao.Zhou1 at amd.com>; Wang, Yang(Kevin) <KevinYang.Wang at amd.com>; Zhang, Hawking <Hawking.Zhang at amd.com>
Subject: [PATCH 2/2] drm/amdgpu: adjust aca init/fini sequence to match gpu reset

- move aca init/fini function into ras init/fini to adapt gpu reset
  sequence.
- add new function amdgpu_aca_reset()

Signed-off-by: Yang Wang <kevinyang.wang at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c    |  7 +++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h    |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 ------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    | 15 +++++++++++++--
 4 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
index 52a0ea2f0ebf..40c1d5c4a9d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -688,6 +688,13 @@ void amdgpu_aca_fini(struct amdgpu_device *adev)
        aca_manager_fini(&aca->mgr);
 }

+int amdgpu_aca_reset(struct amdgpu_device *adev) {
+       amdgpu_aca_fini(adev);
+
+       return amdgpu_aca_init(adev);
+}
+
 void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs)  {
        struct amdgpu_aca *aca = &adev->aca;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
index 6e9a35eda683..2da50e095883 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
@@ -185,6 +185,7 @@ struct aca_info {

 int amdgpu_aca_init(struct amdgpu_device *adev);  void amdgpu_aca_fini(struct amdgpu_device *adev);
+int amdgpu_aca_reset(struct amdgpu_device *adev);
 void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs);  bool amdgpu_aca_is_enabled(struct amdgpu_device *adev);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 56d9dfa61290..dac73f8fbda4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4039,10 +4039,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,

        amdgpu_device_get_pcie_info(adev);

-       r = amdgpu_aca_init(adev);
-       if (r)
-               return r;
-
        r = amdgpu_device_get_job_timeout_settings(adev);
        if (r) {
                dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); @@ -4437,8 +4433,6 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)

        amdgpu_reset_fini(adev);

-       amdgpu_aca_fini(adev);
-
        /* free i2c buses */
        if (!amdgpu_device_has_dc_support(adev))
                amdgpu_i2c_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 5b519dc4df01..f7c6ea60316d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3348,10 +3348,18 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
        if (amdgpu_sriov_vf(adev))
                return 0;

-       if (amdgpu_aca_is_enabled(adev))
+       if (amdgpu_aca_is_enabled(adev)) {
+               if (amdgpu_in_reset(adev))
+                       r = amdgpu_aca_reset(adev);
+                else
+                       r = amdgpu_aca_init(adev);
+               if (r)
+                       return r;
+
                amdgpu_ras_set_aca_debug_mode(adev, false);
-       else
+       } else {
                amdgpu_ras_set_mca_debug_mode(adev, false);
+       }

        list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
                obj = node->ras_obj;
@@ -3420,6 +3428,9 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
        amdgpu_ras_fs_fini(adev);
        amdgpu_ras_interrupt_remove_all(adev);

+       if (amdgpu_aca_is_enabled(adev))
+               amdgpu_aca_fini(adev);
+
        WARN(AMDGPU_RAS_GET_FEATURES(con->features), "Feature mask is not cleared");

        if (AMDGPU_RAS_GET_FEATURES(con->features))
--
2.34.1



More information about the amd-gfx mailing list