[PATCH 2/2] drm/amdgpu: adjust aca init/fini sequence to match gpu reset

Yang Wang kevinyang.wang at amd.com
Wed Jan 24 05:59:15 UTC 2024


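- move the aca init/fini calls out of amdgpu_device_init()/
  amdgpu_device_fini_sw() and into amdgpu_ras_late_init()/amdgpu_ras_fini()
  so that they follow the gpu reset sequence.
- add a new function amdgpu_aca_reset(), which runs aca fini followed by
  aca init; ras late init calls it instead of aca init while in gpu reset.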

Signed-off-by: Yang Wang <kevinyang.wang at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c    |  7 +++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h    |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 ------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    | 15 +++++++++++++--
 4 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
index 52a0ea2f0ebf..40c1d5c4a9d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -688,6 +688,13 @@ void amdgpu_aca_fini(struct amdgpu_device *adev)
 	aca_manager_fini(&aca->mgr);
 }
 
+int amdgpu_aca_reset(struct amdgpu_device *adev)
+{
+	amdgpu_aca_fini(adev); /* run the ACA fini path first */
+
+	return amdgpu_aca_init(adev); /* then init again; its status is returned */
+}
+
 void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs)
 {
 	struct amdgpu_aca *aca = &adev->aca;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
index 6e9a35eda683..2da50e095883 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
@@ -185,6 +185,7 @@ struct aca_info {
 
 int amdgpu_aca_init(struct amdgpu_device *adev);
 void amdgpu_aca_fini(struct amdgpu_device *adev);
+int amdgpu_aca_reset(struct amdgpu_device *adev);
 void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs);
 bool amdgpu_aca_is_enabled(struct amdgpu_device *adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 56d9dfa61290..dac73f8fbda4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4039,10 +4039,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 	amdgpu_device_get_pcie_info(adev);
 
-	r = amdgpu_aca_init(adev);
-	if (r)
-		return r;
-
 	r = amdgpu_device_get_job_timeout_settings(adev);
 	if (r) {
 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
@@ -4437,8 +4433,6 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 
 	amdgpu_reset_fini(adev);
 
-	amdgpu_aca_fini(adev);
-
 	/* free i2c buses */
 	if (!amdgpu_device_has_dc_support(adev))
 		amdgpu_i2c_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 5b519dc4df01..f7c6ea60316d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3348,10 +3348,18 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
 	if (amdgpu_sriov_vf(adev))
 		return 0;
 
-	if (amdgpu_aca_is_enabled(adev))
+	if (amdgpu_aca_is_enabled(adev)) {
+		if (amdgpu_in_reset(adev))
+			r = amdgpu_aca_reset(adev);
+		 else
+			r = amdgpu_aca_init(adev);
+		if (r)
+			return r;
+
 		amdgpu_ras_set_aca_debug_mode(adev, false);
-	else
+	} else {
 		amdgpu_ras_set_mca_debug_mode(adev, false);
+	}
 
 	list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
 		obj = node->ras_obj;
@@ -3420,6 +3428,9 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
 	amdgpu_ras_fs_fini(adev);
 	amdgpu_ras_interrupt_remove_all(adev);
 
+	if (amdgpu_aca_is_enabled(adev))
+		amdgpu_aca_fini(adev);
+
 	WARN(AMDGPU_RAS_GET_FEATURES(con->features), "Feature mask is not cleared");
 
 	if (AMDGPU_RAS_GET_FEATURES(con->features))
-- 
2.34.1



More information about the amd-gfx mailing list