[PATCH] drm/amdgpu: fix RAS unload driver issue in SRIOV

Yang Wang kevinyang.wang at amd.com
Tue May 7 02:49:40 UTC 2024


Fix a null pointer dereference when unloading the driver in SR-IOV mode.

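Move the SR-IOV guest early return in amdgpu_ras_late_init() below the
ACA/MCA setup, so that the amdgpu_mca/aca_xxx_init() related functions are
still initialized properly on the guest side. Only the ACA/MCA debug mode
setting and the per-block RAS late init remain skipped for a VF.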

Signed-off-by: Yang Wang <kevinyang.wang at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 36509fa9fecf..36deac3b1440 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3611,10 +3611,6 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
 	struct amdgpu_ras_block_object *obj;
 	int r;
 
-	/* Guest side doesn't need init ras feature */
-	if (amdgpu_sriov_vf(adev))
-		return 0;
-
 	amdgpu_ras_event_mgr_init(adev);
 
 	if (amdgpu_aca_is_enabled(adev)) {
@@ -3625,7 +3621,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
 		if (r)
 			return r;
 
-		amdgpu_ras_set_aca_debug_mode(adev, false);
+		if (!amdgpu_sriov_vf(adev))
+		    amdgpu_ras_set_aca_debug_mode(adev, false);
 	} else {
 		if (amdgpu_in_reset(adev))
 			r = amdgpu_mca_reset(adev);
@@ -3634,9 +3631,14 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
 		if (r)
 			return r;
 
-		amdgpu_ras_set_mca_debug_mode(adev, false);
+		if (!amdgpu_sriov_vf(adev))
+		    amdgpu_ras_set_mca_debug_mode(adev, false);
 	}
 
+	/* Guest side doesn't need init ras feature */
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
 	list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
 		obj = node->ras_obj;
 		if (!obj) {
-- 
2.34.1



More information about the amd-gfx mailing list