[PATCH] drm/amdgpu: fix regresstion on SR-IOV gpu reset failed

Rex Zhu Rex.Zhu at amd.com
Wed Oct 18 09:29:39 UTC 2017


fw ucode is corrupted after vf flr by PSP so ucode_init() is
a must in psp_hw_init othewise KIQ/KCQ enabling will fail

Revert "drm/amdgpu: refine code delete duplicated error handling"
This reverts commit e57b87ff828f95efe992468e6d18c2c059b27aa9.
Revert "drm/amdgpu: move amdgpu_ucode_init_bo to amdgpu_device.c"
This reverts commit 815b8f8595148d06a64d2ce4282e8e80dfcb02f1.

Change-Id: I4c4f05ce85f7828e7ac19a2bfffa223614cca59a
Signed-off-by: Rex Zhu <Rex.Zhu at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    | 8 --------
 drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 5 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c       | 9 +++++++++
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2ae488b..0e35e09 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1680,7 +1680,6 @@ static int amdgpu_init(struct amdgpu_device *adev)
 			return r;
 		}
 		adev->ip_blocks[i].status.sw = true;
-
 		/* need to do gmc hw init early so we can allocate gpu mem */
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
 			r = amdgpu_vram_scratch_init(adev);
@@ -1711,11 +1710,6 @@ static int amdgpu_init(struct amdgpu_device *adev)
 		}
 	}
 
-	mutex_lock(&adev->firmware.mutex);
-	if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT)
-		amdgpu_ucode_init_bo(adev);
-	mutex_unlock(&adev->firmware.mutex);
-
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.sw)
 			continue;
@@ -1851,8 +1845,6 @@ static int amdgpu_fini(struct amdgpu_device *adev)
 
 		adev->ip_blocks[i].status.hw = false;
 	}
-	if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT)
-		amdgpu_ucode_fini_bo(adev);
 
 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
 		if (!adev->ip_blocks[i].status.sw)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index 3b42f40..5f5aa5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -145,6 +145,8 @@ static int amdgpu_pp_hw_init(void *handle)
 	int ret = 0;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
+		amdgpu_ucode_init_bo(adev);
 
 	if (adev->powerplay.ip_funcs->hw_init)
 		ret = adev->powerplay.ip_funcs->hw_init(
@@ -162,6 +164,9 @@ static int amdgpu_pp_hw_fini(void *handle)
 		ret = adev->powerplay.ip_funcs->hw_fini(
 					adev->powerplay.pp_handle);
 
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
+		amdgpu_ucode_fini_bo(adev);
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index f1035a6..447d446 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -411,6 +411,13 @@ static int psp_hw_init(void *handle)
 		return 0;
 
 	mutex_lock(&adev->firmware.mutex);
+	/*
+	 * This sequence is just used on hw_init only once, no need on
+	 * resume.
+	 */
+	ret = amdgpu_ucode_init_bo(adev);
+	if (ret)
+		goto failed;
 
 	ret = psp_load_fw(adev);
 	if (ret) {
@@ -435,6 +442,8 @@ static int psp_hw_fini(void *handle)
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
 		return 0;
 
+	amdgpu_ucode_fini_bo(adev);
+
 	psp_ring_destroy(psp, PSP_RING_TYPE__KM);
 
 	amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
-- 
1.9.1



More information about the amd-gfx mailing list