[PATCH 3/4] drm/amdgpu: enable mode1 reset

Wenhui Sheng Wenhui.Sheng at amd.com
Fri Jul 10 05:46:25 UTC 2020


For sienna cichlid, add mode1 reset path for sGPU.

Signed-off-by: Likun Gao <Likun.Gao at amd.com>
Signed-off-by: Wenhui Sheng <Wenhui.Sheng at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    | 24 ++++++++------
 drivers/gpu/drm/amd/amdgpu/nv.c               | 32 +++++++++++++------
 drivers/gpu/drm/amd/powerplay/navi10_ppt.c    |  2 +-
 .../drm/amd/powerplay/sienna_cichlid_ppt.c    |  2 +-
 4 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 565dc59a9e89..85d2aee546b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2119,6 +2119,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
 	 */
 	switch (amdgpu_asic_reset_method(adev)) {
 	case AMD_RESET_METHOD_BACO:
+	case AMD_RESET_METHOD_MODE1:
 	case AMD_RESET_METHOD_PSP_MODE1:
 		return true;
 	default:
@@ -4220,7 +4221,8 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
 	 */
 	reset_method = amdgpu_asic_reset_method(adev);
 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
-	     (reset_method != AMD_RESET_METHOD_PSP_MODE1))
+	    (reset_method != AMD_RESET_METHOD_PSP_MODE1) &&
+	    (reset_method != AMD_RESET_METHOD_MODE1))
 		return -EINVAL;
 
 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
@@ -4275,16 +4277,18 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	struct amdgpu_device *tmp_adev = NULL;
 	int i, r = 0;
 	bool in_ras_intr = amdgpu_ras_intr_triggered();
-	bool use_baco =
-		(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
-		true : false;
+	bool is_full_reset = false;
 	bool audio_suspended = false;
 
+	if ((amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ||
+	    (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_MODE1))
+		is_full_reset = true;
+
 	/*
 	 * Flush RAM to disk so that after reboot
 	 * the user can read log and see why the system rebooted.
 	 */
-	if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
+	if (in_ras_intr && !is_full_reset && amdgpu_ras_get_context(adev)->reboot) {
 
 		DRM_WARN("Emergency reboot.");
 
@@ -4293,7 +4297,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	}
 
 	dev_info(adev->dev, "GPU %s begin!\n",
-		(in_ras_intr && !use_baco) ? "jobs stop":"reset");
+		(in_ras_intr && !is_full_reset) ? "jobs stop":"reset");
 
 	/*
 	 * Here we trylock to avoid chain of resets executing from
@@ -4365,7 +4369,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		amdgpu_fbdev_set_suspend(tmp_adev, 1);
 
 		/* disable ras on ALL IPs */
-		if (!(in_ras_intr && !use_baco) &&
+		if (!(in_ras_intr && !is_full_reset) &&
 		      amdgpu_device_ip_need_full_reset(tmp_adev))
 			amdgpu_ras_suspend(tmp_adev);
 
@@ -4377,12 +4381,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
 
-			if (in_ras_intr && !use_baco)
+			if (in_ras_intr && !is_full_reset)
 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
 		}
 	}
 
-	if (in_ras_intr && !use_baco)
+	if (in_ras_intr && !is_full_reset)
 		goto skip_sched_resume;
 
 	/*
@@ -4459,7 +4463,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 skip_sched_resume:
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
 		/*unlock kfd: SRIOV would do it separately */
-		if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
+		if (!(in_ras_intr && !is_full_reset) && !amdgpu_sriov_vf(tmp_adev))
 	                amdgpu_amdkfd_post_reset(tmp_adev);
 		if (audio_suspended)
 			amdgpu_device_resume_display_audio(tmp_adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 1d27dd3676ad..abccb155b28e 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -258,23 +258,32 @@ static int nv_read_register(struct amdgpu_device *adev, u32 se_num,
 	return -EINVAL;
 }
 
-static int nv_asic_mode1_reset(struct amdgpu_device *adev)
+static int nv_asic_mode1_reset(struct amdgpu_device *adev,
+			    enum amd_reset_method reset_method)
 {
 	u32 i;
 	int ret = 0;
 
 	amdgpu_atombios_scratch_regs_engine_hung(adev, true);
 
-	dev_info(adev->dev, "GPU psp mode1 reset\n");
+	dev_info(adev->dev, "GPU %s reset\n",
+			 reset_method ==  AMD_RESET_METHOD_MODE1
+			 ? "mode1" : "psp mode1");
 
 	/* disable BM */
 	pci_clear_master(adev->pdev);
 
 	pci_save_state(adev->pdev);
 
-	ret = psp_gpu_reset(adev);
+	if (reset_method == AMD_RESET_METHOD_MODE1)
+		ret = smu_mode1_reset(&adev->smu);
+	else
+		ret = psp_gpu_reset(adev);
+
 	if (ret)
-		dev_err(adev->dev, "GPU psp mode1 reset failed\n");
+		dev_err(adev->dev, "GPU %s reset failed\n",
+				reset_method ==  AMD_RESET_METHOD_MODE1
+				? "mode1" : "psp mode1");
 
 	pci_restore_state(adev->pdev);
 
@@ -307,8 +316,10 @@ nv_asic_reset_method(struct amdgpu_device *adev)
 {
 	struct smu_context *smu = &adev->smu;
 
-	if (!amdgpu_sriov_vf(adev) && smu_baco_is_support(smu))
+	if (smu_baco_is_support(smu))
 		return AMD_RESET_METHOD_BACO;
+	else if (smu_mode1_reset_is_support(smu))
+		return AMD_RESET_METHOD_MODE1;
 	else
 		return AMD_RESET_METHOD_PSP_MODE1;
 }
@@ -317,17 +328,20 @@ static int nv_asic_reset(struct amdgpu_device *adev)
 {
 	int ret = 0;
 	struct smu_context *smu = &adev->smu;
+	enum amd_reset_method reset_method;
+
+	reset_method = nv_asic_reset_method(adev);
+	if (reset_method == AMD_RESET_METHOD_BACO) {
+		dev_info(adev->dev, "GPU BACO reset\n");
 
-	if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
 		ret = smu_baco_enter(smu);
 		if (ret)
 			return ret;
 		ret = smu_baco_exit(smu);
 		if (ret)
 			return ret;
-	} else {
-		ret = nv_asic_mode1_reset(adev);
-	}
+	} else
+		ret = nv_asic_mode1_reset(adev, reset_method);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
index 4f7d064e16e4..014815bcae93 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
@@ -2039,7 +2039,7 @@ static bool navi10_is_baco_supported(struct smu_context *smu)
 	struct amdgpu_device *adev = smu->adev;
 	uint32_t val;
 
-	if (!smu_v11_0_baco_is_support(smu))
+	if (amdgpu_sriov_vf(adev) || (!smu_v11_0_baco_is_support(smu)))
 		return false;
 
 	val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0);
diff --git a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c
index 612788bbc6c3..319480550bb7 100644
--- a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c
@@ -1761,7 +1761,7 @@ static bool sienna_cichlid_is_baco_supported(struct smu_context *smu)
 	struct amdgpu_device *adev = smu->adev;
 	uint32_t val;
 
-	if (!smu_v11_0_baco_is_support(smu))
+	if (amdgpu_sriov_vf(adev) || (!smu_v11_0_baco_is_support(smu)))
 		return false;
 
 	val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0);
-- 
2.17.1



More information about the amd-gfx mailing list