[PATCH 2/3] drm/amdgpu: enable mode1 reset
Sheng, Wenhui
Wenhui.Sheng at amd.com
Mon Jul 13 03:45:26 UTC 2020
[AMD Official Use Only - Internal Distribution Only]
[AMD Official Use Only - Internal Distribution Only]
OK, I will refine it.
Brs
Wenhui
-----Original Message-----
From: Quan, Evan <Evan.Quan at amd.com>
Sent: Monday, July 13, 2020 11:26 AM
To: Sheng, Wenhui <Wenhui.Sheng at amd.com>; amd-gfx at lists.freedesktop.org
Cc: Gao, Likun <Likun.Gao at amd.com>; Sheng, Wenhui <Wenhui.Sheng at amd.com>; Zhang, Hawking <Hawking.Zhang at amd.com>
Subject: RE: [PATCH 2/3] drm/amdgpu: enable mode1 reset
[AMD Official Use Only - Internal Distribution Only]
It seems this patch contains several unrelated changes. It would be better to split it into several patches:
One for amdgpu_ras_need_emergency_restart() changes.
One for the following changes:
+if (amdgpu_dpm_is_mode1_reset_supported(adev)) { dev_info(adev->dev,
+"GPU smu mode1 reset\n"); ret = amdgpu_dpm_mode1_reset(adev); } else {
+dev_info(adev->dev, "GPU psp mode1 reset\n"); ret =
+psp_gpu_reset(adev); }
+
One for the changes below:
-if (!amdgpu_sriov_vf(adev) && smu_baco_is_support(smu))
+if (smu_baco_is_support(smu))
And maybe the final one for coding style fixes.
BR
Evan
-----Original Message-----
From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Wenhui Sheng
Sent: Friday, July 10, 2020 10:17 PM
To: amd-gfx at lists.freedesktop.org
Cc: Gao, Likun <Likun.Gao at amd.com>; Sheng, Wenhui <Wenhui.Sheng at amd.com>; Zhang, Hawking <Hawking.Zhang at amd.com>
Subject: [PATCH 2/3] drm/amdgpu: enable mode1 reset
For Sienna Cichlid, add a mode1 reset path for sGPU.
Also fix some corner cases about mode1 mode reuse.
v2: hide MP0/MP1 mode1 reset under AMD_RESET_METHOD_MODE1
Signed-off-by: Likun Gao <Likun.Gao at amd.com>
Signed-off-by: Wenhui Sheng <Wenhui.Sheng at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 23 +++++++++++-----------
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 11 +++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 +
drivers/gpu/drm/amd/amdgpu/nv.c | 19 +++++++++++-------
drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 2 +-
5 files changed, 37 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index fec6cf3f0c8a..6ce6e6bb8b50 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4274,18 +4274,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_hive_info *hive = NULL; struct amdgpu_device *tmp_adev = NULL; int i, r = 0; -bool in_ras_intr = amdgpu_ras_intr_triggered(); -bool use_baco =
-(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
-true : false;
+bool need_emergency_restart = false;
bool audio_suspended = false;
+/**
+ * Special case: RAS triggered and full reset isn't supported */
+need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
+
/*
* Flush RAM to disk so that after reboot
* the user can read log and see why the system rebooted.
*/
-if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
-
+if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
DRM_WARN("Emergency reboot.");
ksys_sync_helper();
@@ -4293,7 +4294,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, }
dev_info(adev->dev, "GPU %s begin!\n",
-(in_ras_intr && !use_baco) ? "jobs stop":"reset");
+need_emergency_restart ? "jobs stop":"reset");
/*
* Here we trylock to avoid chain of resets executing from @@ -4365,7 +4366,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, amdgpu_fbdev_set_suspend(tmp_adev, 1);
/* disable ras on ALL IPs */
-if (!(in_ras_intr && !use_baco) &&
+if (!need_emergency_restart &&
amdgpu_device_ip_need_full_reset(tmp_adev))
amdgpu_ras_suspend(tmp_adev);
@@ -4377,12 +4378,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
drm_sched_stop(&ring->sched, job ? &job->base : NULL);
-if (in_ras_intr && !use_baco)
+if (need_emergency_restart)
amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
}
}
-if (in_ras_intr && !use_baco)
+if (need_emergency_restart)
goto skip_sched_resume;
/*
@@ -4459,7 +4460,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
skip_sched_resume:
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { /*unlock kfd: SRIOV would do it separately */ -if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
+if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
amdgpu_amdkfd_post_reset(tmp_adev);
if (audio_suspended)
amdgpu_device_resume_display_audio(tmp_adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 3a3fa8567c94..6f06e1214622 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2135,3 +2135,14 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) amdgpu_ras_reset_gpu(adev); } }
+
+bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev) { if
+(adev->asic_type == CHIP_VEGA20 &&
+ adev->pm.fw_version <= 0x283400) {
+return !(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) &&
+amdgpu_ras_intr_triggered(); }
+
+return false;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index e7df5d8429f8..b2667342cf67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -633,4 +633,5 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready);
+bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 356849136d1d..9f1240bd0310 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -265,17 +265,21 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev)
amdgpu_atombios_scratch_regs_engine_hung(adev, true);
-dev_info(adev->dev, "GPU mode1 reset\n");
-
/* disable BM */
pci_clear_master(adev->pdev);
pci_save_state(adev->pdev);
-ret = psp_gpu_reset(adev);
+if (amdgpu_dpm_is_mode1_reset_supported(adev)) { dev_info(adev->dev,
+"GPU smu mode1 reset\n"); ret = amdgpu_dpm_mode1_reset(adev); } else {
+dev_info(adev->dev, "GPU psp mode1 reset\n"); ret =
+psp_gpu_reset(adev); }
+
if (ret)
dev_err(adev->dev, "GPU mode1 reset failed\n");
-
pci_restore_state(adev->pdev);
/* wait for asic to come out of reset */ @@ -307,7 +311,7 @@ nv_asic_reset_method(struct amdgpu_device *adev) { struct smu_context *smu = &adev->smu;
-if (!amdgpu_sriov_vf(adev) && smu_baco_is_support(smu))
+if (smu_baco_is_support(smu))
return AMD_RESET_METHOD_BACO;
else
return AMD_RESET_METHOD_MODE1;
@@ -319,15 +323,16 @@ static int nv_asic_reset(struct amdgpu_device *adev) struct smu_context *smu = &adev->smu;
if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+dev_info(adev->dev, "GPU BACO reset\n");
+
ret = smu_baco_enter(smu);
if (ret)
return ret;
ret = smu_baco_exit(smu);
if (ret)
return ret;
-} else {
+} else
ret = nv_asic_mode1_reset(adev);
-}
return ret;
}
diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
index 4f7d064e16e4..014815bcae93 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
@@ -2039,7 +2039,7 @@ static bool navi10_is_baco_supported(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; uint32_t val;
-if (!smu_v11_0_baco_is_support(smu))
+if (amdgpu_sriov_vf(adev) || (!smu_v11_0_baco_is_support(smu)))
return false;
val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0);
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx at lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7Cevan.quan%40amd.com%7C5f3fbaad4309421b1e7508d824dbed1e%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637299874318950168&sdata=iVrmi9ovJz57IzZfR3wFAJDEhVmwXmTGOlzvMIxAZq4%3D&reserved=0
More information about the amd-gfx
mailing list