[PATCH 3/4] drm/amdgpu: enable mode1 reset
Wenhui Sheng
Wenhui.Sheng at amd.com
Fri Jul 10 05:46:25 UTC 2020
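For Sienna Cichlid, add a mode1 reset path for sGPU. When the SMU reports
mode1 reset support, the new AMD_RESET_METHOD_MODE1 method is selected and
the reset is performed through smu_mode1_reset(); otherwise the existing
PSP mode1 reset is still used.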
Signed-off-by: Likun Gao <Likun.Gao at amd.com>
Signed-off-by: Wenhui Sheng <Wenhui.Sheng at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 24 ++++++++------
drivers/gpu/drm/amd/amdgpu/nv.c | 32 +++++++++++++------
drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 2 +-
.../drm/amd/powerplay/sienna_cichlid_ppt.c | 2 +-
4 files changed, 39 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 565dc59a9e89..85d2aee546b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2119,6 +2119,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
*/
switch (amdgpu_asic_reset_method(adev)) {
case AMD_RESET_METHOD_BACO:
+ case AMD_RESET_METHOD_MODE1:
case AMD_RESET_METHOD_PSP_MODE1:
return true;
default:
@@ -4220,7 +4221,8 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
*/
reset_method = amdgpu_asic_reset_method(adev);
if ((reset_method != AMD_RESET_METHOD_BACO) &&
- (reset_method != AMD_RESET_METHOD_PSP_MODE1))
+ (reset_method != AMD_RESET_METHOD_PSP_MODE1) &&
+ (reset_method != AMD_RESET_METHOD_MODE1))
return -EINVAL;
p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
@@ -4275,16 +4277,18 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_device *tmp_adev = NULL;
int i, r = 0;
bool in_ras_intr = amdgpu_ras_intr_triggered();
- bool use_baco =
- (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
- true : false;
+ bool is_full_reset = false;
bool audio_suspended = false;
+ if ((amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ||
+ (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_MODE1))
+ is_full_reset = true;
+
/*
* Flush RAM to disk so that after reboot
* the user can read log and see why the system rebooted.
*/
- if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
+ if (in_ras_intr && !is_full_reset && amdgpu_ras_get_context(adev)->reboot) {
DRM_WARN("Emergency reboot.");
@@ -4293,7 +4297,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
}
dev_info(adev->dev, "GPU %s begin!\n",
- (in_ras_intr && !use_baco) ? "jobs stop":"reset");
+ (in_ras_intr && !is_full_reset) ? "jobs stop":"reset");
/*
* Here we trylock to avoid chain of resets executing from
@@ -4365,7 +4369,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
amdgpu_fbdev_set_suspend(tmp_adev, 1);
/* disable ras on ALL IPs */
- if (!(in_ras_intr && !use_baco) &&
+ if (!(in_ras_intr && !is_full_reset) &&
amdgpu_device_ip_need_full_reset(tmp_adev))
amdgpu_ras_suspend(tmp_adev);
@@ -4377,12 +4381,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
drm_sched_stop(&ring->sched, job ? &job->base : NULL);
- if (in_ras_intr && !use_baco)
+ if (in_ras_intr && !is_full_reset)
amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
}
}
- if (in_ras_intr && !use_baco)
+ if (in_ras_intr && !is_full_reset)
goto skip_sched_resume;
/*
@@ -4459,7 +4463,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
skip_sched_resume:
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
/*unlock kfd: SRIOV would do it separately */
- if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
+ if (!(in_ras_intr && !is_full_reset) && !amdgpu_sriov_vf(tmp_adev))
amdgpu_amdkfd_post_reset(tmp_adev);
if (audio_suspended)
amdgpu_device_resume_display_audio(tmp_adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 1d27dd3676ad..abccb155b28e 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -258,23 +258,32 @@ static int nv_read_register(struct amdgpu_device *adev, u32 se_num,
return -EINVAL;
}
-static int nv_asic_mode1_reset(struct amdgpu_device *adev)
+static int nv_asic_mode1_reset(struct amdgpu_device *adev,
+ enum amd_reset_method reset_method)
{
u32 i;
int ret = 0;
amdgpu_atombios_scratch_regs_engine_hung(adev, true);
- dev_info(adev->dev, "GPU psp mode1 reset\n");
+ dev_info(adev->dev, "GPU %s reset\n",
+ reset_method == AMD_RESET_METHOD_MODE1
+ ? "mode1" : "psp mode1");
/* disable BM */
pci_clear_master(adev->pdev);
pci_save_state(adev->pdev);
- ret = psp_gpu_reset(adev);
+ if (reset_method == AMD_RESET_METHOD_MODE1)
+ ret = smu_mode1_reset(&adev->smu);
+ else
+ ret = psp_gpu_reset(adev);
+
if (ret)
- dev_err(adev->dev, "GPU psp mode1 reset failed\n");
+ dev_err(adev->dev, "GPU %s reset failed\n",
+ reset_method == AMD_RESET_METHOD_MODE1
+ ? "mode1" : "psp mode1");
pci_restore_state(adev->pdev);
@@ -307,8 +316,10 @@ nv_asic_reset_method(struct amdgpu_device *adev)
{
struct smu_context *smu = &adev->smu;
- if (!amdgpu_sriov_vf(adev) && smu_baco_is_support(smu))
+ if (smu_baco_is_support(smu))
return AMD_RESET_METHOD_BACO;
+ else if (smu_mode1_reset_is_support(smu))
+ return AMD_RESET_METHOD_MODE1;
else
return AMD_RESET_METHOD_PSP_MODE1;
}
@@ -317,17 +328,20 @@ static int nv_asic_reset(struct amdgpu_device *adev)
{
int ret = 0;
struct smu_context *smu = &adev->smu;
+ enum amd_reset_method reset_method;
+
+ reset_method = nv_asic_reset_method(adev);
+ if (reset_method == AMD_RESET_METHOD_BACO) {
+ dev_info(adev->dev, "GPU BACO reset\n");
- if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
ret = smu_baco_enter(smu);
if (ret)
return ret;
ret = smu_baco_exit(smu);
if (ret)
return ret;
- } else {
- ret = nv_asic_mode1_reset(adev);
- }
+ } else
+ ret = nv_asic_mode1_reset(adev, reset_method);
return ret;
}
diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
index 4f7d064e16e4..014815bcae93 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
@@ -2039,7 +2039,7 @@ static bool navi10_is_baco_supported(struct smu_context *smu)
struct amdgpu_device *adev = smu->adev;
uint32_t val;
- if (!smu_v11_0_baco_is_support(smu))
+ if (amdgpu_sriov_vf(adev) || (!smu_v11_0_baco_is_support(smu)))
return false;
val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0);
diff --git a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c
index 612788bbc6c3..319480550bb7 100644
--- a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c
@@ -1761,7 +1761,7 @@ static bool sienna_cichlid_is_baco_supported(struct smu_context *smu)
struct amdgpu_device *adev = smu->adev;
uint32_t val;
- if (!smu_v11_0_baco_is_support(smu))
+ if (amdgpu_sriov_vf(adev) || (!smu_v11_0_baco_is_support(smu)))
return false;
val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0);
--
2.17.1
More information about the amd-gfx mailing list