[PATCH 4/6] drm/amdgpu: bypass RAS error reset in some conditions
Tao Zhou
tao.zhou1 at amd.com
Tue Oct 17 12:45:50 UTC 2023
PMFW is responsible for RAS error reset in some conditions, so the driver
can skip the operation in those cases.
v2: add a check for ras->in_recovery, since it is set earlier than
amdgpu_in_reset.
Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 20 ++++++++++++++++++--
1 file changed, 18 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 95c7ba889e2d..806c6d4deb63 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1178,11 +1178,19 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
enum amdgpu_ras_block block)
{
struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
if (!block_obj || !block_obj->hw_ops)
return 0;
- if (!amdgpu_ras_is_supported(adev, block))
+ /* skip ras error reset in gpu reset */
+ if (amdgpu_in_reset(adev) && atomic_read(&ras->in_recovery) &&
+ mca_funcs && mca_funcs->mca_set_debug_mode)
+ return 0;
+
+ if (!amdgpu_ras_is_supported(adev, block) ||
+ !amdgpu_ras_get_mca_debug_mode(adev))
return 0;
if (block_obj->hw_ops->reset_ras_error_count)
@@ -1195,6 +1203,8 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
enum amdgpu_ras_block block)
{
struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
if (!block_obj || !block_obj->hw_ops) {
dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
@@ -1202,7 +1212,13 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
return 0;
}
- if (!amdgpu_ras_is_supported(adev, block))
+ /* skip ras error reset in gpu reset */
+ if (amdgpu_in_reset(adev) && atomic_read(&ras->in_recovery) &&
+ mca_funcs && mca_funcs->mca_set_debug_mode)
+ return 0;
+
+ if (!amdgpu_ras_is_supported(adev, block) ||
+ !amdgpu_ras_get_mca_debug_mode(adev))
return 0;
if (block_obj->hw_ops->reset_ras_error_count)
--
2.35.1
More information about the amd-gfx
mailing list