[PATCH 25/26] drm/amdgpu: support gfx ras error injection and err_cnt query
Alex Deucher
alexdeucher at gmail.com
Wed Jul 31 17:58:17 UTC 2019
From: Dennis Li <Dennis.Li at amd.com>
check gfx error count in both ras querry function and
ras interrupt handler.
gfx ras is still disabled by default due to known stability
issue found in gpu reset.
Signed-off-by: Dennis Li <Dennis.Li at amd.com>
Reviewed-by: Tao Zhou <tao.zhou1 at amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 19 ++++++++++++++++---
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 ++
2 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index ccd5863bca88..a96b0f17c619 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -600,6 +600,10 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
if (adev->umc.funcs->query_ras_error_count)
adev->umc.funcs->query_ras_error_count(adev, &err_data);
break;
+ case AMDGPU_RAS_BLOCK__GFX:
+ if (adev->gfx.funcs->query_ras_error_count)
+ adev->gfx.funcs->query_ras_error_count(adev, &err_data);
+ break;
default:
break;
}
@@ -637,13 +641,22 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
if (!obj)
return -EINVAL;
- if (block_info.block_id != TA_RAS_BLOCK__UMC) {
+ switch (info->head.block) {
+ case AMDGPU_RAS_BLOCK__GFX:
+ if (adev->gfx.funcs->ras_error_inject)
+ ret = adev->gfx.funcs->ras_error_inject(adev, info);
+ else
+ ret = -EINVAL;
+ break;
+ case AMDGPU_RAS_BLOCK__UMC:
+ ret = psp_ras_trigger_error(&adev->psp, &block_info);
+ break;
+ default:
DRM_INFO("%s error injection is not supported yet\n",
ras_block_str(info->head.block));
- return -EINVAL;
+ ret = -EINVAL;
}
- ret = psp_ras_trigger_error(&adev->psp, &block_info);
if (ret)
DRM_ERROR("RAS ERROR: inject %s error failed ret %d\n",
ras_block_str(info->head.block),
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index d7902e782be4..206176710f79 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5607,6 +5607,8 @@ static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
{
/* TODO ue will trigger an interrupt. */
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ if (adev->gfx.funcs->query_ras_error_count)
+ adev->gfx.funcs->query_ras_error_count(adev, err_data);
amdgpu_ras_reset_gpu(adev, 0);
return AMDGPU_RAS_UE;
}
--
2.20.1
More information about the amd-gfx
mailing list