[PATCH 3/7] drm/amdgpu: add RAS poison consumption handler for SRIOV
Tao Zhou
tao.zhou1 at amd.com
Thu Dec 8 07:51:18 UTC 2022
Send message to PF if VF receives RAS poison consumption interrupt.
Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 44 +++++++++++++++----------
1 file changed, 26 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index f76c19fc0392..1c7fcb4f2380 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -169,25 +169,33 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset)
{
int ret = AMDGPU_RAS_SUCCESS;
- if (!adev->gmc.xgmi.connected_to_cpu) {
- struct ras_err_data err_data = {0, 0, 0, NULL};
- struct ras_common_if head = {
- .block = AMDGPU_RAS_BLOCK__UMC,
- };
- struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
-
- ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);
-
- if (ret == AMDGPU_RAS_SUCCESS && obj) {
- obj->err_data.ue_count += err_data.ue_count;
- obj->err_data.ce_count += err_data.ce_count;
+ if (!amdgpu_sriov_vf(adev)) {
+ if (!adev->gmc.xgmi.connected_to_cpu) {
+ struct ras_err_data err_data = {0, 0, 0, NULL};
+ struct ras_common_if head = {
+ .block = AMDGPU_RAS_BLOCK__UMC,
+ };
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
+
+ ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);
+
+ if (ret == AMDGPU_RAS_SUCCESS && obj) {
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+ }
+ } else if (reset) {
+ /* MCA poison handler is only responsible for GPU reset,
+ * let MCA notifier do page retirement.
+ */
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ amdgpu_ras_reset_gpu(adev);
}
- } else if (reset) {
- /* MCA poison handler is only responsible for GPU reset,
- * let MCA notifier do page retirement.
- */
- kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
- amdgpu_ras_reset_gpu(adev);
+ } else {
+ if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
+ adev->virt.ops->ras_poison_handler(adev);
+ else
+ dev_warn(adev->dev,
+ "No ras_poison_handler interface in SRIOV!\n");
}
return ret;
--
2.35.1
More information about the amd-gfx
mailing list