[PATCH 3/6] drm/amdgpu: add RAS error info support for umc_v12_0

Yang Wang kevinyang.wang at amd.com
Thu Oct 12 13:14:03 UTC 2023


add RAS error info support for umc_v12_0.

Signed-off-by: Yang Wang <kevinyang.wang at amd.com>
Reviewed-by: Tao Zhou <tao.zhou1 at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 7714c2ef2cdc..cc66d44211fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -137,15 +137,27 @@ static int umc_v12_0_query_error_count(struct amdgpu_device *adev,
 					uint32_t ch_inst, void *data)
 {
 	struct ras_err_data *err_data = (struct ras_err_data *)data;
+	unsigned long ue_count = 0, ce_count = 0;
+
+	/* NOTE: node_inst is converted by adev->umc.active_mask and the range is [0-3],
+	 * which can be used as die ID directly */
+	struct amdgpu_smuio_mcm_config_info mcm_info = {
+		.socket_id = adev->smuio.funcs->get_socket_id(adev),
+		.die_id = node_inst,
+	};
+
 	uint64_t umc_reg_offset =
 		get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst);
 
 	umc_v12_0_query_correctable_error_count(adev,
 					umc_reg_offset,
-					&(err_data->ce_count));
+					&ue_count);
 	umc_v12_0_query_uncorrectable_error_count(adev,
 					umc_reg_offset,
-					&(err_data->ue_count));
+					&ue_count);
+
+	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
+	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
 
 	return 0;
 }
-- 
2.34.1



More information about the amd-gfx mailing list