[PATCH] drm/amdgpu: handle extra UE register entries for gfx v9_4_3
Tao Zhou
tao.zhou1 at amd.com
Tue Oct 31 07:08:43 UTC 2023
The UE registe list is larger than CE list.
Reported-by: yipeng.chai at amd.com
Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 38 +++++++++++++++++++++++++
1 file changed, 38 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 41bbabd9ad4d..046ae95b366a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -3799,6 +3799,27 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev,
}
}
+ /* handle extra register entries of UE */
+ for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) {
+ for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) {
+ /* no need to select if instance number is 1 */
+ if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 ||
+ gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1)
+ gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
+
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
+ 1,
+ gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].mem_id_ent,
+ gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].size,
+ GET_INST(GC, xcc_id),
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &ue_count);
+ }
+ }
+ }
+
gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
xcc_id);
mutex_unlock(&adev->grbm_idx_mutex);
@@ -3838,6 +3859,23 @@ static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
}
}
+ /* handle extra register entries of UE */
+ for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) {
+ for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) {
+ /* no need to select if instance number is 1 */
+ if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 ||
+ gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1)
+ gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
+
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
+ 1,
+ GET_INST(GC, xcc_id));
+ }
+ }
+ }
+
gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
xcc_id);
mutex_unlock(&adev->grbm_idx_mutex);
--
2.35.1
More information about the amd-gfx
mailing list