[Intel-xe] [PATCH 09/11] drm/xe: Handle MDFI error severity.

Himal Prasad Ghimiray himal.prasad.ghimiray at intel.com
Wed Sep 27 11:46:25 UTC 2023


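For bits 4 and 6 of SOC_LOCAL_ERR_STAT_MASTER_REG, read
LOCAL_FIRST_IEH_HEADER_LOG_REG and log its value. The header is compared
against the MDFI severity code expected for the error type being handled
(0x00330000 for HARDWARE_ERROR_FATAL, 0x00310000 for any other type). If
it does not match, the error counter is not updated for that bit, and the
bit is cleared from the value that is written back to
SOC_LOCAL_ERR_STAT_MASTER_REG.

Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>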
---
 drivers/gpu/drm/xe/regs/xe_tile_error_regs.h |  7 +++++++
 drivers/gpu/drm/xe/xe_hw_error.c             | 16 ++++++++++++++--
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_tile_error_regs.h b/drivers/gpu/drm/xe/regs/xe_tile_error_regs.h
index 04701c62f0d9..8a5f6cd29304 100644
--- a/drivers/gpu/drm/xe/regs/xe_tile_error_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_tile_error_regs.h
@@ -48,4 +48,11 @@
 								(base) + _SOC_GCOERRSTS, \
 								(base) + _SOC_GNFERRSTS))
 
+#define LOCAL_FIRST_IEH_HEADER_LOG_REG			XE_REG(0x2822b0)
+#define MDFI_SEVERITY_FATAL		0x00330000
+#define MDFI_SEVERITY_NONFATAL		0x00310000
+#define MDFI_SEVERITY(x)				((x) == HARDWARE_ERROR_FATAL ? \
+								MDFI_SEVERITY_FATAL : \
+								MDFI_SEVERITY_NONFATAL)
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_hw_error.c b/drivers/gpu/drm/xe/xe_hw_error.c
index aeece9e705dc..dcf395bd985f 100644
--- a/drivers/gpu/drm/xe/xe_hw_error.c
+++ b/drivers/gpu/drm/xe/xe_hw_error.c
@@ -608,7 +608,7 @@ xe_soc_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err)
 	const struct err_msg_cntr_pair *soc_mstr_lcl_err_reg;
 	const struct err_msg_cntr_pair *soc_slave_glbl_err_reg;
 	const struct err_msg_cntr_pair *soc_slave_lcl_err_reg;
-	u32 errbit, base, slave_base;
+	u32 errbit, base, slave_base, ieh_header;
 	int i;
 
 	const char *hwerr_to_str = hardware_error_type_to_str(hw_err);
@@ -684,9 +684,21 @@ xe_soc_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err)
 		drm_info(&tile_to_xe(tile)->drm, HW_ERR "Tile%d SOC_LOCAL_ERR_STAT_MASTER_REG_%s:0x%08lx\n",
 			 tile->id, hwerr_to_str, lcl_errstat);
 
-		for_each_set_bit(errbit, &lcl_errstat, 32)
+		for_each_set_bit(errbit, &lcl_errstat, 32) {
+			if (errbit == 4 || errbit == 6) {
+				ieh_header = xe_mmio_read32(gt, LOCAL_FIRST_IEH_HEADER_LOG_REG);
+				drm_info(&tile_to_xe(tile)->drm, HW_ERR "Tile%d LOCAL_FIRST_IEH_HEADER_LOG_REG:0x%08x\n",
+					 tile->id, ieh_header);
+
+				if (ieh_header != MDFI_SEVERITY(hw_err)) {
+					lcl_errstat &= ~REG_BIT(errbit);
+					continue;
+				}
+			}
+
 			xe_soc_log_err_update_cntr(tile, hw_err, errbit,
 						   soc_mstr_lcl_err_reg);
+		}
 
 		xe_mmio_write32(gt, SOC_LOCAL_ERR_STAT_MASTER_REG(base, hw_err), lcl_errstat);
 	}
-- 
2.25.1



More information about the Intel-xe mailing list