[Intel-xe] [PATCH 09/11] drm/xe: Handle MDFI error severity.
Himal Prasad Ghimiray
himal.prasad.ghimiray at intel.com
Wed Sep 27 11:46:25 UTC 2023
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
---
drivers/gpu/drm/xe/regs/xe_tile_error_regs.h | 7 +++++++
drivers/gpu/drm/xe/xe_hw_error.c | 16 ++++++++++++++--
2 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/xe/regs/xe_tile_error_regs.h b/drivers/gpu/drm/xe/regs/xe_tile_error_regs.h
index 04701c62f0d9..8a5f6cd29304 100644
--- a/drivers/gpu/drm/xe/regs/xe_tile_error_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_tile_error_regs.h
@@ -48,4 +48,11 @@
(base) + _SOC_GCOERRSTS, \
(base) + _SOC_GNFERRSTS))
+#define LOCAL_FIRST_IEH_HEADER_LOG_REG XE_REG(0x2822b0)
+#define MDFI_SEVERITY_FATAL 0x00330000
+#define MDFI_SEVERITY_NONFATAL 0x00310000
+#define MDFI_SEVERITY(x) ((x) == HARDWARE_ERROR_FATAL ? \
+ MDFI_SEVERITY_FATAL : \
+ MDFI_SEVERITY_NONFATAL)
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_error.c b/drivers/gpu/drm/xe/xe_hw_error.c
index aeece9e705dc..dcf395bd985f 100644
--- a/drivers/gpu/drm/xe/xe_hw_error.c
+++ b/drivers/gpu/drm/xe/xe_hw_error.c
@@ -608,7 +608,7 @@ xe_soc_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err)
const struct err_msg_cntr_pair *soc_mstr_lcl_err_reg;
const struct err_msg_cntr_pair *soc_slave_glbl_err_reg;
const struct err_msg_cntr_pair *soc_slave_lcl_err_reg;
- u32 errbit, base, slave_base;
+ u32 errbit, base, slave_base, ieh_header;
int i;
const char *hwerr_to_str = hardware_error_type_to_str(hw_err);
@@ -684,9 +684,21 @@ xe_soc_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err)
drm_info(&tile_to_xe(tile)->drm, HW_ERR "Tile%d SOC_LOCAL_ERR_STAT_MASTER_REG_%s:0x%08lx\n",
tile->id, hwerr_to_str, lcl_errstat);
- for_each_set_bit(errbit, &lcl_errstat, 32)
+ for_each_set_bit(errbit, &lcl_errstat, 32) {
+ if (errbit == 4 || errbit == 6) {
+ ieh_header = xe_mmio_read32(gt, LOCAL_FIRST_IEH_HEADER_LOG_REG);
+ drm_info(&tile_to_xe(tile)->drm, HW_ERR "Tile%d LOCAL_FIRST_IEH_HEADER_LOG_REG:0x%08x\n",
+ tile->id, ieh_header);
+
+ if (ieh_header != MDFI_SEVERITY(hw_err)) {
+ lcl_errstat &= ~REG_BIT(errbit);
+ continue;
+ }
+ }
+
xe_soc_log_err_update_cntr(tile, hw_err, errbit,
soc_mstr_lcl_err_reg);
+ }
xe_mmio_write32(gt, SOC_LOCAL_ERR_STAT_MASTER_REG(base, hw_err), lcl_errstat);
}
--
2.25.1
More information about the Intel-xe mailing list