[Intel-xe] [PATCH 09/11] drm/xe: Handle MDFI error severity.
Aravind Iddamsetty
aravind.iddamsetty at linux.intel.com
Wed Oct 4 12:11:18 UTC 2023
On 27/09/23 17:16, Himal Prasad Ghimiray wrote:
This patch is missing a commit message; please add one.
Thanks,
Aravind.
> Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
> ---
> drivers/gpu/drm/xe/regs/xe_tile_error_regs.h | 7 +++++++
> drivers/gpu/drm/xe/xe_hw_error.c | 16 ++++++++++++++--
> 2 files changed, 21 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/regs/xe_tile_error_regs.h b/drivers/gpu/drm/xe/regs/xe_tile_error_regs.h
> index 04701c62f0d9..8a5f6cd29304 100644
> --- a/drivers/gpu/drm/xe/regs/xe_tile_error_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_tile_error_regs.h
> @@ -48,4 +48,11 @@
> (base) + _SOC_GCOERRSTS, \
> (base) + _SOC_GNFERRSTS))
>
> +#define LOCAL_FIRST_IEH_HEADER_LOG_REG XE_REG(0x2822b0)
> +#define MDFI_SEVERITY_FATAL 0x00330000
> +#define MDFI_SEVERITY_NONFATAL 0x00310000
> +#define MDFI_SEVERITY(x) ((x) == HARDWARE_ERROR_FATAL ? \
> + MDFI_SEVERITY_FATAL : \
> + MDFI_SEVERITY_NONFATAL)
> +
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_hw_error.c b/drivers/gpu/drm/xe/xe_hw_error.c
> index aeece9e705dc..dcf395bd985f 100644
> --- a/drivers/gpu/drm/xe/xe_hw_error.c
> +++ b/drivers/gpu/drm/xe/xe_hw_error.c
> @@ -608,7 +608,7 @@ xe_soc_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err)
> const struct err_msg_cntr_pair *soc_mstr_lcl_err_reg;
> const struct err_msg_cntr_pair *soc_slave_glbl_err_reg;
> const struct err_msg_cntr_pair *soc_slave_lcl_err_reg;
> - u32 errbit, base, slave_base;
> + u32 errbit, base, slave_base, ieh_header;
> int i;
>
> const char *hwerr_to_str = hardware_error_type_to_str(hw_err);
> @@ -684,9 +684,21 @@ xe_soc_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err)
> drm_info(&tile_to_xe(tile)->drm, HW_ERR "Tile%d SOC_LOCAL_ERR_STAT_MASTER_REG_%s:0x%08lx\n",
> tile->id, hwerr_to_str, lcl_errstat);
>
> - for_each_set_bit(errbit, &lcl_errstat, 32)
> + for_each_set_bit(errbit, &lcl_errstat, 32) {
> + if (errbit == 4 || errbit == 6) {
> + ieh_header = xe_mmio_read32(gt, LOCAL_FIRST_IEH_HEADER_LOG_REG);
> + drm_info(&tile_to_xe(tile)->drm, HW_ERR "Tile%d LOCAL_FIRST_IEH_HEADER_LOG_REG:0x%08x\n",
> + tile->id, ieh_header);
> +
> + if (ieh_header != MDFI_SEVERITY(hw_err)) {
> + lcl_errstat &= ~REG_BIT(errbit);
> + continue;
> + }
> + }
> +
> xe_soc_log_err_update_cntr(tile, hw_err, errbit,
> soc_mstr_lcl_err_reg);
> + }
>
> xe_mmio_write32(gt, SOC_LOCAL_ERR_STAT_MASTER_REG(base, hw_err), lcl_errstat);
> }
More information about the Intel-xe
mailing list