[Intel-xe] [PATCH 09/11] drm/xe: Handle MDFI error severity.

Aravind Iddamsetty aravind.iddamsetty at linux.intel.com
Wed Oct 4 12:11:18 UTC 2023


On 27/09/23 17:16, Himal Prasad Ghimiray wrote:

The commit message is missing. Please add one describing what this patch changes and why.

Thanks,
Aravind.
> Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
> ---
>  drivers/gpu/drm/xe/regs/xe_tile_error_regs.h |  7 +++++++
>  drivers/gpu/drm/xe/xe_hw_error.c             | 16 ++++++++++++++--
>  2 files changed, 21 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/regs/xe_tile_error_regs.h b/drivers/gpu/drm/xe/regs/xe_tile_error_regs.h
> index 04701c62f0d9..8a5f6cd29304 100644
> --- a/drivers/gpu/drm/xe/regs/xe_tile_error_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_tile_error_regs.h
> @@ -48,4 +48,11 @@
>  								(base) + _SOC_GCOERRSTS, \
>  								(base) + _SOC_GNFERRSTS))
>  
> +#define LOCAL_FIRST_IEH_HEADER_LOG_REG			XE_REG(0x2822b0)
> +#define MDFI_SEVERITY_FATAL		0x00330000
> +#define MDFI_SEVERITY_NONFATAL		0x00310000
> +#define MDFI_SEVERITY(x)				((x) == HARDWARE_ERROR_FATAL ? \
> +								MDFI_SEVERITY_FATAL : \
> +								MDFI_SEVERITY_NONFATAL)
> +
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_hw_error.c b/drivers/gpu/drm/xe/xe_hw_error.c
> index aeece9e705dc..dcf395bd985f 100644
> --- a/drivers/gpu/drm/xe/xe_hw_error.c
> +++ b/drivers/gpu/drm/xe/xe_hw_error.c
> @@ -608,7 +608,7 @@ xe_soc_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err)
>  	const struct err_msg_cntr_pair *soc_mstr_lcl_err_reg;
>  	const struct err_msg_cntr_pair *soc_slave_glbl_err_reg;
>  	const struct err_msg_cntr_pair *soc_slave_lcl_err_reg;
> -	u32 errbit, base, slave_base;
> +	u32 errbit, base, slave_base, ieh_header;
>  	int i;
>  
>  	const char *hwerr_to_str = hardware_error_type_to_str(hw_err);
> @@ -684,9 +684,21 @@ xe_soc_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err)
>  		drm_info(&tile_to_xe(tile)->drm, HW_ERR "Tile%d SOC_LOCAL_ERR_STAT_MASTER_REG_%s:0x%08lx\n",
>  			 tile->id, hwerr_to_str, lcl_errstat);
>  
> -		for_each_set_bit(errbit, &lcl_errstat, 32)
> +		for_each_set_bit(errbit, &lcl_errstat, 32) {
> +			if (errbit == 4 || errbit == 6) {
> +				ieh_header = xe_mmio_read32(gt, LOCAL_FIRST_IEH_HEADER_LOG_REG);
> +				drm_info(&tile_to_xe(tile)->drm, HW_ERR "Tile%d LOCAL_FIRST_IEH_HEADER_LOG_REG:0x%08x\n",
> +					 tile->id, ieh_header);
> +
> +				if (ieh_header != MDFI_SEVERITY(hw_err)) {
> +					lcl_errstat &= ~REG_BIT(errbit);
> +					continue;
> +				}
> +			}
> +
>  			xe_soc_log_err_update_cntr(tile, hw_err, errbit,
>  						   soc_mstr_lcl_err_reg);
> +		}
>  
>  		xe_mmio_write32(gt, SOC_LOCAL_ERR_STAT_MASTER_REG(base, hw_err), lcl_errstat);
>  	}


More information about the Intel-xe mailing list