[PATCH] drm/amdgpu: drop xgmi23 error query/reset support

Zhou1, Tao Tao.Zhou1 at amd.com
Thu Mar 10 08:23:07 UTC 2022


Reviewed-by: Tao Zhou <tao.zhou1 at amd.com>

> -----Original Message-----
> From: Hawking Zhang <Hawking.Zhang at amd.com>
> Sent: Thursday, March 10, 2022 4:14 PM
> To: amd-gfx at lists.freedesktop.org; Zhou1, Tao <Tao.Zhou1 at amd.com>
> Cc: Zhang, Hawking <Hawking.Zhang at amd.com>
> Subject: [PATCH] drm/amdgpu: drop xgmi23 error query/reset support
> 
> xgmi_ras is only initialized when the host-to-GPU interface is PCIe. In that case,
> xgmi23 is disabled and protected by security firmware, so host access results in a
> security violation.
> 
> Signed-off-by: Hawking Zhang <Hawking.Zhang at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 22 ----------------------
>  1 file changed, 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index 4a789c789c6c..c6e52be7e8ac 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -32,7 +32,6 @@
>  #include "wafl/wafl2_4_0_0_smn.h"
>  #include "wafl/wafl2_4_0_0_sh_mask.h"
> 
> -#define smnPCS_XGMI23_PCS_ERROR_STATUS   0x11a01210
>  #define smnPCS_XGMI3X16_PCS_ERROR_STATUS 0x11a0020c
>  #define smnPCS_GOPX1_PCS_ERROR_STATUS    0x12200210
> 
> @@ -67,17 +66,6 @@ static const int wafl_pcs_err_status_reg_arct[] = {
>  	smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS + 0x100000,  };
> 
> -static const int xgmi23_pcs_err_status_reg_aldebaran[] = {
> -	smnPCS_XGMI23_PCS_ERROR_STATUS,
> -	smnPCS_XGMI23_PCS_ERROR_STATUS + 0x100000,
> -	smnPCS_XGMI23_PCS_ERROR_STATUS + 0x200000,
> -	smnPCS_XGMI23_PCS_ERROR_STATUS + 0x300000,
> -	smnPCS_XGMI23_PCS_ERROR_STATUS + 0x400000,
> -	smnPCS_XGMI23_PCS_ERROR_STATUS + 0x500000,
> -	smnPCS_XGMI23_PCS_ERROR_STATUS + 0x600000,
> -	smnPCS_XGMI23_PCS_ERROR_STATUS + 0x700000
> -};
> -
>  static const int xgmi3x16_pcs_err_status_reg_aldebaran[] = {
>  	smnPCS_XGMI3X16_PCS_ERROR_STATUS,
>  	smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x100000, @@ -778,9
> +766,6 @@ static void amdgpu_xgmi_reset_ras_error_count(struct
> amdgpu_device *adev)
>  					 xgmi_pcs_err_status_reg_vg20[i]);
>  		break;
>  	case CHIP_ALDEBARAN:
> -		for (i = 0; i <
> ARRAY_SIZE(xgmi23_pcs_err_status_reg_aldebaran); i++)
> -			pcs_clear_status(adev,
> -
> xgmi23_pcs_err_status_reg_aldebaran[i]);
>  		for (i = 0; i <
> ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_aldebaran); i++)
>  			pcs_clear_status(adev,
> 
> xgmi3x16_pcs_err_status_reg_aldebaran[i]);
> @@ -881,13 +866,6 @@ static void
> amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
>  		}
>  		break;
>  	case CHIP_ALDEBARAN:
> -		/* check xgmi23 pcs error */
> -		for (i = 0; i <
> ARRAY_SIZE(xgmi23_pcs_err_status_reg_aldebaran); i++) {
> -			data =
> RREG32_PCIE(xgmi23_pcs_err_status_reg_aldebaran[i]);
> -			if (data)
> -				amdgpu_xgmi_query_pcs_error_status(adev,
> -						data, &ue_cnt, &ce_cnt, true);
> -		}
>  		/* check xgmi3x16 pcs error */
>  		for (i = 0; i <
> ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_aldebaran); i++) {
>  			data =
> RREG32_PCIE(xgmi3x16_pcs_err_status_reg_aldebaran[i]);
> --
> 2.17.1



More information about the amd-gfx mailing list