[PATCH] drm/amdgpu: add umc_convert_error_address to simplify code

Zhou1, Tao Tao.Zhou1 at amd.com
Fri Jan 28 03:33:38 UTC 2022


[AMD Official Use Only]

Ping...

> -----Original Message-----
> From: Zhou1, Tao <Tao.Zhou1 at amd.com>
> Sent: Wednesday, January 26, 2022 7:05 PM
> To: amd-gfx at lists.freedesktop.org; Zhang, Hawking
> <Hawking.Zhang at amd.com>; Yang, Stanley <Stanley.Yang at amd.com>; Chai,
> Thomas <YiPeng.Chai at amd.com>; Clements, John <John.Clements at amd.com>;
> Lazar, Lijo <Lijo.Lazar at amd.com>
> Cc: Zhou1, Tao <Tao.Zhou1 at amd.com>
> Subject: [PATCH] drm/amdgpu: add umc_convert_error_address to simplify
> code
> 
> Make the code reusable and simpler.
> 
> Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 94 +++++++++------------------
>  drivers/gpu/drm/amd/amdgpu/umc_v8_7.c | 82 +++++++++--------------
>  2 files changed, 61 insertions(+), 115 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> index 47452b61b615..4abcdda42ac6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> @@ -114,21 +114,13 @@ static void
> umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
>  	}
>  }
> 
> -static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device
> *adev,
> -					 struct ras_err_data *err_data,
> -					 uint32_t ch_inst,
> -					 uint32_t umc_inst)
> +static void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
> +					struct ras_err_data *err_data, uint32_t
> ch_inst,
> +					uint32_t umc_inst, uint64_t err_addr,
> +					uint64_t mc_umc_status)
>  {
> -	uint64_t mc_umc_status, err_addr, soc_pa, retired_page, column;
>  	uint32_t channel_index;
> -	uint32_t eccinfo_table_idx;
> -	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> -
> -	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
> -	channel_index =
> -		adev->umc.channel_idx_tbl[umc_inst * adev-
> >umc.channel_inst_num + ch_inst];
> -
> -	mc_umc_status = ras-
> >umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
> +	uint64_t soc_pa, retired_page, column;
> 
>  	if (mc_umc_status == 0)
>  		return;
> @@ -136,12 +128,13 @@ static void
> umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
>  	if (!err_data->err_addr)
>  		return;
> 
> +	channel_index =
> +			adev->umc.channel_idx_tbl[umc_inst * adev-
> >umc.channel_inst_num +
> +ch_inst];
> +
>  	/* calculate error address if ue/ce error is detected */
>  	if (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
>  	    (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
>  	    REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
> -
> -		err_addr = ras-
> >umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
>  		err_addr = REG_GET_FIELD(err_addr,
> MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
> 
>  		/* translate umc channel address to soc pa, 3 parts are included */
> @@ -173,6 +166,23 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
>  	}
>  }
> 
> +static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device
> *adev,
> +					 struct ras_err_data *err_data,
> +					 uint32_t ch_inst,
> +					 uint32_t umc_inst)
> +{
> +	uint64_t mc_umc_status, err_addr;
> +	uint32_t eccinfo_table_idx;
> +	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> +
> +	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
> +	mc_umc_status = ras-
> >umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
> +	err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
> +
> +	umc_v6_7_convert_error_address(adev, err_data, ch_inst, umc_inst,
> +			err_addr, mc_umc_status);
> +}
> +
>  static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device
> *adev,
>  					     void *ras_error_status)
>  {
> @@ -348,9 +358,7 @@ static void umc_v6_7_query_error_address(struct
> amdgpu_device *adev,
>  					 uint32_t umc_inst)
>  {
>  	uint32_t mc_umc_status_addr;
> -	uint32_t channel_index;
> -	uint64_t mc_umc_status, mc_umc_addrt0;
> -	uint64_t err_addr, soc_pa, retired_page, column;
> +	uint64_t mc_umc_status, mc_umc_addrt0, err_addr;
> 
>  	mc_umc_status_addr =
>  		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
> @@ -358,54 +366,10 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
>  		SOC15_REG_OFFSET(UMC, 0,
> regMCA_UMC_UMC0_MCUMC_ADDRT0);
> 
>  	mc_umc_status = RREG64_PCIE((mc_umc_status_addr +
> umc_reg_offset) * 4);
> +	err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
> 
> -	if (mc_umc_status == 0)
> -		return;
> -
> -	if (!err_data->err_addr) {
> -		/* clear umc status */
> -		WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4,
> 0x0ULL);
> -		return;
> -	}
> -
> -	channel_index =
> -		adev->umc.channel_idx_tbl[umc_inst * adev-
> >umc.channel_inst_num + ch_inst];
> -
> -	/* calculate error address if ue/ce error is detected */
> -	if (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
> -	    (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
> -	    REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
> -
> -		err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) *
> 4);
> -		err_addr = REG_GET_FIELD(err_addr,
> MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
> -
> -		/* translate umc channel address to soc pa, 3 parts are included
> */
> -		soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
> -				ADDR_OF_256B_BLOCK(channel_index) |
> -				OFFSET_IN_256B_BLOCK(err_addr);
> -
> -		/* The umc channel bits are not original values, they are hashed
> */
> -		SET_CHANNEL_HASH(channel_index, soc_pa);
> -
> -		/* clear [C4 C3 C2] in soc physical address */
> -		soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT);
> -
> -		/* we only save ue error information currently, ce is skipped */
> -		if (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
> -				== 1) {
> -			/* loop for all possibilities of [C4 C3 C2] */
> -			for (column = 0; column <
> UMC_V6_7_NA_MAP_PA_NUM; column++) {
> -				retired_page = soc_pa | (column <<
> UMC_V6_7_PA_C2_BIT);
> -				amdgpu_umc_fill_error_record(err_data,
> err_addr,
> -					retired_page, channel_index, umc_inst);
> -
> -				/* shift R14 bit */
> -				retired_page ^= (0x1ULL <<
> UMC_V6_7_PA_R14_BIT);
> -				amdgpu_umc_fill_error_record(err_data,
> err_addr,
> -					retired_page, channel_index, umc_inst);
> -			}
> -		}
> -	}
> +	umc_v6_7_convert_error_address(adev, err_data, ch_inst, umc_inst,
> +				err_addr, mc_umc_status);
> 
>  	/* clear umc status */
>  	WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
> diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
> b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
> index de85a998ef99..df15b87ae12b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
> @@ -115,21 +115,13 @@ static void
> umc_v8_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
>  	}
>  }
> 
> -static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device
> *adev,
> -					struct ras_err_data *err_data,
> -					uint32_t ch_inst,
> -					uint32_t umc_inst)
> +static void umc_v8_7_convert_error_address(struct amdgpu_device *adev,
> +					struct ras_err_data *err_data, uint32_t
> ch_inst,
> +					uint32_t umc_inst, uint64_t err_addr,
> +					uint64_t mc_umc_status)
>  {
> -	uint64_t mc_umc_status, err_addr, retired_page;
> -	uint32_t channel_index;
> -	uint32_t eccinfo_table_idx;
> -	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> -
> -	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
> -	channel_index =
> -		adev->umc.channel_idx_tbl[umc_inst * adev-
> >umc.channel_inst_num + ch_inst];
> -
> -	mc_umc_status = ras-
> >umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
> +	uint32_t lsb, channel_index;
> +	uint64_t retired_page;
> 
>  	if (mc_umc_status == 0)
>  		return;
> @@ -137,13 +129,16 @@ static void
> umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
>  	if (!err_data->err_addr)
>  		return;
> 
> +	channel_index =
> +			adev->umc.channel_idx_tbl[umc_inst * adev-
> >umc.channel_inst_num +
> +ch_inst];
> +
>  	/* calculate error address if ue/ce error is detected */
>  	if (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
>  	    (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
>  	    REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
> -
> -		err_addr = ras-
> >umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
> +		lsb = REG_GET_FIELD(err_addr,
> MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
>  		err_addr = REG_GET_FIELD(err_addr,
> MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
> +		err_addr &= ~((0x1ULL << lsb) - 1);
> 
>  		/* translate umc channel address to soc pa, 3 parts are included */
>  		retired_page = ADDR_OF_4KB_BLOCK(err_addr) |
> @@ -157,6 +152,22 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
>  					retired_page, channel_index, umc_inst);
>  	}
>  }
> +static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device
> *adev,
> +					struct ras_err_data *err_data,
> +					uint32_t ch_inst,
> +					uint32_t umc_inst)
> +{
> +	uint64_t mc_umc_status, err_addr;
> +	uint32_t eccinfo_table_idx;
> +	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> +
> +	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
> +	mc_umc_status = ras-
> >umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
> +	err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
> +
> +	umc_v8_7_convert_error_address(adev, err_data, ch_inst, umc_inst,
> +				err_addr, mc_umc_status);
> +}
> 
>  static void umc_v8_7_ecc_info_query_ras_error_address(struct amdgpu_device
> *adev,
>  					void *ras_error_status)
> @@ -330,9 +341,8 @@ static void umc_v8_7_query_error_address(struct
> amdgpu_device *adev,
>  					 uint32_t ch_inst,
>  					 uint32_t umc_inst)
>  {
> -	uint32_t lsb, mc_umc_status_addr;
> -	uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
> -	uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev-
> >umc.channel_inst_num + ch_inst];
> +	uint32_t mc_umc_status_addr;
> +	uint64_t mc_umc_status, err_addr, mc_umc_addrt0;
> 
>  	mc_umc_status_addr =
>  		SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
> @@ -340,38 +350,10 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
>  		SOC15_REG_OFFSET(UMC, 0,
> mmMCA_UMC_UMC0_MCUMC_ADDRT0);
> 
>  	mc_umc_status = RREG64_PCIE((mc_umc_status_addr +
> umc_reg_offset) * 4);
> +	err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
> 
> -	if (mc_umc_status == 0)
> -		return;
> -
> -	if (!err_data->err_addr) {
> -		/* clear umc status */
> -		WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4,
> 0x0ULL);
> -		return;
> -	}
> -
> -	/* calculate error address if ue/ce error is detected */
> -	if (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
> -	    (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
> -	    REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
> -
> -		err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) *
> 4);
> -		/* the lowest lsb bits should be ignored */
> -		lsb = REG_GET_FIELD(err_addr,
> MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
> -		err_addr = REG_GET_FIELD(err_addr,
> MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
> -		err_addr &= ~((0x1ULL << lsb) - 1);
> -
> -		/* translate umc channel address to soc pa, 3 parts are included
> */
> -		retired_page = ADDR_OF_4KB_BLOCK(err_addr) |
> -				ADDR_OF_256B_BLOCK(channel_index) |
> -				OFFSET_IN_256B_BLOCK(err_addr);
> -
> -		/* we only save ue error information currently, ce is skipped */
> -		if (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
> -				== 1)
> -			amdgpu_umc_fill_error_record(err_data, err_addr,
> -					retired_page, channel_index, umc_inst);
> -	}
> +	umc_v8_7_convert_error_address(adev, err_data, ch_inst, umc_inst,
> +					err_addr, mc_umc_status);
> 
>  	/* clear umc status */
>  	WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
> --
> 2.17.1


More information about the amd-gfx mailing list