[PATCH] drm/amd/pm: Enable ecc_info table support for smu v13_0_10

Yang, Stanley Stanley.Yang at amd.com
Wed Mar 1 08:20:51 UTC 2023


[AMD Official Use Only - General]

Reviewed-by: Stanley.Yang <Stanley.Yang at amd.com>

Regards,
Stanley
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of
> Candice Li
> Sent: Wednesday, March 1, 2023 2:10 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Li, Candice <Candice.Li at amd.com>
> Subject: [PATCH] drm/amd/pm: Enable ecc_info table support for smu
> v13_0_10
> 
> Support EccInfoTable which includes umc ras error count and error address.
> 
> Signed-off-by: Candice Li <candice.li at amd.com>
> Reviewed-by: Evan Quan <evan.quan at amd.com>
> ---
>  .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 75
> +++++++++++++++++++
>  1 file changed, 75 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> index 923a9fb3c8873c..27448ffe60a439 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> @@ -46,6 +46,7 @@
>  #include "asic_reg/mp/mp_13_0_0_sh_mask.h"
>  #include "smu_cmn.h"
>  #include "amdgpu_ras.h"
> +#include "umc_v8_10.h"
> 
>  /*
>   * DO NOT use these for err/warn/info/debug messages.
> @@ -90,6 +91,12 @@
> 
>  #define DEBUGSMC_MSG_Mode1Reset	2
> 
> +/*
> + * SMU_v13_0_10 supports ECCTABLE since version 80.34.0,
> + * use this to check whether the ECCTABLE feature is supported
> + */
> +#define SUPPORT_ECCTABLE_SMU_13_0_10_VERSION 0x00502200
> +
>  static struct cmn2asic_msg_mapping
> smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = {
>  	MSG_MAP(TestMessage,
> 	PPSMC_MSG_TestMessage,                 1),
>  	MSG_MAP(GetSmuVersion,
> 	PPSMC_MSG_GetSmuVersion,               1),
> @@ -229,6 +236,7 @@ static struct cmn2asic_mapping
> smu_v13_0_0_table_map[SMU_TABLE_COUNT] = {
>  	TAB_MAP(ACTIVITY_MONITOR_COEFF),
>  	[SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE},
>  	TAB_MAP(I2C_COMMANDS),
> +	TAB_MAP(ECCINFO),
>  };
> 
>  static struct cmn2asic_mapping
> smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] = {
> @@ -462,6 +470,8 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu)
>  		       AMDGPU_GEM_DOMAIN_VRAM);
>  	SMU_TABLE_INIT(tables, SMU_TABLE_COMBO_PPTABLE,
> MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE,
>  			PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
> +	SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO,
> sizeof(EccInfoTable_t),
> +			PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
> 
>  	smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t),
> GFP_KERNEL);
>  	if (!smu_table->metrics_table)
> @@ -477,8 +487,14 @@ static int smu_v13_0_0_tables_init(struct
> smu_context *smu)
>  	if (!smu_table->watermarks_table)
>  		goto err2_out;
> 
> +	smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size,
> GFP_KERNEL);
> +	if (!smu_table->ecc_table)
> +		goto err3_out;
> +
>  	return 0;
> 
> +err3_out:
> +	kfree(smu_table->watermarks_table);
>  err2_out:
>  	kfree(smu_table->gpu_metrics_table);
>  err1_out:
> @@ -2036,6 +2052,64 @@ static int
> smu_v13_0_0_send_bad_mem_channel_flag(struct smu_context *smu,
>  	return ret;
>  }
> 
> +static int smu_v13_0_0_check_ecc_table_support(struct smu_context
> *smu)
> +{
> +	struct amdgpu_device *adev = smu->adev;
> +	uint32_t if_version = 0xff, smu_version = 0xff;
> +	int ret = 0;
> +
> +	ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version);
> +	if (ret)
> +		return -EOPNOTSUPP;
> +
> +	if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10)) &&
> +		(smu_version >=
> SUPPORT_ECCTABLE_SMU_13_0_10_VERSION))
> +		return ret;
> +	else
> +		return -EOPNOTSUPP;
> +}
> +
> +static ssize_t smu_v13_0_0_get_ecc_info(struct smu_context *smu,
> +									void
> *table)
> +{
> +	struct smu_table_context *smu_table = &smu->smu_table;
> +	struct amdgpu_device *adev = smu->adev;
> +	EccInfoTable_t *ecc_table = NULL;
> +	struct ecc_info_per_ch *ecc_info_per_channel = NULL;
> +	int i, ret = 0;
> +	struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table;
> +
> +	ret = smu_v13_0_0_check_ecc_table_support(smu);
> +	if (ret)
> +		return ret;
> +
> +	ret = smu_cmn_update_table(smu,
> +					SMU_TABLE_ECCINFO,
> +					0,
> +					smu_table->ecc_table,
> +					false);
> +	if (ret) {
> +		dev_info(adev->dev, "Failed to export SMU ecc table!\n");
> +		return ret;
> +	}
> +
> +	ecc_table = (EccInfoTable_t *)smu_table->ecc_table;
> +
> +	for (i = 0; i < UMC_V8_10_TOTAL_CHANNEL_NUM(adev); i++) {
> +		ecc_info_per_channel = &(eccinfo->ecc[i]);
> +		ecc_info_per_channel->ce_count_lo_chip =
> +				ecc_table->EccInfo[i].ce_count_lo_chip;
> +		ecc_info_per_channel->ce_count_hi_chip =
> +				ecc_table->EccInfo[i].ce_count_hi_chip;
> +		ecc_info_per_channel->mca_umc_status =
> +				ecc_table->EccInfo[i].mca_umc_status;
> +		ecc_info_per_channel->mca_umc_addr =
> +				ecc_table->EccInfo[i].mca_umc_addr;
> +	}
> +
> +	return ret;
> +}
> +
>  static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
>  	.get_allowed_feature_mask =
> smu_v13_0_0_get_allowed_feature_mask,
>  	.set_default_dpm_table = smu_v13_0_0_set_default_dpm_table,
> @@ -2111,6 +2185,7 @@ static const struct pptable_funcs
> smu_v13_0_0_ppt_funcs = {
>  	.send_hbm_bad_pages_num =
> smu_v13_0_0_smu_send_bad_mem_page_num,
>  	.send_hbm_bad_channel_flag =
> smu_v13_0_0_send_bad_mem_channel_flag,
>  	.gpo_control = smu_v13_0_gpo_control,
> +	.get_ecc_info = smu_v13_0_0_get_ecc_info,
>  };
> 
>  void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
> --
> 2.17.1


More information about the amd-gfx mailing list