[PATCH] drm/amd/pm: Enable ecc_info table support for smu v13_0_10
Yang, Stanley
Stanley.Yang at amd.com
Wed Mar 1 08:20:51 UTC 2023
[AMD Official Use Only - General]
Reviewed-by: Stanley.Yang <Stanley.Yang at amd.com>
Regards,
Stanley
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of
> Candice Li
> Sent: Wednesday, March 1, 2023 2:10 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Li, Candice <Candice.Li at amd.com>
> Subject: [PATCH] drm/amd/pm: Enable ecc_info table support for smu
> v13_0_10
>
> Support EccInfoTable which includes umc ras error count and error address.
>
> Signed-off-by: Candice Li <candice.li at amd.com>
> Reviewed-by: Evan Quan <evan.quan at amd.com>
> ---
> .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 75
> +++++++++++++++++++
> 1 file changed, 75 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> index 923a9fb3c8873c..27448ffe60a439 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> @@ -46,6 +46,7 @@
> #include "asic_reg/mp/mp_13_0_0_sh_mask.h"
> #include "smu_cmn.h"
> #include "amdgpu_ras.h"
> +#include "umc_v8_10.h"
>
> /*
> * DO NOT use these for err/warn/info/debug messages.
> @@ -90,6 +91,12 @@
>
> #define DEBUGSMC_MSG_Mode1Reset 2
>
> +/*
> + * SMU_v13_0_10 supports ECCTABLE since version 80.34.0,
> + * use this to check ECCTABLE feature whether support
> + */
> +#define SUPPORT_ECCTABLE_SMU_13_0_10_VERSION 0x00502200
> +
> static struct cmn2asic_msg_mapping
> smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = {
> MSG_MAP(TestMessage,
> PPSMC_MSG_TestMessage, 1),
> MSG_MAP(GetSmuVersion,
> PPSMC_MSG_GetSmuVersion, 1),
> @@ -229,6 +236,7 @@ static struct cmn2asic_mapping
> smu_v13_0_0_table_map[SMU_TABLE_COUNT] = {
> TAB_MAP(ACTIVITY_MONITOR_COEFF),
> [SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE},
> TAB_MAP(I2C_COMMANDS),
> + TAB_MAP(ECCINFO),
> };
>
> static struct cmn2asic_mapping
> smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] = {
> @@ -462,6 +470,8 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu)
> AMDGPU_GEM_DOMAIN_VRAM);
> SMU_TABLE_INIT(tables, SMU_TABLE_COMBO_PPTABLE,
> MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE,
> PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
> + SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO,
> sizeof(EccInfoTable_t),
> + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
>
> smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t),
> GFP_KERNEL);
> if (!smu_table->metrics_table)
> @@ -477,8 +487,14 @@ static int smu_v13_0_0_tables_init(struct
> smu_context *smu)
> if (!smu_table->watermarks_table)
> goto err2_out;
>
> + smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size,
> GFP_KERNEL);
> + if (!smu_table->ecc_table)
> + goto err3_out;
> +
> return 0;
>
> +err3_out:
> + kfree(smu_table->watermarks_table);
> err2_out:
> kfree(smu_table->gpu_metrics_table);
> err1_out:
> @@ -2036,6 +2052,64 @@ static int
> smu_v13_0_0_send_bad_mem_channel_flag(struct smu_context *smu,
> return ret;
> }
>
> +static int smu_v13_0_0_check_ecc_table_support(struct smu_context
> *smu)
> +{
> + struct amdgpu_device *adev = smu->adev;
> + uint32_t if_version = 0xff, smu_version = 0xff;
> + int ret = 0;
> +
> + ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version);
> + if (ret)
> + return -EOPNOTSUPP;
> +
> + if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10)) &&
> + (smu_version >=
> SUPPORT_ECCTABLE_SMU_13_0_10_VERSION))
> + return ret;
> + else
> + return -EOPNOTSUPP;
> +}
> +
> +static ssize_t smu_v13_0_0_get_ecc_info(struct smu_context *smu,
> + void
> *table)
> +{
> + struct smu_table_context *smu_table = &smu->smu_table;
> + struct amdgpu_device *adev = smu->adev;
> + EccInfoTable_t *ecc_table = NULL;
> + struct ecc_info_per_ch *ecc_info_per_channel = NULL;
> + int i, ret = 0;
> + struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table;
> +
> + ret = smu_v13_0_0_check_ecc_table_support(smu);
> + if (ret)
> + return ret;
> +
> + ret = smu_cmn_update_table(smu,
> + SMU_TABLE_ECCINFO,
> + 0,
> + smu_table->ecc_table,
> + false);
> + if (ret) {
> + dev_info(adev->dev, "Failed to export SMU ecc table!\n");
> + return ret;
> + }
> +
> + ecc_table = (EccInfoTable_t *)smu_table->ecc_table;
> +
> + for (i = 0; i < UMC_V8_10_TOTAL_CHANNEL_NUM(adev); i++) {
> + ecc_info_per_channel = &(eccinfo->ecc[i]);
> + ecc_info_per_channel->ce_count_lo_chip =
> + ecc_table->EccInfo[i].ce_count_lo_chip;
> + ecc_info_per_channel->ce_count_hi_chip =
> + ecc_table->EccInfo[i].ce_count_hi_chip;
> + ecc_info_per_channel->mca_umc_status =
> + ecc_table->EccInfo[i].mca_umc_status;
> + ecc_info_per_channel->mca_umc_addr =
> + ecc_table->EccInfo[i].mca_umc_addr;
> + }
> +
> + return ret;
> +}
> +
> static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
> .get_allowed_feature_mask =
> smu_v13_0_0_get_allowed_feature_mask,
> .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table,
> @@ -2111,6 +2185,7 @@ static const struct pptable_funcs
> smu_v13_0_0_ppt_funcs = {
> .send_hbm_bad_pages_num =
> smu_v13_0_0_smu_send_bad_mem_page_num,
> .send_hbm_bad_channel_flag =
> smu_v13_0_0_send_bad_mem_channel_flag,
> .gpo_control = smu_v13_0_gpo_control,
> + .get_ecc_info = smu_v13_0_0_get_ecc_info,
> };
>
> void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
> --
> 2.17.1
More information about the amd-gfx
mailing list