<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
<!--[if !mso]><style>v\:* {behavior:url(#default#VML);}
o\:* {behavior:url(#default#VML);}
w\:* {behavior:url(#default#VML);}
.shape {behavior:url(#default#VML);}
</style><![endif]--><style><!--
/* Font Definitions */
@font-face
{font-family:SimSun;
panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
{font-family:"Cambria Math";
panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
{font-family:DengXian;
panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
{font-family:Calibri;
panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
{font-family:"\@DengXian";
panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
{font-family:"\@SimSun";
panose-1:2 1 6 0 3 1 1 1 1 1;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0in;
font-size:11.0pt;
font-family:"Calibri",sans-serif;}
a:link, span.MsoHyperlink
{mso-style-priority:99;
color:#0563C1;
text-decoration:underline;}
span.EmailStyle20
{mso-style-type:personal-reply;
font-family:"Calibri",sans-serif;
color:windowtext;}
.MsoChpDefault
{mso-style-type:export-only;
font-size:10.0pt;}
@page WordSection1
{size:8.5in 11.0in;
margin:1.0in 1.0in 1.0in 1.0in;}
div.WordSection1
{page:WordSection1;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
</head>
<body lang="EN-US" link="#0563C1" vlink="#954F72" style="word-wrap:break-word">
<p style="font-family:Arial;font-size:10pt;color:#0000FF;margin:5pt;" align="Left">
[AMD Official Use Only - General]<br>
</p>
<br>
<div>
<div class="WordSection1">
<p class="MsoNormal">Hi Kevin,<o:p></o:p></p>
<p class="MsoNormal"><o:p> </o:p></p>
<div style="border:none;border-left:solid blue 1.5pt;padding:0in 0in 0in 4.0pt">
<div>
<div style="border:none;border-top:solid #E1E1E1 1.0pt;padding:3.0pt 0in 0in 0in">
<p class="MsoNormal"><b><span lang="ZH-CN" style="font-family:DengXian">发件人</span></b><b><span style="font-family:DengXian">:</span></b><span style="font-family:DengXian"> Wang, Yang(Kevin) &lt;KevinYang.Wang@amd.com&gt;
<br>
<b><span lang="ZH-CN">发送时间</span>:</b> Monday, May 23, 2022 4:49 PM<br>
<b><span lang="ZH-CN">收件人</span>:</b> Yang, Stanley &lt;Stanley.Yang@amd.com&gt;; amd-gfx@lists.freedesktop.org; Zhang, Hawking &lt;Hawking.Zhang@amd.com&gt;; Zhou1, Tao &lt;Tao.Zhou1@amd.com&gt;; Quan, Evan &lt;Evan.Quan@amd.com&gt;; Lazar, Lijo &lt;Lijo.Lazar@amd.com&gt;<br>
<b><span lang="ZH-CN">主题</span>:</b> Re: [PATCH Review 1/2] drm/amdgpu/pm: support mca_ceumc_addr in ecctable<o:p></o:p></span></p>
</div>
</div>
<p class="MsoNormal"><o:p> </o:p></p>
<p style="margin:5.0pt"><span style="font-size:10.0pt;font-family:'Arial',sans-serif;color:blue">[AMD Official Use Only - General]<o:p></o:p></span></p>
<p class="MsoNormal"><o:p> </o:p></p>
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt;color:black"><o:p> </o:p></span></p>
</div>
<div class="MsoNormal" align="center" style="text-align:center">
<hr size="2" width="98%" align="center">
</div>
<div id="divRplyFwdMsg">
<p class="MsoNormal"><b><span style="color:black">From:</span></b><span style="color:black"> amd-gfx &lt;<a href="mailto:amd-gfx-bounces@lists.freedesktop.org">amd-gfx-bounces@lists.freedesktop.org</a>&gt; on behalf of Stanley.Yang &lt;<a href="mailto:Stanley.Yang@amd.com">Stanley.Yang@amd.com</a>&gt;<br>
<b>Sent:</b> Monday, May 23, 2022 4:17 PM<br>
<b>To:</b> <a href="mailto:amd-gfx@lists.freedesktop.org">amd-gfx@lists.freedesktop.org</a> &lt;<a href="mailto:amd-gfx@lists.freedesktop.org">amd-gfx@lists.freedesktop.org</a>&gt;; Zhang, Hawking &lt;<a href="mailto:Hawking.Zhang@amd.com">Hawking.Zhang@amd.com</a>&gt;;
Zhou1, Tao &lt;<a href="mailto:Tao.Zhou1@amd.com">Tao.Zhou1@amd.com</a>&gt;; Quan, Evan &lt;<a href="mailto:Evan.Quan@amd.com">Evan.Quan@amd.com</a>&gt;; Lazar, Lijo &lt;<a href="mailto:Lijo.Lazar@amd.com">Lijo.Lazar@amd.com</a>&gt;<br>
<b>Cc:</b> Yang, Stanley &lt;<a href="mailto:Stanley.Yang@amd.com">Stanley.Yang@amd.com</a>&gt;<br>
<b>Subject:</b> [PATCH Review 1/2] drm/amdgpu/pm: support mca_ceumc_addr in ecctable</span>
<o:p></o:p></p>
<div>
<p class="MsoNormal"> <o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal">SMU add a new variable mca_ceumc_addr to record<br>
umc correctable error address in EccInfo table,<br>
driver side add ecctable_v2 to support this feature<br>
<br>
Signed-off-by: Stanley.Yang <<a href="mailto:Stanley.Yang@amd.com">Stanley.Yang@amd.com</a>><br>
---<br>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 +<br>
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 2 +<br>
.../inc/pmfw_if/smu13_driver_if_aldebaran.h | 15 +++<br>
.../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 101 ++++++++++++++----<br>
.../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 2 +<br>
5 files changed, 98 insertions(+), 23 deletions(-)<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h<br>
index b9a6fac2b8b2..28e603243b67 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h<br>
@@ -328,6 +328,7 @@ struct ecc_info_per_ch {<br>
uint16_t ce_count_hi_chip;<br>
uint64_t mca_umc_status;<br>
uint64_t mca_umc_addr;<br>
+ uint64_t mca_ceumc_addr;<br>
};<br>
<br>
struct umc_ecc_info {<br>
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h<br>
index a6a7b6c33683..9f7257ada437 100644<br>
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h<br>
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h<br>
@@ -322,6 +322,7 @@ enum smu_table_id<br>
SMU_TABLE_PACE,<br>
SMU_TABLE_ECCINFO,<br>
SMU_TABLE_COMBO_PPTABLE,<br>
+ SMU_TABLE_ECCINFO_V2,<br>
SMU_TABLE_COUNT,<br>
};<br>
<br>
@@ -340,6 +341,7 @@ struct smu_table_context<br>
void *driver_pptable;<br>
void *combo_pptable;<br>
void *ecc_table;<br>
+ void *ecc_table_v2; // adapt to smu support record mca_ceumc_addr<br>
void *driver_smu_config_table;<br>
struct smu_table tables[SMU_TABLE_COUNT];<br>
/*<br>
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_aldebaran.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_aldebaran.h<br>
index 0f67c56c2863..2868604eff49 100644<br>
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_aldebaran.h<br>
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_aldebaran.h<br>
@@ -522,6 +522,21 @@ typedef struct {<br>
EccInfo_t EccInfo[ALDEBARAN_UMC_CHANNEL_NUM];<br>
} EccInfoTable_t;<br>
<br>
+typedef struct {<br>
+ uint64_t mca_umc_status;<br>
+ uint64_t mca_umc_addr;<br>
+ uint64_t mca_ceumc_addr;<br>
+<br>
+ uint16_t ce_count_lo_chip;<br>
+ uint16_t ce_count_hi_chip;<br>
+<br>
+ uint32_t eccPadding;<br>
+} EccInfo_t_v2;<br>
+<br>
+typedef struct {<br>
+ EccInfo_t_v2 EccInfo[ALDEBARAN_UMC_CHANNEL_NUM];<br>
+} EccInfoTable_t_v2;<br>
+<br>
// These defines are used with the following messages:<br>
// SMC_MSG_TransferTableDram2Smu<br>
// SMC_MSG_TransferTableSmu2Dram<br>
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c<br>
index 38af648cb857..e58df9490cec 100644<br>
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c<br>
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c<br>
@@ -82,6 +82,12 @@<br>
*/<br>
#define SUPPORT_ECCTABLE_SMU_VERSION 0x00442a00<br>
<br>
+/*<br>
+ * SMU support mca_ceumc_addr in ECCTABLE since version 68.55.0,<br>
+ * use this to check mca_ceumc_addr record whether support<br>
+ */<br>
+#define SUPPORT_ECCTABLE_V2_SMU_VERSION 0x00443700<br>
+<br>
/*<br>
* SMU support BAD CHENNEL info MSG since version 68.51.00,<br>
* use this to check ECCTALE feature whether support<br>
@@ -239,6 +245,9 @@ static int aldebaran_tables_init(struct smu_context *smu)<br>
SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t),<br>
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);<br>
<br>
+ SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO_V2, sizeof(EccInfoTable_t_v2),<br>
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);<br>
+<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal">[kevin]:<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal">This table mapping is not needed; the reason is given below.<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
<div>
<p class="MsoNormal"> smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL);<br>
if (!smu_table->metrics_table)<br>
return -ENOMEM;<br>
@@ -255,6 +264,10 @@ static int aldebaran_tables_init(struct smu_context *smu)<br>
if (!smu_table->ecc_table)<br>
return -ENOMEM;<br>
<br>
+ smu_table->ecc_table_v2 = kzalloc(tables[SMU_TABLE_ECCINFO_V2].size, GFP_KERNEL);<br>
+ if (!smu_table->ecc_table_v2)<br>
+ return -ENOMEM;;<br>
+<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
<div>
<p class="MsoNormal">[kevin]:<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
<div>
<p class="MsoNormal">Adding an eccinfo table v2 is not needed for this case; this table is only used to store table data transferred from the pmfw to the driver,<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal">you can create a single table that is large enough to hold the ecc table data directly.<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
<div>
<p class="MsoNormal">e.g:<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal">size = max(sizeof(<span style="color:black;background:white">EccInfoTable_t_v2), sizeof(EccInfoTable_t));</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><span style="color:black;background:white">smu_table->ecc_table = kzalloc(size, GFP_KERNEL);</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
<div>
<p class="MsoNormal"><span style="color:black;background:white">Best Regards,</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal">Kevin<o:p></o:p></p>
<p class="MsoNormal"><b><i>[Yang, Stanley] : this method is not forward compatible; otherwise the driver needs a complex conversion to get the correct values when a new driver is used with an old pmfw.<o:p></o:p></i></b></p>
</div>
<div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
<div>
<p class="MsoNormal" style="margin-bottom:12.0pt"> return 0;<br>
}<br>
<br>
@@ -1802,7 +1815,8 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,<br>
return sizeof(struct gpu_metrics_v1_3);<br>
}<br>
<br>
-static int aldebaran_check_ecc_table_support(struct smu_context *smu)<br>
+static int aldebaran_check_ecc_table_support(struct smu_context *smu,<br>
+ int *ecctable_version)<br>
{<br>
uint32_t if_version = 0xff, smu_version = 0xff;<br>
int ret = 0;<br>
@@ -1815,6 +1829,11 @@ static int aldebaran_check_ecc_table_support(struct smu_context *smu)<br>
<br>
if (smu_version < SUPPORT_ECCTABLE_SMU_VERSION)<br>
ret = -EOPNOTSUPP;<br>
+ else if (smu_version >= SUPPORT_ECCTABLE_SMU_VERSION &&<br>
+ smu_version < SUPPORT_ECCTABLE_V2_SMU_VERSION)<br>
+ *ecctable_version = 1;<br>
+ else<br>
+ *ecctable_version = 2;<br>
<br>
return ret;<br>
}<br>
@@ -1824,36 +1843,72 @@ static ssize_t aldebaran_get_ecc_info(struct smu_context *smu,<br>
{<br>
struct smu_table_context *smu_table = &smu->smu_table;<br>
EccInfoTable_t *ecc_table = NULL;<br>
+ EccInfoTable_t_v2 *ecc_table_v2 = NULL;<br>
struct ecc_info_per_ch *ecc_info_per_channel = NULL;<br>
int i, ret = 0;<br>
+ int table_version = 0;<br>
struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table;<br>
<br>
- ret = aldebaran_check_ecc_table_support(smu);<br>
+ ret = aldebaran_check_ecc_table_support(smu, &table_version);<br>
if (ret)<br>
return ret;<br>
<br>
- ret = smu_cmn_update_table(smu,<br>
- SMU_TABLE_ECCINFO,<br>
- 0,<br>
- smu_table->ecc_table,<br>
- false);<br>
- if (ret) {<br>
- dev_info(smu->adev->dev, "Failed to export SMU ecc table!\n");<br>
- return ret;<br>
- }<br>
+ if (table_version == 1) {<br>
+ ret = smu_cmn_update_table(smu,<br>
+ SMU_TABLE_ECCINFO,<br>
+ 0,<br>
+ smu_table->ecc_table,<br>
+ false);<br>
+ if (ret) {<br>
+ dev_info(smu->adev->dev, "Failed to export SMU ecc table!\n");<br>
+ return ret;<br>
+ }<br>
+<br>
+ ecc_table = (EccInfoTable_t *)smu_table->ecc_table;<br>
+<br>
+ for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) {<br>
+ ecc_info_per_channel = &(eccinfo->ecc[i]);<br>
+ ecc_info_per_channel->ce_count_lo_chip =<br>
+ ecc_table->EccInfo[i].ce_count_lo_chip;<br>
+ ecc_info_per_channel->ce_count_hi_chip =<br>
+ ecc_table->EccInfo[i].ce_count_hi_chip;<br>
+ ecc_info_per_channel->mca_umc_status =<br>
+ ecc_table->EccInfo[i].mca_umc_status;<br>
+ ecc_info_per_channel->mca_umc_addr =<br>
+ ecc_table->EccInfo[i].mca_umc_addr;<br>
+ }<br>
+ } else if (table_version == 2) {<br>
+ /* still use SMU_TABLE_ECC_INFO index,<br>
+ * smu 68.55.0 add mca_ceumc_addr variable<br>
+ * in EccInfo_t struct to report correctable<br>
+ * error address and the table_id is not changed<br>
+ */<br>
+ ret = smu_cmn_update_table(smu,<br>
+ SMU_TABLE_ECCINFO,<br>
+ 0,<br>
+ smu_table->ecc_table_v2,<br>
+ false);<br>
<br>
- ecc_table = (EccInfoTable_t *)smu_table->ecc_table;<br>
-<br>
- for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) {<br>
- ecc_info_per_channel = &(eccinfo->ecc[i]);<br>
- ecc_info_per_channel->ce_count_lo_chip =<br>
- ecc_table->EccInfo[i].ce_count_lo_chip;<br>
- ecc_info_per_channel->ce_count_hi_chip =<br>
- ecc_table->EccInfo[i].ce_count_hi_chip;<br>
- ecc_info_per_channel->mca_umc_status =<br>
- ecc_table->EccInfo[i].mca_umc_status;<br>
- ecc_info_per_channel->mca_umc_addr =<br>
- ecc_table->EccInfo[i].mca_umc_addr;<br>
+ if (ret) {<br>
+ dev_info(smu->adev->dev, "Failed to export SMU ecc table_v2!\n");<br>
+ return ret;<br>
+ }<br>
+<br>
+ ecc_table_v2 = (EccInfoTable_t_v2 *)smu_table->ecc_table_v2;<br>
+<br>
+ for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) {<br>
+ ecc_info_per_channel = &(eccinfo->ecc[i]);<br>
+ ecc_info_per_channel->ce_count_lo_chip =<br>
+ ecc_table_v2->EccInfo[i].ce_count_lo_chip;<br>
+ ecc_info_per_channel->ce_count_hi_chip =<br>
+ ecc_table_v2->EccInfo[i].ce_count_hi_chip;<br>
+ ecc_info_per_channel->mca_umc_status =<br>
+ ecc_table_v2->EccInfo[i].mca_umc_status;<br>
+ ecc_info_per_channel->mca_umc_addr =<br>
+ ecc_table_v2->EccInfo[i].mca_umc_addr;<br>
+ ecc_info_per_channel->mca_ceumc_addr =<br>
+ ecc_table_v2->EccInfo[i].mca_ceumc_addr;<br>
+ }<br>
}<br>
<br>
return ret;<br>
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c<br>
index ae6321af9d88..af2d84a16f3e 100644<br>
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c<br>
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c<br>
@@ -552,9 +552,11 @@ int smu_v13_0_fini_smc_tables(struct smu_context *smu)<br>
kfree(smu_table->hardcode_pptable);<br>
smu_table->hardcode_pptable = NULL;<br>
<br>
+ kfree(smu_table->ecc_table_v2);<br>
kfree(smu_table->ecc_table);<br>
kfree(smu_table->metrics_table);<br>
kfree(smu_table->watermarks_table);<br>
+ smu_table->ecc_table_v2 = NULL;<br>
smu_table->ecc_table = NULL;<br>
smu_table->metrics_table = NULL;<br>
smu_table->watermarks_table = NULL;<br>
-- <br>
2.17.1<o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
</div>
</body>
</html>