<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
<!--[if !mso]><style>v\:* {behavior:url(#default#VML);}
o\:* {behavior:url(#default#VML);}
w\:* {behavior:url(#default#VML);}
.shape {behavior:url(#default#VML);}
</style><![endif]--><style><!--
/* Font Definitions */
@font-face
        {font-family:SimSun;
        panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:DengXian;
        panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
        {font-family:"\@DengXian";
        panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
        {font-family:"\@SimSun";
        panose-1:2 1 6 0 3 1 1 1 1 1;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0in;
        font-size:11.0pt;
        font-family:"Calibri",sans-serif;}
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:#0563C1;
        text-decoration:underline;}
span.EmailStyle20
        {mso-style-type:personal-reply;
        font-family:"Calibri",sans-serif;
        color:windowtext;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:10.0pt;}
@page WordSection1
        {size:8.5in 11.0in;
        margin:1.0in 1.0in 1.0in 1.0in;}
div.WordSection1
        {page:WordSection1;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
</head>
<body lang="EN-US" link="#0563C1" vlink="#954F72" style="word-wrap:break-word">
<p style="font-family:Arial;font-size:10pt;color:#0000FF;margin:5pt;" align="Left">
[AMD Official Use Only - General]<br>
</p>
<br>
<div>
<div class="WordSection1">
<p class="MsoNormal">Hi Kevin,<o:p></o:p></p>
<p class="MsoNormal"><o:p> </o:p></p>
<div style="border:none;border-left:solid blue 1.5pt;padding:0in 0in 0in 4.0pt">
<div>
<div style="border:none;border-top:solid #E1E1E1 1.0pt;padding:3.0pt 0in 0in 0in">
<p class="MsoNormal"><b><span lang="ZH-CN" style="font-family:DengXian">发件人</span></b><b><span style="font-family:DengXian">:</span></b><span style="font-family:DengXian"> Wang, Yang(Kevin) &lt;KevinYang.Wang@amd.com&gt;
<br>
<b><span lang="ZH-CN">发送时间</span>:</b> Monday, May 23, 2022 4:49 PM<br>
<b><span lang="ZH-CN">收件人</span>:</b> Yang, Stanley &lt;Stanley.Yang@amd.com&gt;; amd-gfx@lists.freedesktop.org; Zhang, Hawking &lt;Hawking.Zhang@amd.com&gt;; Zhou1, Tao &lt;Tao.Zhou1@amd.com&gt;; Quan, Evan &lt;Evan.Quan@amd.com&gt;; Lazar, Lijo &lt;Lijo.Lazar@amd.com&gt;<br>
<b><span lang="ZH-CN">主题</span>:</b> Re: [PATCH Review 1/2] drm/amdgpu/pm: support mca_ceumc_addr in ecctable<o:p></o:p></span></p>
</div>
</div>
<p class="MsoNormal"><o:p> </o:p></p>
<p style="margin:5.0pt"><span style="font-size:10.0pt;font-family:&quot;Arial&quot;,sans-serif;color:blue">[AMD Official Use Only - General]<o:p></o:p></span></p>
<p class="MsoNormal"><o:p> </o:p></p>
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt;color:black"><o:p> </o:p></span></p>
</div>
<div class="MsoNormal" align="center" style="text-align:center">
<hr size="2" width="98%" align="center">
</div>
<div id="divRplyFwdMsg">
<p class="MsoNormal"><b><span style="color:black">From:</span></b><span style="color:black"> amd-gfx &lt;<a href="mailto:amd-gfx-bounces@lists.freedesktop.org">amd-gfx-bounces@lists.freedesktop.org</a>&gt; on behalf of Stanley.Yang &lt;<a href="mailto:Stanley.Yang@amd.com">Stanley.Yang@amd.com</a>&gt;<br>
<b>Sent:</b> Monday, May 23, 2022 4:17 PM<br>
<b>To:</b> <a href="mailto:amd-gfx@lists.freedesktop.org">amd-gfx@lists.freedesktop.org</a> &lt;<a href="mailto:amd-gfx@lists.freedesktop.org">amd-gfx@lists.freedesktop.org</a>&gt;; Zhang, Hawking &lt;<a href="mailto:Hawking.Zhang@amd.com">Hawking.Zhang@amd.com</a>&gt;;
 Zhou1, Tao &lt;<a href="mailto:Tao.Zhou1@amd.com">Tao.Zhou1@amd.com</a>&gt;; Quan, Evan &lt;<a href="mailto:Evan.Quan@amd.com">Evan.Quan@amd.com</a>&gt;; Lazar, Lijo &lt;<a href="mailto:Lijo.Lazar@amd.com">Lijo.Lazar@amd.com</a>&gt;<br>
<b>Cc:</b> Yang, Stanley &lt;<a href="mailto:Stanley.Yang@amd.com">Stanley.Yang@amd.com</a>&gt;<br>
<b>Subject:</b> [PATCH Review 1/2] drm/amdgpu/pm: support mca_ceumc_addr in ecctable</span>
<o:p></o:p></p>
<div>
<p class="MsoNormal"> <o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal">SMU add a new variable mca_ceumc_addr to record<br>
umc correctable error address in EccInfo table,<br>
driver side add ecctable_v2 to support this feature<br>
<br>
Signed-off-by: Stanley.Yang &lt;<a href="mailto:Stanley.Yang@amd.com">Stanley.Yang@amd.com</a>&gt;<br>
---<br>
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h       |   1 +<br>
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |   2 +<br>
 .../inc/pmfw_if/smu13_driver_if_aldebaran.h   |  15 +++<br>
 .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c    | 101 ++++++++++++++----<br>
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c    |   2 +<br>
 5 files changed, 98 insertions(+), 23 deletions(-)<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h<br>
index b9a6fac2b8b2..28e603243b67 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h<br>
@@ -328,6 +328,7 @@ struct ecc_info_per_ch {<br>
         uint16_t ce_count_hi_chip;<br>
         uint64_t mca_umc_status;<br>
         uint64_t mca_umc_addr;<br>
+       uint64_t mca_ceumc_addr;<br>
 };<br>
 <br>
 struct umc_ecc_info {<br>
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h<br>
index a6a7b6c33683..9f7257ada437 100644<br>
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h<br>
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h<br>
@@ -322,6 +322,7 @@ enum smu_table_id<br>
         SMU_TABLE_PACE,<br>
         SMU_TABLE_ECCINFO,<br>
         SMU_TABLE_COMBO_PPTABLE,<br>
+       SMU_TABLE_ECCINFO_V2,<br>
         SMU_TABLE_COUNT,<br>
 };<br>
 <br>
@@ -340,6 +341,7 @@ struct smu_table_context<br>
         void                            *driver_pptable;<br>
         void                            *combo_pptable;<br>
         void                            *ecc_table;<br>
+       void                            *ecc_table_v2;  // adapt to smu support record mca_ceumc_addr<br>
         void                            *driver_smu_config_table;<br>
         struct smu_table                tables[SMU_TABLE_COUNT];<br>
         /*<br>
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_aldebaran.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_aldebaran.h<br>
index 0f67c56c2863..2868604eff49 100644<br>
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_aldebaran.h<br>
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_aldebaran.h<br>
@@ -522,6 +522,21 @@ typedef struct {<br>
         EccInfo_t  EccInfo[ALDEBARAN_UMC_CHANNEL_NUM];<br>
 } EccInfoTable_t;<br>
 <br>
+typedef struct {<br>
+       uint64_t mca_umc_status;<br>
+       uint64_t mca_umc_addr;<br>
+       uint64_t mca_ceumc_addr;<br>
+<br>
+       uint16_t ce_count_lo_chip;<br>
+       uint16_t ce_count_hi_chip;<br>
+<br>
+       uint32_t eccPadding;<br>
+} EccInfo_t_v2;<br>
+<br>
+typedef struct {<br>
+       EccInfo_t_v2  EccInfo[ALDEBARAN_UMC_CHANNEL_NUM];<br>
+} EccInfoTable_t_v2;<br>
+<br>
 // These defines are used with the following messages:<br>
 // SMC_MSG_TransferTableDram2Smu<br>
 // SMC_MSG_TransferTableSmu2Dram<br>
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c<br>
index 38af648cb857..e58df9490cec 100644<br>
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c<br>
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c<br>
@@ -82,6 +82,12 @@<br>
  */<br>
 #define SUPPORT_ECCTABLE_SMU_VERSION 0x00442a00<br>
 <br>
+/*<br>
+ * SMU support mca_ceumc_addr in ECCTABLE since version 68.55.0,<br>
+ * use this to check mca_ceumc_addr record whether support<br>
+ */<br>
+#define SUPPORT_ECCTABLE_V2_SMU_VERSION 0x00443700<br>
+<br>
 /*<br>
  * SMU support BAD CHENNEL info MSG since version 68.51.00,<br>
  * use this to check ECCTALE feature whether support<br>
@@ -239,6 +245,9 @@ static int aldebaran_tables_init(struct smu_context *smu)<br>
         SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t),<br>
                        PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);<br>
 <br>
+       SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO_V2, sizeof(EccInfoTable_t_v2),<br>
+                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);<br>
+<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal">[kevin]:<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal">This table mapping is not needed; the reason is given below.<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
<div>
<p class="MsoNormal">         smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL);<br>
         if (!smu_table->metrics_table)<br>
                 return -ENOMEM;<br>
@@ -255,6 +264,10 @@ static int aldebaran_tables_init(struct smu_context *smu)<br>
         if (!smu_table->ecc_table)<br>
                 return -ENOMEM;<br>
 <br>
+       smu_table->ecc_table_v2 = kzalloc(tables[SMU_TABLE_ECCINFO_V2].size, GFP_KERNEL);<br>
+       if (!smu_table->ecc_table_v2)<br>
+               return -ENOMEM;;<br>
+<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
<div>
<p class="MsoNormal">[kevin]:<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
<div>
<p class="MsoNormal">Adding an eccinfo table v2 is not needed in this case; this table is only used to store table data transferred from the PMFW to the driver,<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal">you can create a large enough table which can save ecc table data directly.<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
<div>
<p class="MsoNormal">e.g:<o:p></o:p></p>
</div>
<div>
<p class="MsoNormal">size = max(sizeof(<span style="color:black;background:white">EccInfoTable_t_v2), sizeof(EccInfoTable_t));</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><span style="color:black;background:white">smu_table->ecc_table = kzalloc(size, GFP_KERNEL);</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
<div>
<p class="MsoNormal"><span style="color:black;background:white">Best Regards,</span><o:p></o:p></p>
</div>
<div>
<p class="MsoNormal">Kevin<o:p></o:p></p>
<p class="MsoNormal"><b><i>[Yang, Stanley]: This method is not forward compatible; otherwise the driver needs a complex conversion to get the correct values when a new driver is used with an old PMFW.<o:p></o:p></i></b></p>
</div>
<div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
<div>
<p class="MsoNormal" style="margin-bottom:12.0pt">         return 0;<br>
 }<br>
 <br>
@@ -1802,7 +1815,8 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,<br>
         return sizeof(struct gpu_metrics_v1_3);<br>
 }<br>
 <br>
-static int aldebaran_check_ecc_table_support(struct smu_context *smu)<br>
+static int aldebaran_check_ecc_table_support(struct smu_context *smu,<br>
+               int *ecctable_version)<br>
 {<br>
         uint32_t if_version = 0xff, smu_version = 0xff;<br>
         int ret = 0;<br>
@@ -1815,6 +1829,11 @@ static int aldebaran_check_ecc_table_support(struct smu_context *smu)<br>
 <br>
         if (smu_version < SUPPORT_ECCTABLE_SMU_VERSION)<br>
                 ret = -EOPNOTSUPP;<br>
+       else if (smu_version >= SUPPORT_ECCTABLE_SMU_VERSION &&<br>
+                       smu_version < SUPPORT_ECCTABLE_V2_SMU_VERSION)<br>
+               *ecctable_version = 1;<br>
+       else<br>
+               *ecctable_version = 2;<br>
 <br>
         return ret;<br>
 }<br>
@@ -1824,36 +1843,72 @@ static ssize_t aldebaran_get_ecc_info(struct smu_context *smu,<br>
 {<br>
         struct smu_table_context *smu_table = &smu->smu_table;<br>
         EccInfoTable_t *ecc_table = NULL;<br>
+       EccInfoTable_t_v2 *ecc_table_v2 = NULL;<br>
         struct ecc_info_per_ch *ecc_info_per_channel = NULL;<br>
         int i, ret = 0;<br>
+       int table_version = 0;<br>
         struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table;<br>
 <br>
-       ret = aldebaran_check_ecc_table_support(smu);<br>
+       ret = aldebaran_check_ecc_table_support(smu, &table_version);<br>
         if (ret)<br>
                 return ret;<br>
 <br>
-       ret = smu_cmn_update_table(smu,<br>
-                              SMU_TABLE_ECCINFO,<br>
-                              0,<br>
-                              smu_table->ecc_table,<br>
-                              false);<br>
-       if (ret) {<br>
-               dev_info(smu->adev->dev, "Failed to export SMU ecc table!\n");<br>
-               return ret;<br>
-       }<br>
+       if (table_version == 1) {<br>
+               ret = smu_cmn_update_table(smu,<br>
+                                      SMU_TABLE_ECCINFO,<br>
+                                      0,<br>
+                                      smu_table->ecc_table,<br>
+                                      false);<br>
+               if (ret) {<br>
+                       dev_info(smu->adev->dev, "Failed to export SMU ecc table!\n");<br>
+                       return ret;<br>
+               }<br>
+<br>
+               ecc_table = (EccInfoTable_t *)smu_table->ecc_table;<br>
+<br>
+               for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) {<br>
+                       ecc_info_per_channel = &(eccinfo->ecc[i]);<br>
+                       ecc_info_per_channel->ce_count_lo_chip =<br>
+                               ecc_table->EccInfo[i].ce_count_lo_chip;<br>
+                       ecc_info_per_channel->ce_count_hi_chip =<br>
+                               ecc_table->EccInfo[i].ce_count_hi_chip;<br>
+                       ecc_info_per_channel->mca_umc_status =<br>
+                               ecc_table->EccInfo[i].mca_umc_status;<br>
+                       ecc_info_per_channel->mca_umc_addr =<br>
+                               ecc_table->EccInfo[i].mca_umc_addr;<br>
+               }<br>
+       } else if (table_version == 2) {<br>
+               /* still use SMU_TABLE_ECC_INFO index,<br>
+                * smu 68.55.0 add mca_ceumc_addr variable<br>
+                * in EccInfo_t struct to report correctable<br>
+                * error address and the table_id is not changed<br>
+                */<br>
+               ret = smu_cmn_update_table(smu,<br>
+                                      SMU_TABLE_ECCINFO,<br>
+                                      0,<br>
+                                      smu_table->ecc_table_v2,<br>
+                                          false);<br>
 <br>
-       ecc_table = (EccInfoTable_t *)smu_table->ecc_table;<br>
-<br>
-       for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) {<br>
-               ecc_info_per_channel = &(eccinfo->ecc[i]);<br>
-               ecc_info_per_channel->ce_count_lo_chip =<br>
-                       ecc_table->EccInfo[i].ce_count_lo_chip;<br>
-               ecc_info_per_channel->ce_count_hi_chip =<br>
-                       ecc_table->EccInfo[i].ce_count_hi_chip;<br>
-               ecc_info_per_channel->mca_umc_status =<br>
-                       ecc_table->EccInfo[i].mca_umc_status;<br>
-               ecc_info_per_channel->mca_umc_addr =<br>
-                       ecc_table->EccInfo[i].mca_umc_addr;<br>
+               if (ret) {<br>
+                       dev_info(smu->adev->dev, "Failed to export SMU ecc table_v2!\n");<br>
+                       return ret;<br>
+               }<br>
+<br>
+               ecc_table_v2 = (EccInfoTable_t_v2 *)smu_table->ecc_table_v2;<br>
+<br>
+               for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) {<br>
+                       ecc_info_per_channel = &(eccinfo->ecc[i]);<br>
+                       ecc_info_per_channel->ce_count_lo_chip =<br>
+                               ecc_table_v2->EccInfo[i].ce_count_lo_chip;<br>
+                       ecc_info_per_channel->ce_count_hi_chip =<br>
+                               ecc_table_v2->EccInfo[i].ce_count_hi_chip;<br>
+                       ecc_info_per_channel->mca_umc_status =<br>
+                               ecc_table_v2->EccInfo[i].mca_umc_status;<br>
+                       ecc_info_per_channel->mca_umc_addr =<br>
+                               ecc_table_v2->EccInfo[i].mca_umc_addr;<br>
+                       ecc_info_per_channel->mca_ceumc_addr =<br>
+                               ecc_table_v2->EccInfo[i].mca_ceumc_addr;<br>
+               }<br>
         }<br>
 <br>
         return ret;<br>
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c<br>
index ae6321af9d88..af2d84a16f3e 100644<br>
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c<br>
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c<br>
@@ -552,9 +552,11 @@ int smu_v13_0_fini_smc_tables(struct smu_context *smu)<br>
         kfree(smu_table->hardcode_pptable);<br>
         smu_table->hardcode_pptable = NULL;<br>
 <br>
+       kfree(smu_table->ecc_table_v2);<br>
         kfree(smu_table->ecc_table);<br>
         kfree(smu_table->metrics_table);<br>
         kfree(smu_table->watermarks_table);<br>
+       smu_table->ecc_table_v2 = NULL;<br>
         smu_table->ecc_table = NULL;<br>
         smu_table->metrics_table = NULL;<br>
         smu_table->watermarks_table = NULL;<br>
-- <br>
2.17.1<o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
</div>
</body>
</html>