<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<p style="font-family:Arial;font-size:10pt;color:#0000FF;margin:5pt;" align="Left">
[AMD Official Use Only - General]<br>
</p>
<br>
<div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);" class="elementToProof">
</div>
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" color="#000000" style="font-size:11pt"><b>From:</b> amd-gfx &lt;amd-gfx-bounces@lists.freedesktop.org&gt; on behalf of Stanley.Yang &lt;Stanley.Yang@amd.com&gt;<br>
<b>Sent:</b> Wednesday, May 25, 2022 2:10 PM<br>
<b>To:</b> amd-gfx@lists.freedesktop.org &lt;amd-gfx@lists.freedesktop.org&gt;; Zhang, Hawking &lt;Hawking.Zhang@amd.com&gt;; Zhou1, Tao &lt;Tao.Zhou1@amd.com&gt;; Quan, Evan &lt;Evan.Quan@amd.com&gt;; Lazar, Lijo &lt;Lijo.Lazar@amd.com&gt;<br>
<b>Cc:</b> Yang, Stanley &lt;Stanley.Yang@amd.com&gt;<br>
<b>Subject:</b> [PATCH Review v3 2/2] drm/amdgpu: print umc correctable error address</font>
<div> </div>
</div>
<div class="BodyFragment">
<div class="PlainText elementToProof" style="font-size: 11pt;">Changed from V1:<br>
        remove unnecessary same row physical address calculation<br>
<br>
Changed from V2:<br>
        move record_ce_addr_supported to umc_ecc_info struct<br>
<br>
Signed-off-by: Stanley.Yang &lt;Stanley.Yang@amd.com&gt;<br>
---<br>
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h       |  5 ++<br>
 drivers/gpu/drm/amd/amdgpu/umc_v6_7.c         | 50 ++++++++++++++++++-<br>
 .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c    |  1 +<br>
 3 files changed, 54 insertions(+), 2 deletions(-)<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h<br>
index 28e603243b67..bf5a95104ec1 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h<br>
@@ -333,6 +333,11 @@ struct ecc_info_per_ch {<br>
 <br>
 struct umc_ecc_info {<br>
         struct ecc_info_per_ch ecc[MAX_UMC_CHANNEL_NUM];<br>
+<br>
+       /* Determine smu ecctable whether support<br>
+        * record correctable error address<br>
+        */<br>
+       int record_ce_addr_supported;<br>
 };<br>
 </div>
<div class="PlainText elementToProof" style="font-size: 11pt;">[kevin]:</div>
<div class="PlainText elementToProof" style="font-size: 11pt;">
<ol>
<li><span><span style="margin:0px;font-size:12pt;font-family:Calibri, Arial, Helvetica, sans-serif">The new field <span style="margin:0px;font-size:14.6667px;font-family:'Segoe UI', 'Segoe UI Web (West European)', 'Segoe UI', -apple-system, BlinkMacSystemFont, Roboto, 'Helvetica Neue', sans-serif;background-color:rgb(255, 255, 255);display:inline !important"><i>record_ce_addr_supported </i></span>is
 not set on the sienna_cichlid chip.</span><br>
</span></li></ol>
</div>
<ol start="2">
<li class="PlainText elementToProof" style="font-size: 11pt;">Also, it would be better to give this field a more general name (e.g. ecc_table_version), in case the ECC table (PMFW side) is updated again in the future.</li></ol>
<div class="PlainText elementToProof" style="font-size: 11pt;">
<div style="margin:0px;font-size:12pt;font-family:Calibri, Arial, Helvetica, sans-serif" class="elementToProof">
<br>
</div>
<div style="margin:0px;font-size:12pt;font-family:Calibri, Arial, Helvetica, sans-serif">
Best Regards</div>
<span style="margin:0px;font-size:12pt;font-family:Calibri, Arial, Helvetica, sans-serif">Kevin</span></div>
<div class="PlainText elementToProof" style=""><font face="Calibri, Arial, Helvetica, sans-serif"><br>
</font></div>
<div class="PlainText elementToProof" style=""><span style="font-size: 11pt;"> struct amdgpu_ras {</span><br>
<span style="font-size: 11pt;">diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c</span><br>
<span style="font-size: 11pt;">index 606892dbea1c..bf7524f16b66 100644</span><br>
<span style="font-size: 11pt;">--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c</span><br>
<span style="font-size: 11pt;">+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c</span><br>
<span style="font-size: 11pt;">@@ -119,6 +119,24 @@ static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device</span><br>
<span style="font-size: 11pt;">                 *error_count += 1;</span><br>
<span style="font-size: 11pt;"> </span><br>
<span style="font-size: 11pt;">                 umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);</span><br>
<span style="font-size: 11pt;">+</span><br>
<span style="font-size: 11pt;">+               if (ras->umc_ecc.record_ce_addr_supported)      {</span><br>
<span style="font-size: 11pt;">+                       uint64_t err_addr, soc_pa;</span><br>
<span style="font-size: 11pt;">+                       uint32_t channel_index =</span><br>
<span style="font-size: 11pt;">+                               adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];</span><br>
<span style="font-size: 11pt;">+</span><br>
<span style="font-size: 11pt;">+                       err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_ceumc_addr;</span><br>
<span style="font-size: 11pt;">+                       err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);</span><br>
<span style="font-size: 11pt;">+                       /* translate umc channel address to soc pa, 3 parts are included */</span><br>
<span style="font-size: 11pt;">+                       soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |</span><br>
<span style="font-size: 11pt;">+                                       ADDR_OF_256B_BLOCK(channel_index) |</span><br>
<span style="font-size: 11pt;">+                                       OFFSET_IN_256B_BLOCK(err_addr);</span><br>
<span style="font-size: 11pt;">+</span><br>
<span style="font-size: 11pt;">+                       /* The umc channel bits are not original values, they are hashed */</span><br>
<span style="font-size: 11pt;">+                       SET_CHANNEL_HASH(channel_index, soc_pa);</span><br>
<span style="font-size: 11pt;">+</span><br>
<span style="font-size: 11pt;">+                       dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);</span><br>
<span style="font-size: 11pt;">+               }</span><br>
<span style="font-size: 11pt;">         }</span><br>
<span style="font-size: 11pt;"> }</span><br>
<span style="font-size: 11pt;"> </span><br>
<span style="font-size: 11pt;">@@ -251,7 +269,9 @@ static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev</span><br>
<span style="font-size: 11pt;"> </span><br>
<span style="font-size: 11pt;"> static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,</span><br>
<span style="font-size: 11pt;">                                                    uint32_t umc_reg_offset,</span><br>
<span style="font-size: 11pt;">-                                                  unsigned long *error_count)</span><br>
<span style="font-size: 11pt;">+                                                  unsigned long *error_count,</span><br>
<span style="font-size: 11pt;">+                                                  uint32_t ch_inst,</span><br>
<span style="font-size: 11pt;">+                                                  uint32_t umc_inst)</span><br>
<span style="font-size: 11pt;"> {</span><br>
<span style="font-size: 11pt;">         uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;</span><br>
<span style="font-size: 11pt;">         uint32_t ecc_err_cnt, ecc_err_cnt_addr;</span><br>
<span style="font-size: 11pt;">@@ -295,6 +315,31 @@ static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,</span><br>
<span style="font-size: 11pt;">                 *error_count += 1;</span><br>
<span style="font-size: 11pt;"> </span><br>
<span style="font-size: 11pt;">                 umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);</span><br>
<span style="font-size: 11pt;">+</span><br>
<span style="font-size: 11pt;">+               {</span><br>
<span style="font-size: 11pt;">+                       uint64_t err_addr, soc_pa;</span><br>
<span style="font-size: 11pt;">+                       uint32_t mc_umc_addrt0;</span><br>
<span style="font-size: 11pt;">+                       uint32_t channel_index;</span><br>
<span style="font-size: 11pt;">+</span><br>
<span style="font-size: 11pt;">+                       mc_umc_addrt0 =</span><br>
<span style="font-size: 11pt;">+                               SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);</span><br>
<span style="font-size: 11pt;">+</span><br>
<span style="font-size: 11pt;">+                       channel_index =</span><br>
<span style="font-size: 11pt;">+                               adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];</span><br>
<span style="font-size: 11pt;">+</span><br>
<span style="font-size: 11pt;">+                       err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);</span><br>
<span style="font-size: 11pt;">+                       err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);</span><br>
<span style="font-size: 11pt;">+</span><br>
<span style="font-size: 11pt;">+                       /* translate umc channel address to soc pa, 3 parts are included */</span><br>
<span style="font-size: 11pt;">+                       soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |</span><br>
<span style="font-size: 11pt;">+                                       ADDR_OF_256B_BLOCK(channel_index) |</span><br>
<span style="font-size: 11pt;">+                                       OFFSET_IN_256B_BLOCK(err_addr);</span><br>
<span style="font-size: 11pt;">+</span><br>
<span style="font-size: 11pt;">+                       /* The umc channel bits are not original values, they are hashed */</span><br>
<span style="font-size: 11pt;">+                       SET_CHANNEL_HASH(channel_index, soc_pa);</span><br>
<span style="font-size: 11pt;">+</span><br>
<span style="font-size: 11pt;">+                       dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);</span><br>
<span style="font-size: 11pt;">+               }</span><br>
<span style="font-size: 11pt;">         }</span><br>
<span style="font-size: 11pt;"> }</span><br>
<span style="font-size: 11pt;"> </span><br>
<span style="font-size: 11pt;">@@ -395,7 +440,8 @@ static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev,</span><br>
<span style="font-size: 11pt;">                                                          ch_inst);</span><br>
<span style="font-size: 11pt;">                 umc_v6_7_query_correctable_error_count(adev,</span><br>
<span style="font-size: 11pt;">                                                        umc_reg_offset,</span><br>
<span style="font-size: 11pt;">-                                                      &(err_data->ce_count));</span><br>
<span style="font-size: 11pt;">+                                                      &(err_data->ce_count),</span><br>
<span style="font-size: 11pt;">+                                                      ch_inst, umc_inst);</span><br>
<span style="font-size: 11pt;">                 umc_v6_7_querry_uncorrectable_error_count(adev,</span><br>
<span style="font-size: 11pt;">                                                           umc_reg_offset,</span><br>
<span style="font-size: 11pt;">                                                           &(err_data->ue_count));</span><br>
<span style="font-size: 11pt;">diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c</span><br>
<span style="font-size: 11pt;">index 9cdfeea58085..c7e0fec614ea 100644</span><br>
<span style="font-size: 11pt;">--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c</span><br>
<span style="font-size: 11pt;">+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c</span><br>
<span style="font-size: 11pt;">@@ -1883,6 +1883,7 @@ static ssize_t aldebaran_get_ecc_info(struct smu_context *smu,</span><br>
<span style="font-size: 11pt;">                         ecc_info_per_channel->mca_ceumc_addr =</span><br>
<span style="font-size: 11pt;">                                 ecc_table->EccInfo_V2[i].mca_ceumc_addr;</span><br>
<span style="font-size: 11pt;">                 }</span><br>
<span style="font-size: 11pt;">+               eccinfo->record_ce_addr_supported =1;</span><br>
<span style="font-size: 11pt;">         }</span><br>
<span style="font-size: 11pt;"> </span><br>
<span style="font-size: 11pt;">         return ret;</span><br>
<span style="font-size: 11pt;">-- </span><br>
<span style="font-size: 11pt;">2.17.1</span><br>
<br>
</div>
</div>
</div>
</body>
</html>