<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
<style><!--
/* Font Definitions */
@font-face
{font-family:宋体;
panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
{font-family:"Cambria Math";
panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
{font-family:DengXian;
panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
{font-family:Calibri;
panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
{font-family:"\@等线";
panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
{font-family:"\@宋体";
panose-1:2 1 6 0 3 1 1 1 1 1;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0cm;
font-size:12.0pt;
font-family:宋体;}
span.EmailStyle19
{mso-style-type:personal-reply;
font-family:DengXian;
color:windowtext;}
.MsoChpDefault
{mso-style-type:export-only;
font-size:10.0pt;}
@page WordSection1
{size:612.0pt 792.0pt;
margin:72.0pt 90.0pt 72.0pt 90.0pt;}
div.WordSection1
{page:WordSection1;}
--></style>
</head>
<body lang="ZH-CN" link="#0563C1" vlink="#954F72" style="word-wrap:break-word">
<p style="font-family:Arial;font-size:10pt;color:#0000FF;margin:5pt;" align="Left">
[AMD Official Use Only - General]<br>
</p>
<br>
<div>
<p style="font-family:Calibri;font-size:10pt;color:#0000FF;margin:5pt;" align="Left">
[AMD Official Use Only - General]<br>
</p>
<div class="WordSection1">
<p class="MsoNormal"><span lang="EN-US" style="font-size:10.5pt;font-family:DengXian">Thanks tao, will update before submit.<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-size:10.5pt;font-family:DengXian"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-size:10.5pt;font-family:DengXian">Regards,<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-size:10.5pt;font-family:DengXian">Stanley<o:p></o:p></span></p>
<div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
<p class="MsoNormal" style="margin-bottom:12.0pt"><b><span style="color:black">发件人</span></b><b><span lang="EN-US" style="font-family:'Calibri',sans-serif;color:black">:</span></b><span lang="EN-US" style="font-family:'Calibri',sans-serif;color:black"> Zhou1,
Tao &lt;Tao.Zhou1@amd.com&gt;<br>
</span><b><span style="color:black">日期</span></b><b><span lang="EN-US" style="font-family:'Calibri',sans-serif;color:black">:</span></b><span lang="EN-US" style="font-family:'Calibri',sans-serif;color:black">
</span><span style="color:black">星期四</span><span lang="EN-US" style="font-family:'Calibri',sans-serif;color:black">, 2022</span><span style="color:black">年</span><span lang="EN-US" style="font-family:'Calibri',sans-serif;color:black">5</span><span style="color:black">月</span><span lang="EN-US" style="font-family:'Calibri',sans-serif;color:black">19</span><span style="color:black">日</span><span style="font-family:'Calibri',sans-serif;color:black">
</span><span style="color:black">上午</span><span lang="EN-US" style="font-family:'Calibri',sans-serif;color:black">10:30<br>
</span><b><span style="color:black">收件人</span></b><b><span lang="EN-US" style="font-family:'Calibri',sans-serif;color:black">:</span></b><span lang="EN-US" style="font-family:'Calibri',sans-serif;color:black"> Yang, Stanley &lt;Stanley.Yang@amd.com&gt;, amd-gfx@lists.freedesktop.org
&lt;amd-gfx@lists.freedesktop.org&gt;, Zhang, Hawking &lt;Hawking.Zhang@amd.com&gt;<br>
</span><b><span style="color:black">抄送</span></b><b><span lang="EN-US" style="font-family:'Calibri',sans-serif;color:black">:</span></b><span lang="EN-US" style="font-family:'Calibri',sans-serif;color:black"> Yang, Stanley &lt;Stanley.Yang@amd.com&gt;<br>
</span><b><span style="color:black">主题</span></b><b><span lang="EN-US" style="font-family:'Calibri',sans-serif;color:black">:</span></b><span lang="EN-US" style="font-family:'Calibri',sans-serif;color:black"> RE: [PATCH Review 1/1] drm/amdgpu: support ras on
SRIOV<o:p></o:p></span></p>
</div>
<div>
<p class="MsoNormal" style="margin-bottom:12.0pt"><span lang="EN-US" style="font-size:11.0pt"><br>
<br>
> -----Original Message-----<br>
> From: Stanley.Yang &lt;Stanley.Yang@amd.com&gt;<br>
> Sent: Wednesday, May 18, 2022 11:44 PM<br>
> To: amd-gfx@lists.freedesktop.org; Zhang, Hawking<br>
> &lt;Hawking.Zhang@amd.com&gt;; Zhou1, Tao &lt;Tao.Zhou1@amd.com&gt;<br>
> Cc: Yang, Stanley &lt;Stanley.Yang@amd.com&gt;<br>
> Subject: [PATCH Review 1/1] drm/amdgpu: support ras on SRIOV<br>
> <br>
> support umc/gfx/sdma ras on guest side<br>
> <br>
> Changed from V1:<br>
> move sriov judgment in amdgpu_ras_interrupt_fatal_error_handler<br>
> <br>
> Change-Id: Ic7dda45d8f8cf2d5f1abc7705abc153d558da8a1<br>
> Signed-off-by: Stanley.Yang &lt;Stanley.Yang@amd.com&gt;<br>
> ---<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 42 ++++++++++++++++------<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 4 +++<br>
> drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 9 +++--<br>
> 4 files changed, 45 insertions(+), 14 deletions(-)<br>
> <br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
> index b583026dc893..ba7990d0dc0e 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
> @@ -5218,6 +5218,10 @@ int amdgpu_device_gpu_recover_imp(struct<br>
> amdgpu_device *adev,<br>
> r = amdgpu_device_reset_sriov(adev, job ? false : true);<br>
> if (r)<br>
> adev->asic_reset_res = r;<br>
> +<br>
> + /* Aldebaran supports ras in SRIOV, so need resume ras during<br>
> reset */<br>
> + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))<br>
> + amdgpu_ras_resume(adev);<br>
> } else {<br>
> r = amdgpu_do_asic_reset(device_list_handle, &reset_context);<br>
> if (r && r == -EAGAIN)<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c<br>
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c<br>
> index a653cf3b3d13..2b28210c4994 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c<br>
> @@ -726,7 +726,9 @@ int amdgpu_ras_feature_enable(struct amdgpu_device<br>
> *adev,<br>
> /* Do not enable if it is not allowed. */<br>
> WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head));<br>
> <br>
> - if (!amdgpu_ras_intr_triggered()) {<br>
> + /* Only enable ras feature operation handle on host side */<br>
> + if (!amdgpu_sriov_vf(adev) &&<br>
> + !amdgpu_ras_intr_triggered()) {<br>
> ret = psp_ras_enable_features(&adev->psp, info, enable);<br>
> if (ret) {<br>
> dev_err(adev->dev, "ras %s %s failed poison:%d<br>
> ret:%d\n", @@ -1523,6 +1525,10 @@ static int amdgpu_ras_fs_fini(struct<br>
> amdgpu_device *adev)<br>
> */<br>
> void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev) {<br>
> + /* Fatal error events are handled on host side */<br>
> + if (amdgpu_sriov_vf(adev))<br>
> + return;<br>
> +<br>
> if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF))<br>
> return;<br>
<br>
[Tao] The two conditions above can be merged, other than that the patch is:<br>
<br>
Reviewed-by: Tao Zhou &lt;tao.zhou1@amd.com&gt;<br>
<br>
> <br>
> @@ -2270,10 +2276,14 @@ static void amdgpu_ras_check_supported(struct<br>
> amdgpu_device *adev) {<br>
> adev->ras_hw_enabled = adev->ras_enabled = 0;<br>
> <br>
> - if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||<br>
> + if (!adev->is_atom_fw ||<br>
> !amdgpu_ras_asic_supported(adev))<br>
> return;<br>
> <br>
> + if (!(amdgpu_sriov_vf(adev) &&<br>
> + (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2))))<br>
> + return;<br>
> +<br>
> if (!adev->gmc.xgmi.connected_to_cpu) {<br>
> if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {<br>
> dev_info(adev->dev, "MEM ECC is active.\n"); @@ -<br>
> 2285,15 +2295,21 @@ static void amdgpu_ras_check_supported(struct<br>
> amdgpu_device *adev)<br>
> <br>
> if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {<br>
> dev_info(adev->dev, "SRAM ECC is active.\n");<br>
> - adev->ras_hw_enabled |= ~(1 <<<br>
> AMDGPU_RAS_BLOCK__UMC |<br>
> - 1 <<<br>
> AMDGPU_RAS_BLOCK__DF);<br>
> -<br>
> - if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2,<br>
> 6, 0))<br>
> - adev->ras_hw_enabled |= (1 <<<br>
> AMDGPU_RAS_BLOCK__VCN |<br>
> - 1 <<<br>
> AMDGPU_RAS_BLOCK__JPEG);<br>
> - else<br>
> - adev->ras_hw_enabled &= ~(1 <<<br>
> AMDGPU_RAS_BLOCK__VCN |<br>
> - 1 <<<br>
> AMDGPU_RAS_BLOCK__JPEG);<br>
> + if (!amdgpu_sriov_vf(adev)) {<br>
> + adev->ras_hw_enabled |= ~(1 <<<br>
> AMDGPU_RAS_BLOCK__UMC |<br>
> + 1 <<<br>
> AMDGPU_RAS_BLOCK__DF);<br>
> +<br>
> + if (adev->ip_versions[VCN_HWIP][0] ==<br>
> IP_VERSION(2, 6, 0))<br>
> + adev->ras_hw_enabled |= (1 <<<br>
> AMDGPU_RAS_BLOCK__VCN |<br>
> + 1 <<<br>
> AMDGPU_RAS_BLOCK__JPEG);<br>
> + else<br>
> + adev->ras_hw_enabled &= ~(1 <<<br>
> AMDGPU_RAS_BLOCK__VCN |<br>
> + 1 <<<br>
> AMDGPU_RAS_BLOCK__JPEG);<br>
> + } else {<br>
> + adev->ras_hw_enabled |= (1 <<<br>
> AMDGPU_RAS_BLOCK__PCIE_BIF |<br>
> + 1 <<<br>
> AMDGPU_RAS_BLOCK__SDMA |<br>
> + 1 <<<br>
> AMDGPU_RAS_BLOCK__GFX);<br>
> + }<br>
> } else {<br>
> dev_info(adev->dev, "SRAM ECC is not presented.\n");<br>
> }<br>
> @@ -2637,6 +2653,10 @@ int amdgpu_ras_late_init(struct amdgpu_device<br>
> *adev)<br>
> struct amdgpu_ras_block_object *obj;<br>
> int r;<br>
> <br>
> + /* Guest side doesn't need init ras feature */<br>
> + if (amdgpu_sriov_vf(adev))<br>
> + return 0;<br>
> +<br>
> list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {<br>
> if (!node->ras_obj) {<br>
> dev_warn(adev->dev, "Warning: abnormal ras list<br>
> node.\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c<br>
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c<br>
> index 8e221a1ba937..42c1f050542f 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c<br>
> @@ -124,6 +124,10 @@ int amdgpu_sdma_process_ras_data_cb(struct<br>
> amdgpu_device *adev,<br>
> struct amdgpu_iv_entry *entry)<br>
> {<br>
> kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);<br>
> +<br>
> + if (amdgpu_sriov_vf(adev))<br>
> + return AMDGPU_RAS_SUCCESS;<br>
> +<br>
> amdgpu_ras_reset_gpu(adev);<br>
> <br>
> return AMDGPU_RAS_SUCCESS;<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c<br>
> b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c<br>
> index d6d79e97def9..18014ed0e853 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c<br>
> @@ -85,9 +85,12 @@ static int psp_v13_0_init_microcode(struct psp_context<br>
> *psp)<br>
> err = psp_init_sos_microcode(psp, chip_name);<br>
> if (err)<br>
> return err;<br>
> - err = psp_init_ta_microcode(&adev->psp, chip_name);<br>
> - if (err)<br>
> - return err;<br>
> + /* It's not necessary to load ras ta on Guest side */<br>
> + if (!amdgpu_sriov_vf(adev)) {<br>
> + err = psp_init_ta_microcode(&adev->psp, chip_name);<br>
> + if (err)<br>
> + return err;<br>
> + }<br>
> break;<br>
> case IP_VERSION(13, 0, 1):<br>
> case IP_VERSION(13, 0, 3):<br>
> --<br>
> 2.17.1<o:p></o:p></span></p>
</div>
</div>
</div>
</body>
</html>