<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
<style><!--
/* Font Definitions */
@font-face
        {font-family:宋体;
        panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:DengXian;
        panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
        {font-family:"\@等线";
        panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
        {font-family:"\@宋体";
        panose-1:2 1 6 0 3 1 1 1 1 1;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0cm;
        font-size:12.0pt;
        font-family:宋体;}
span.EmailStyle19
        {mso-style-type:personal-reply;
        font-family:DengXian;
        color:windowtext;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:10.0pt;}
@page WordSection1
        {size:612.0pt 792.0pt;
        margin:72.0pt 90.0pt 72.0pt 90.0pt;}
div.WordSection1
        {page:WordSection1;}
--></style>
</head>
<body lang="ZH-CN" link="#0563C1" vlink="#954F72" style="word-wrap:break-word">
<p style="font-family:Arial;font-size:10pt;color:#0000FF;margin:5pt;" align="Left">
[AMD Official Use Only - General]<br>
</p>
<br>
<div>
<p style="font-family:Calibri;font-size:10pt;color:#0000FF;margin:5pt;" align="Left">
[AMD Official Use Only - General]<br>
</p>
<div class="WordSection1">
<p class="MsoNormal"><span lang="EN-US" style="font-size:10.5pt;font-family:DengXian">Thanks tao, will update before submit.<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-size:10.5pt;font-family:DengXian"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-size:10.5pt;font-family:DengXian">Regards,<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-size:10.5pt;font-family:DengXian">Stanley<o:p></o:p></span></p>
<div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
<p class="MsoNormal" style="margin-bottom:12.0pt"><b><span style="color:black">发件人</span></b><b><span lang="EN-US" style="font-family:&quot;Calibri&quot;,sans-serif;color:black">:</span></b><span lang="EN-US" style="font-family:&quot;Calibri&quot;,sans-serif;color:black"> Zhou1,
 Tao &lt;Tao.Zhou1@amd.com&gt;<br>
</span><b><span style="color:black">日期</span></b><b><span lang="EN-US" style="font-family:&quot;Calibri&quot;,sans-serif;color:black">:</span></b><span lang="EN-US" style="font-family:&quot;Calibri&quot;,sans-serif;color:black">
</span><span style="color:black">星期四</span><span lang="EN-US" style="font-family:&quot;Calibri&quot;,sans-serif;color:black">, 2022</span><span style="color:black">年</span><span lang="EN-US" style="font-family:&quot;Calibri&quot;,sans-serif;color:black">5</span><span style="color:black">月</span><span lang="EN-US" style="font-family:&quot;Calibri&quot;,sans-serif;color:black">19</span><span style="color:black">日</span><span style="font-family:&quot;Calibri&quot;,sans-serif;color:black">
</span><span style="color:black">上午</span><span lang="EN-US" style="font-family:&quot;Calibri&quot;,sans-serif;color:black">10:30<br>
</span><b><span style="color:black">收件人</span></b><b><span lang="EN-US" style="font-family:&quot;Calibri&quot;,sans-serif;color:black">:</span></b><span lang="EN-US" style="font-family:&quot;Calibri&quot;,sans-serif;color:black"> Yang, Stanley &lt;Stanley.Yang@amd.com&gt;, amd-gfx@lists.freedesktop.org
 &lt;amd-gfx@lists.freedesktop.org&gt;, Zhang, Hawking &lt;Hawking.Zhang@amd.com&gt;<br>
</span><b><span style="color:black">抄送</span></b><b><span lang="EN-US" style="font-family:&quot;Calibri&quot;,sans-serif;color:black">:</span></b><span lang="EN-US" style="font-family:&quot;Calibri&quot;,sans-serif;color:black"> Yang, Stanley &lt;Stanley.Yang@amd.com&gt;<br>
</span><b><span style="color:black">主题</span></b><b><span lang="EN-US" style="font-family:&quot;Calibri&quot;,sans-serif;color:black">:</span></b><span lang="EN-US" style="font-family:&quot;Calibri&quot;,sans-serif;color:black"> RE: [PATCH Review 1/1] drm/amdgpu: support ras on
 SRIOV<o:p></o:p></span></p>
</div>
<div>
<p class="MsoNormal" style="margin-bottom:12.0pt"><span lang="EN-US" style="font-size:11.0pt"><br>
<br>
> -----Original Message-----<br>
> From: Stanley.Yang &lt;Stanley.Yang@amd.com&gt;<br>
> Sent: Wednesday, May 18, 2022 11:44 PM<br>
> To: amd-gfx@lists.freedesktop.org; Zhang, Hawking<br>
> &lt;Hawking.Zhang@amd.com&gt;; Zhou1, Tao &lt;Tao.Zhou1@amd.com&gt;<br>
> Cc: Yang, Stanley &lt;Stanley.Yang@amd.com&gt;<br>
> Subject: [PATCH Review 1/1] drm/amdgpu: support ras on SRIOV<br>
> <br>
> support umc/gfx/sdma ras on guest side<br>
> <br>
> Changed from V1:<br>
>     move sriov judgment in amdgpu_ras_interrupt_fatal_error_handler<br>
> <br>
> Change-Id: Ic7dda45d8f8cf2d5f1abc7705abc153d558da8a1<br>
> Signed-off-by: Stanley.Yang &lt;Stanley.Yang@amd.com&gt;<br>
> ---<br>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  4 +++<br>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    | 42 ++++++++++++++++------<br>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c   |  4 +++<br>
>  drivers/gpu/drm/amd/amdgpu/psp_v13_0.c     |  9 +++--<br>
>  4 files changed, 45 insertions(+), 14 deletions(-)<br>
> <br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
> index b583026dc893..ba7990d0dc0e 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
> @@ -5218,6 +5218,10 @@ int amdgpu_device_gpu_recover_imp(struct<br>
> amdgpu_device *adev,<br>
>                r = amdgpu_device_reset_sriov(adev, job ? false : true);<br>
>                if (r)<br>
>                        adev->asic_reset_res = r;<br>
> +<br>
> +             /* Aldebaran supports ras in SRIOV, so need resume ras during<br>
> reset */<br>
> +             if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))<br>
> +                     amdgpu_ras_resume(adev);<br>
>        } else {<br>
>                r = amdgpu_do_asic_reset(device_list_handle, &reset_context);<br>
>                if (r && r == -EAGAIN)<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c<br>
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c<br>
> index a653cf3b3d13..2b28210c4994 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c<br>
> @@ -726,7 +726,9 @@ int amdgpu_ras_feature_enable(struct amdgpu_device<br>
> *adev,<br>
>        /* Do not enable if it is not allowed. */<br>
>        WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head));<br>
> <br>
> -     if (!amdgpu_ras_intr_triggered()) {<br>
> +     /* Only enable ras feature operation handle on host side */<br>
> +     if (!amdgpu_sriov_vf(adev) &&<br>
> +             !amdgpu_ras_intr_triggered()) {<br>
>                ret = psp_ras_enable_features(&adev->psp, info, enable);<br>
>                if (ret) {<br>
>                        dev_err(adev->dev, "ras %s %s failed poison:%d<br>
> ret:%d\n", @@ -1523,6 +1525,10 @@ static int amdgpu_ras_fs_fini(struct<br>
> amdgpu_device *adev)<br>
>   */<br>
>  void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)  {<br>
> +     /* Fatal error events are handled on host side */<br>
> +     if (amdgpu_sriov_vf(adev))<br>
> +             return;<br>
> +<br>
>        if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF))<br>
>                return;<br>
<br>
[Tao] The two conditions above can be merged, other than that the patch is:<br>
<br>
Reviewed-by: Tao Zhou &lt;tao.zhou1@amd.com&gt;<br>
<br>
> <br>
> @@ -2270,10 +2276,14 @@ static void amdgpu_ras_check_supported(struct<br>
> amdgpu_device *adev)  {<br>
>        adev->ras_hw_enabled = adev->ras_enabled = 0;<br>
> <br>
> -     if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||<br>
> +     if (!adev->is_atom_fw ||<br>
>            !amdgpu_ras_asic_supported(adev))<br>
>                return;<br>
> <br>
> +     if (!(amdgpu_sriov_vf(adev) &&<br>
> +             (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2))))<br>
> +             return;<br>
> +<br>
>        if (!adev->gmc.xgmi.connected_to_cpu) {<br>
>                if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {<br>
>                        dev_info(adev->dev, "MEM ECC is active.\n"); @@ -<br>
> 2285,15 +2295,21 @@ static void amdgpu_ras_check_supported(struct<br>
> amdgpu_device *adev)<br>
> <br>
>                if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {<br>
>                        dev_info(adev->dev, "SRAM ECC is active.\n");<br>
> -                     adev->ras_hw_enabled |= ~(1 <<<br>
> AMDGPU_RAS_BLOCK__UMC |<br>
> -                                                 1 <<<br>
> AMDGPU_RAS_BLOCK__DF);<br>
> -<br>
> -                     if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2,<br>
> 6, 0))<br>
> -                             adev->ras_hw_enabled |= (1 <<<br>
> AMDGPU_RAS_BLOCK__VCN |<br>
> -                                             1 <<<br>
> AMDGPU_RAS_BLOCK__JPEG);<br>
> -                     else<br>
> -                             adev->ras_hw_enabled &= ~(1 <<<br>
> AMDGPU_RAS_BLOCK__VCN |<br>
> -                                             1 <<<br>
> AMDGPU_RAS_BLOCK__JPEG);<br>
> +                     if (!amdgpu_sriov_vf(adev)) {<br>
> +                             adev->ras_hw_enabled |= ~(1 <<<br>
> AMDGPU_RAS_BLOCK__UMC |<br>
> +                                                         1 <<<br>
> AMDGPU_RAS_BLOCK__DF);<br>
> +<br>
> +                             if (adev->ip_versions[VCN_HWIP][0] ==<br>
> IP_VERSION(2, 6, 0))<br>
> +                                     adev->ras_hw_enabled |= (1 <<<br>
> AMDGPU_RAS_BLOCK__VCN |<br>
> +                                                     1 <<<br>
> AMDGPU_RAS_BLOCK__JPEG);<br>
> +                             else<br>
> +                                     adev->ras_hw_enabled &= ~(1 <<<br>
> AMDGPU_RAS_BLOCK__VCN |<br>
> +                                                     1 <<<br>
> AMDGPU_RAS_BLOCK__JPEG);<br>
> +                     } else {<br>
> +                             adev->ras_hw_enabled |= (1 <<<br>
> AMDGPU_RAS_BLOCK__PCIE_BIF |<br>
> +                                                             1 <<<br>
> AMDGPU_RAS_BLOCK__SDMA |<br>
> +                                                             1 <<<br>
> AMDGPU_RAS_BLOCK__GFX);<br>
> +                     }<br>
>                } else {<br>
>                        dev_info(adev->dev, "SRAM ECC is not presented.\n");<br>
>                }<br>
> @@ -2637,6 +2653,10 @@ int amdgpu_ras_late_init(struct amdgpu_device<br>
> *adev)<br>
>        struct amdgpu_ras_block_object *obj;<br>
>        int r;<br>
> <br>
> +     /* Guest side doesn't need init ras feature */<br>
> +     if (amdgpu_sriov_vf(adev))<br>
> +             return 0;<br>
> +<br>
>        list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {<br>
>                if (!node->ras_obj) {<br>
>                        dev_warn(adev->dev, "Warning: abnormal ras list<br>
> node.\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c<br>
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c<br>
> index 8e221a1ba937..42c1f050542f 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c<br>
> @@ -124,6 +124,10 @@ int amdgpu_sdma_process_ras_data_cb(struct<br>
> amdgpu_device *adev,<br>
>                struct amdgpu_iv_entry *entry)<br>
>  {<br>
>        kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);<br>
> +<br>
> +     if (amdgpu_sriov_vf(adev))<br>
> +             return AMDGPU_RAS_SUCCESS;<br>
> +<br>
>        amdgpu_ras_reset_gpu(adev);<br>
> <br>
>        return AMDGPU_RAS_SUCCESS;<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c<br>
> b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c<br>
> index d6d79e97def9..18014ed0e853 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c<br>
> @@ -85,9 +85,12 @@ static int psp_v13_0_init_microcode(struct psp_context<br>
> *psp)<br>
>                err = psp_init_sos_microcode(psp, chip_name);<br>
>                if (err)<br>
>                        return err;<br>
> -             err = psp_init_ta_microcode(&adev->psp, chip_name);<br>
> -             if (err)<br>
> -                     return err;<br>
> +             /* It's not necessary to load ras ta on Guest side */<br>
> +             if (!amdgpu_sriov_vf(adev)) {<br>
> +                     err = psp_init_ta_microcode(&adev->psp, chip_name);<br>
> +                     if (err)<br>
> +                             return err;<br>
> +             }<br>
>                break;<br>
>        case IP_VERSION(13, 0, 1):<br>
>        case IP_VERSION(13, 0, 3):<br>
> --<br>
> 2.17.1<o:p></o:p></span></p>
</div>
</div>
</div>
</body>
</html>