<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
<style><!--
/* Font Definitions */
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:DengXian;
        panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
        {font-family:"\@DengXian";
        panose-1:2 1 6 0 3 1 1 1 1 1;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0in;
        font-size:10.0pt;
        font-family:"Calibri",sans-serif;}
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:blue;
        text-decoration:underline;}
span.EmailStyle19
        {mso-style-type:personal-reply;
        font-family:"Calibri",sans-serif;
        color:windowtext;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:10.0pt;}
@page WordSection1
        {size:8.5in 11.0in;
        margin:1.0in 1.0in 1.0in 1.0in;}
div.WordSection1
        {page:WordSection1;}
--></style>
</head>
<body lang="EN-US" link="blue" vlink="purple" style="word-wrap:break-word">
<p style="font-family:Arial;font-size:10pt;color:#0000FF;margin:5pt;" align="Left">
[AMD Official Use Only - General]<br>
</p>
<br>
<div>
<div class="WordSection1">
<p class="MsoNormal"><span style="font-size:11.0pt">Reviewed-by: Hawking Zhang &lt;Hawking.Zhang@amd.com&gt;<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:11.0pt"><o:p> </o:p></span></p>
<p class="MsoNormal"><span style="font-size:11.0pt">Regards,<br>
Hawking<o:p></o:p></span></p>
<div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in">
<p class="MsoNormal" style="margin-bottom:12.0pt"><b><span style="font-size:12.0pt;color:black">From:
</span></b><span style="font-size:12.0pt;color:black">amd-gfx &lt;amd-gfx-bounces@lists.freedesktop.org&gt; on behalf of Liu, Shaoyun &lt;Shaoyun.Liu@amd.com&gt;<br>
<b>Date: </b>Thursday, September 8, 2022 at 08:05<br>
<b>To: </b>amd-gfx@lists.freedesktop.org &lt;amd-gfx@lists.freedesktop.org&gt;<br>
<b>Subject: </b>RE: [PATCH] drm/amdgpu: Use per device reset_domain for XGMI on sriov configuration<o:p></o:p></span></p>
</div>
<div>
<p class="MsoNormal" style="margin-bottom:12.0pt"><span style="font-size:11.0pt">[AMD Official Use Only - General]<br>
<br>
[AMD Official Use Only - General]<br>
<br>
ping<br>
<br>
-----Original Message-----<br>
From: Liu, Shaoyun &lt;Shaoyun.Liu@amd.com&gt;<br>
Sent: Wednesday, September 7, 2022 11:38 AM<br>
To: amd-gfx@lists.freedesktop.org<br>
Cc: Liu, Shaoyun &lt;Shaoyun.Liu@amd.com&gt;<br>
Subject: [PATCH] drm/amdgpu: Use per device reset_domain for XGMI on sriov configuration<br>
<br>
For SRIOV configuration, the host driver controls the reset method (either FLR or a heavier chain reset). The host will notify the guest individually with an FLR message if an individual GPU within the hive needs to be reset. So on the guest side, there is no need to use hive->reset_domain
 to replace the original per-device reset_domain.<br>
<br>
Signed-off-by: shaoyunl &lt;shaoyun.liu@amd.com&gt;<br>
---<br>
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 ++++++------<br>
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 36 +++++++++++++---------<br>
 2 files changed, 33 insertions(+), 23 deletions(-)<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
index 62b26f0e37b0..a5533e0d9d6c 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
@@ -2453,17 +2453,19 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)<br>
                if (amdgpu_xgmi_add_device(adev) == 0) {<br>
                        struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);<br>
<br>
-                       if (!hive->reset_domain ||<br>
-                           !amdgpu_reset_get_reset_domain(hive->reset_domain)) {<br>
-                               r = -ENOENT;<br>
+                       if(!amdgpu_sriov_vf(adev)) {<br>
+                               if (!hive->reset_domain ||<br>
+                                   !amdgpu_reset_get_reset_domain(hive->reset_domain)) {<br>
+                                       r = -ENOENT;<br>
+                                       amdgpu_put_xgmi_hive(hive);<br>
+                                       goto init_failed;<br>
+                               }<br>
+<br>
+                               /* Drop the early temporary reset domain we created for device */<br>
+                               amdgpu_reset_put_reset_domain(adev->reset_domain);<br>
+                               adev->reset_domain = hive->reset_domain;<br>
                                amdgpu_put_xgmi_hive(hive);<br>
-                               goto init_failed;<br>
                        }<br>
-<br>
-                       /* Drop the early temporary reset domain we created for device */<br>
-                       amdgpu_reset_put_reset_domain(adev->reset_domain);<br>
-                       adev->reset_domain = hive->reset_domain;<br>
-                       amdgpu_put_xgmi_hive(hive);<br>
                }<br>
        }<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c<br>
index d3b483aa81f8..a78b589e4f4f 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c<br>
@@ -391,24 +391,32 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)<br>
                goto pro_end;<br>
        }<br>
<br>
+       /**<br>
+        * Only init hive->reset_domain for none SRIOV configuration. For SRIOV,<br>
+        * Host driver decide how to reset the GPU either through FLR or chain reset.<br>
+        * Guest side will get individual notifications from the host for the FLR<br>
+        * if necessary.<br>
+        */<br>
+       if (!amdgpu_sriov_vf(adev)) {<br>
        /**<br>
         * Avoid recreating reset domain when hive is reconstructed for the case<br>
-        * of reset the devices in the XGMI hive during probe for SRIOV<br>
+        * of reset the devices in the XGMI hive during probe for passthrough GPU<br>
         * See <a href="https://www.spinics.net/lists/amd-gfx/msg58836.html">https://www.spinics.net/lists/amd-gfx/msg58836.html</a><br>
         */<br>
-       if (adev->reset_domain->type != XGMI_HIVE) {<br>
-               hive->reset_domain = amdgpu_reset_create_reset_domain(XGMI_HIVE, "amdgpu-reset-hive");<br>
-                       if (!hive->reset_domain) {<br>
-                               dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n");<br>
-                               ret = -ENOMEM;<br>
-                               kobject_put(&hive->kobj);<br>
-                               kfree(hive);<br>
-                               hive = NULL;<br>
-                               goto pro_end;<br>
-                       }<br>
-       } else {<br>
-               amdgpu_reset_get_reset_domain(adev->reset_domain);<br>
-               hive->reset_domain = adev->reset_domain;<br>
+               if (adev->reset_domain->type != XGMI_HIVE) {<br>
+                       hive->reset_domain = amdgpu_reset_create_reset_domain(XGMI_HIVE, "amdgpu-reset-hive");<br>
+                               if (!hive->reset_domain) {<br>
+                                       dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n");<br>
+                                       ret = -ENOMEM;<br>
+                                       kobject_put(&hive->kobj);<br>
+                                       kfree(hive);<br>
+                                       hive = NULL;<br>
+                                       goto pro_end;<br>
+                               }<br>
+               } else {<br>
+                       amdgpu_reset_get_reset_domain(adev->reset_domain);<br>
+                       hive->reset_domain = adev->reset_domain;<br>
+               }<br>
        }<br>
<br>
        hive->hive_id = adev->gmc.xgmi.hive_id;<br>
--<br>
2.17.1<o:p></o:p></span></p>
</div>
</div>
</div>
</body>
</html>