<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Reviewed-by: Alex Deucher &lt;alexander.deucher@amd.com&gt;<br>
</div>
<div id="appendonsend"></div>
<div style="font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0)">
<br>
</div>
<hr tabindex="-1" style="display:inline-block; width:98%">
<div id="divRplyFwdMsg" dir="ltr"><font style="font-size:11pt" face="Calibri, sans-serif" color="#000000"><b>From:</b> amd-gfx &lt;amd-gfx-bounces@lists.freedesktop.org&gt; on behalf of Evan Quan &lt;evan.quan@amd.com&gt;<br>
<b>Sent:</b> Thursday, June 27, 2019 11:31 PM<br>
<b>To:</b> amd-gfx@lists.freedesktop.org<br>
<b>Cc:</b> Quan, Evan<br>
<b>Subject:</b> [PATCH] drm/amdgpu: fix MGPU fan boost enablement for XGMI reset</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt">
<div class="PlainText">MGPU fan boost feature should not be enabled until all the<br>
devices from the same hive are all back from reset.<br>
<br>
Change-Id: I03a69434ff28f4eac209bd91320dde8a238a33cf<br>
Signed-off-by: Evan Quan &lt;evan.quan@amd.com&gt;<br>
---<br>
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  4 ++++<br>
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 13 +++++++++++++<br>
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    |  4 ++--<br>
 3 files changed, 19 insertions(+), 2 deletions(-)<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
index 7541e1b076b0..9efa0423c242 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
@@ -1219,6 +1219,10 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev );<br>
 static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; }<br>
 #endif<br>
 <br>
+<br>
+void amdgpu_register_gpu_instance(struct amdgpu_device *adev);<br>
+void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev);<br>
+<br>
 #include "amdgpu_object.h"<br>
 <br>
 /* used by df_v3_6.c and amdgpu_pmu.c */<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
index a2d234c07fc4..f39eb7b37c8b 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
@@ -3558,6 +3558,12 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,<br>
                                 if (vram_lost)<br>
                                         amdgpu_device_fill_reset_magic(tmp_adev);<br>
 <br>
+                               /*<br>
+                                * Add this ASIC as tracked as reset was already<br>
+                                * complete successfully.<br>
+                                */<br>
+                               amdgpu_register_gpu_instance(tmp_adev);<br>
+<br>
                                 r = amdgpu_device_ip_late_init(tmp_adev);<br>
                                 if (r)<br>
                                         goto out;<br>
@@ -3692,6 +3698,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,<br>
                 device_list_handle = &device_list;<br>
         }<br>
 <br>
+       /*<br>
+        * Mark these ASICs to be reseted as untracked first<br>
+        * And add them back after reset completed<br>
+        */<br>
+       list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)<br>
+               amdgpu_unregister_gpu_instance(tmp_adev);<br>
+<br>
         /* block all schedulers and reset given job's ring */<br>
         list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {<br>
                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c<br>
index ed051fdb509f..e2c9d8d31ed8 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c<br>
@@ -41,7 +41,7 @@<br>
 #include "amdgpu_display.h"<br>
 #include "amdgpu_ras.h"<br>
 <br>
-static void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)<br>
+void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)<br>
 {<br>
         struct amdgpu_gpu_instance *gpu_instance;<br>
         int i;<br>
@@ -102,7 +102,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)<br>
         dev->dev_private = NULL;<br>
 }<br>
 <br>
-static void amdgpu_register_gpu_instance(struct amdgpu_device *adev)<br>
+void amdgpu_register_gpu_instance(struct amdgpu_device *adev)<br>
 {<br>
         struct amdgpu_gpu_instance *gpu_instance;<br>
 <br>
-- <br>
2.21.0<br>
<br>
_______________________________________________<br>
amd-gfx mailing list<br>
amd-gfx@lists.freedesktop.org<br>
<a href="https://lists.freedesktop.org/mailman/listinfo/amd-gfx">https://lists.freedesktop.org/mailman/listinfo/amd-gfx</a></div>
</span></font></div>
</body>
</html>