<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
</head>
<body>
<p style="font-family:Calibri;font-size:10pt;color:#008000;margin:5pt;font-style:normal;font-weight:normal;text-decoration:none;" align="Left">
[Public]<br>
</p>
<br>
<div>
<div dir="ltr">
<div dir="ltr">One comment below; other than that, looks good to me. Feel free to add<span> my RB if you send a v2. </span></div>
<div id="ms-outlook-mobile-signature">
<div><br>
</div>
<div style="color: rgb(33, 33, 33); background-color: rgb(255, 255, 255);" dir="auto">
Regards,</div>
<div style="color: rgb(33, 33, 33); background-color: rgb(255, 255, 255);" dir="auto">
Rajneesh</div>
</div>
<div id="mail-editor-reference-message-container" class="ms-outlook-mobile-reference-message">
<hr style="display:inline-block;width:98%" tabindex="-1">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif"><b>From:</b> Lazar, Lijo &lt;Lijo.Lazar@amd.com&gt;<br>
<b>Sent:</b> Thursday, October 17, 2024 5:10 AM<br>
<b>To:</b> amd-gfx@lists.freedesktop.org &lt;amd-gfx@lists.freedesktop.org&gt;<br>
<b>Cc:</b> Zhang, Hawking &lt;Hawking.Zhang@amd.com&gt;; Deucher, Alexander &lt;Alexander.Deucher@amd.com&gt;; Bhardwaj, Rajneesh &lt;Rajneesh.Bhardwaj@amd.com&gt;; Errabolu, Ramesh &lt;Ramesh.Errabolu@amd.com&gt;<br>
<b>Subject:</b> [PATCH] drm/amdgpu: Fix the logic for NPS request failure
<div> </div>
</font></div>
<meta name="Generator" content="Microsoft Exchange Server">
<!-- converted from text --><font size="2"><span style="font-size:11pt;">
<div class="PlainText">On a hive, NPS request is placed by the first one for all devices in the<br>
hive. If the request fails, mark the mode as UNKNOWN so that subsequent<br>
devices on unload don't request it. Also, fix the mutex double lock<br>
issue in error condition, should have been mutex_unlock.<br>
<br>
Signed-off-by: Lijo Lazar &lt;lijo.lazar@amd.com&gt;<br>
<br>
Fixes: 44d5206ec07c ("drm/amdgpu: Place NPS mode request on unload")<br>
---<br>
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 27 +++++++++++++-----------<br>
1 file changed, 15 insertions(+), 12 deletions(-)<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c<br>
index fcdbcff57632..d2c25af2c5fe 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c<br>
@@ -1586,26 +1586,29 @@ int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,<br>
* devices don't request anymore.<br>
*/<br>
mutex_lock(&hive->hive_lock);<br>
+ if (atomic_read(&hive->requested_nps_mode) ==<br>
+ UNKNOWN_MEMORY_PARTITION_MODE) {<br>
+ mutex_unlock(&hive->hive_lock);</div>
<div class="PlainText" dir="ltr"><br>
</div>
<div class="PlainText" dir="ltr">Maybe a warning or debug print here is useful?</div>
<div class="PlainText"><br>
+ return 0;<br>
+ }<br>
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {<br>
r = adev->gmc.gmc_funcs->request_mem_partition_mode(<br>
tmp_adev, req_nps_mode);<br>
if (r)<br>
- goto err;<br>
+ break;<br>
+ }<br>
+ if (r) {<br>
+ /* Request back current mode if one of the requests failed */<br>
+ cur_nps_mode =<br>
+ adev->gmc.gmc_funcs->query_mem_partition_mode(tmp_adev);<br>
+ list_for_each_entry_continue_reverse(<br>
+ tmp_adev, &hive->device_list, gmc.xgmi.head)<br>
+ adev->gmc.gmc_funcs->request_mem_partition_mode(<br>
+ tmp_adev, cur_nps_mode);<br>
}<br>
/* Set to UNKNOWN so that other devices don't request anymore */<br>
atomic_set(&hive->requested_nps_mode, UNKNOWN_MEMORY_PARTITION_MODE);<br>
-<br>
mutex_unlock(&hive->hive_lock);<br>
<br>
- return 0;<br>
-err:<br>
- /* Request back current mode if one of the requests failed */<br>
- cur_nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(tmp_adev);<br>
- list_for_each_entry_continue_reverse(tmp_adev, &hive->device_list,<br>
- gmc.xgmi.head)<br>
- adev->gmc.gmc_funcs->request_mem_partition_mode(tmp_adev,<br>
- cur_nps_mode);<br>
- mutex_lock(&hive->hive_lock);<br>
-<br>
return r;<br>
}<br>
-- <br>
2.25.1<br>
<br>
</div>
</span></font></div>
</div>
</div>
</body>
</html>