<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<p style="font-family:Calibri;font-size:10pt;color:#0000FF;margin:5pt;font-style:normal;font-weight:normal;text-decoration:none;" align="Left">
[AMD Official Use Only - AMD Internal Distribution Only]<br>
</p>
<br>
<div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);">
Please abandon this patch; a revised version has been sent for review.</div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);">
<br>
</div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);">
Regards,</div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);">
Shikang</div>
<div id="appendonsend"></div>
<hr style="display:inline-block;width:98%" tabindex="-1">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" style="font-size:11pt" color="#000000"><b>From:</b> Shikang Fan &lt;shikang.fan@amd.com&gt;<br>
<b>Sent:</b> Friday, November 8, 2024 3:44 PM<br>
<b>To:</b> amd-gfx@lists.freedesktop.org &lt;amd-gfx@lists.freedesktop.org&gt;<br>
<b>Cc:</b> Fan, Shikang &lt;Shikang.Fan@amd.com&gt;; Liu01, Tong (Esther) &lt;Tong.Liu01@amd.com&gt;; Deng, Emily &lt;Emily.Deng@amd.com&gt;<br>
<b>Subject:</b> [PATCH] [PATCH] drm/amdgpu/sriov: Check pending job finished or not to identify has bad job</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt;">
<div class="PlainText">drm_sched_free_job_work is a work-queue function,<br>
so even after a job has finished in hardware, it still takes some time for<br>
drm_sched_free_job_work to delete it from the pending list.<br>
Iterate over the pending job list and wait for each job to finish<br>
within a specified timeout (1s by default), so that jobs that have not yet been<br>
cleaned up, or are about to finish, are not mistaken for running jobs.<br>
If the wait times out, return true.<br>
<br>
Signed-off-by: Tong Liu01 &lt;Tong.Liu01@amd.com&gt;<br>
Signed-off-by: Emily Deng &lt;Emily.Deng@amd.com&gt;<br>
Signed-off-by: Shikang Fan &lt;shikang.fan@amd.com&gt;<br>
---<br>
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 21 ++++++++++++++++-----<br>
 1 file changed, 16 insertions(+), 5 deletions(-)<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
index 6c0ff1c2ae4c..83ce1c85e680 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
@@ -100,6 +100,7 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");<br>
 #define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)<br>
 #define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)<br>
 #define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)<br>
+#define AMDGPU_PENDING_JOB_TIMEOUT     msecs_to_jiffies(1000)<br>
 <br>
 static const struct drm_driver amdgpu_kms_driver;<br>
 <br>
@@ -5198,7 +5199,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,<br>
 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)<br>
 {<br>
         int i;<br>
-       struct drm_sched_job *job;<br>
+       struct drm_sched_job *job, *tmp;<br>
+       long r;<br>
 <br>
         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {<br>
                 struct amdgpu_ring *ring = adev->rings[i];<br>
@@ -5207,11 +5209,20 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev)<br>
                         continue;<br>
 <br>
                 spin_lock(&ring->sched.job_list_lock);<br>
-               job = list_first_entry_or_null(&ring->sched.pending_list,<br>
-                                              struct drm_sched_job, list);<br>
+<br>
+               /* iterates over the pending job list<br>
+                * wait for each job to finish within timeout (1s by default)<br>
+                * if wait timeout, return true<br>
+                */<br>
+               list_for_each_entry_safe(job, tmp, &ring->sched.pending_list, list) {<br>
+                       r = dma_fence_wait_timeout(&job->s_fence->finished,<br>
+                                                               false, AMDGPU_PENDING_JOB_TIMEOUT);<br>
+                       if (r <= 0) {<br>
+                               spin_unlock(&ring->sched.job_list_lock);<br>
+                               return true;<br>
+                       }<br>
+               }<br>
                 spin_unlock(&ring->sched.job_list_lock);<br>
-               if (job)<br>
-                       return true;<br>
         }<br>
         return false;<br>
 }<br>
-- <br>
2.34.1<br>
<br>
</div>
</span></font></div>
</div>
</body>
</html>