<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<style type="text/css" style="display:none;"><!-- P {margin-top:0;margin-bottom:0;} --></style>
</head>
<body dir="ltr">
<div id="divtagdefaultwrapper" style="font-size:12pt;color:#000000;font-family:Calibri,Helvetica,sans-serif;" dir="ltr">
<p style="margin-top:0;margin-bottom:0">Reviewed-by: Alex Deucher &lt;alexander.deucher@amd.com&gt;<br>
</p>
</div>
<hr style="display:inline-block;width:98%" tabindex="-1">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" style="font-size:11pt" color="#000000"><b>From:</b> Christian König &lt;ckoenig.leichtzumerken@gmail.com&gt;<br>
<b>Sent:</b> Friday, December 14, 2018 9:37:23 AM<br>
<b>To:</b> Deucher, Alexander; alexdeucher@gmail.com; amd-gfx@lists.freedesktop.org<br>
<b>Subject:</b> [PATCH] drm/amdgpu: fix IH overflow on Vega10</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt;">
<div class="PlainText">When a ring buffer overflow happens the appropriate bit is set in the WPTR<br>
register which is also written back to memory. But clearing the bit in the<br>
WPTR doesn't trigger another memory writeback.<br>
<br>
So what can happen is that we end up processing the buffer overflow over and<br>
over again because the bit is never cleared, resulting in a random system<br>
lockup because of an infinite loop in an interrupt handler.<br>
<br>
This is 100% reproducible on Vega10, but it's most likely an issue we have<br>
in the driver over all generations all the way back to radeon.<br>
<br>
Signed-off-by: Christian König &lt;christian.koenig@amd.com&gt;<br>
---<br>
drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 68 ++++++++++++++++----------<br>
1 file changed, 43 insertions(+), 25 deletions(-)<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c<br>
index 992c8a8b8f77..0ab7785079c0 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c<br>
@@ -276,31 +276,49 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,<br>
<br>
wptr = le32_to_cpu(*ih->wptr_cpu);<br>
<br>
- if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) {<br>
- wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);<br>
-<br>
- /* When a ring buffer overflow happen start parsing interrupt<br>
- * from the last not overwritten vector (wptr + 32). Hopefully<br>
- * this should allow us to catchup.<br>
- */<br>
- tmp = (wptr + 32) & ih->ptr_mask;<br>
- dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",<br>
- wptr, ih->rptr, tmp);<br>
- ih->rptr = tmp;<br>
-<br>
- if (ih == &adev->irq.ih)<br>
- reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL);<br>
- else if (ih == &adev->irq.ih1)<br>
- reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING1);<br>
- else if (ih == &adev->irq.ih2)<br>
- reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING2);<br>
- else<br>
- BUG();<br>
-<br>
- tmp = RREG32_NO_KIQ(reg);<br>
- tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);<br>
- WREG32_NO_KIQ(reg, tmp);<br>
- }<br>
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))<br>
+ goto out;<br>
+<br>
+ /* Double check that the overflow wasn't already cleared. */<br>
+ if (ih == &adev->irq.ih)<br>
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR);<br>
+ else if (ih == &adev->irq.ih1)<br>
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING1);<br>
+ else if (ih == &adev->irq.ih2)<br>
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING2);<br>
+ else<br>
+ BUG();<br>
+<br>
+ wptr = RREG32_NO_KIQ(reg);<br>
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))<br>
+ goto out;<br>
+<br>
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);<br>
+<br>
+ /* When a ring buffer overflow happen start parsing interrupt<br>
+ * from the last not overwritten vector (wptr + 32). Hopefully<br>
+ * this should allow us to catchup.<br>
+ */<br>
+ tmp = (wptr + 32) & ih->ptr_mask;<br>
+ dev_warn(adev->dev, "IH ring buffer overflow "<br>
+ "(0x%08X, 0x%08X, 0x%08X)\n",<br>
+ wptr, ih->rptr, tmp);<br>
+ ih->rptr = tmp;<br>
+<br>
+ if (ih == &adev->irq.ih)<br>
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL);<br>
+ else if (ih == &adev->irq.ih1)<br>
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING1);<br>
+ else if (ih == &adev->irq.ih2)<br>
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING2);<br>
+ else<br>
+ BUG();<br>
+<br>
+ tmp = RREG32_NO_KIQ(reg);<br>
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);<br>
+ WREG32_NO_KIQ(reg, tmp);<br>
+<br>
+out:<br>
return (wptr & ih->ptr_mask);<br>
}<br>
<br>
-- <br>
2.17.1<br>
<br>
</div>
</span></font></div>
</body>
</html>