[PATCH] drm/amdgpu: fix IH overflow on Vega10

Deucher, Alexander Alexander.Deucher at amd.com
Fri Dec 14 15:47:55 UTC 2018


Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

________________________________
From: Christian König <ckoenig.leichtzumerken at gmail.com>
Sent: Friday, December 14, 2018 9:37:23 AM
To: Deucher, Alexander; alexdeucher at gmail.com; amd-gfx at lists.freedesktop.org
Subject: [PATCH] drm/amdgpu: fix IH overflow on Vega10

When a ring buffer overflow happens the appropriate bit is set in the WPTR
register which is also written back to memory. But clearing the bit in the
WPTR doesn't trigger another memory writeback.

So what can happen is that we end up processing the buffer overflow over and
over again because the bit is never cleared, resulting in a random system
lockup because of an infinite loop in an interrupt handler.

This is 100% reproducible on Vega10, but it's most likely an issue we have
in the driver across all generations, all the way back to radeon.

Signed-off-by: Christian König <christian.koenig at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 68 ++++++++++++++++----------
 1 file changed, 43 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 992c8a8b8f77..0ab7785079c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -276,31 +276,49 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,

         wptr = le32_to_cpu(*ih->wptr_cpu);

-       if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) {
-               wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
-
-               /* When a ring buffer overflow happen start parsing interrupt
-                * from the last not overwritten vector (wptr + 32). Hopefully
-                * this should allow us to catchup.
-                */
-               tmp = (wptr + 32) & ih->ptr_mask;
-               dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
-                        wptr, ih->rptr, tmp);
-               ih->rptr = tmp;
-
-               if (ih == &adev->irq.ih)
-                       reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL);
-               else if (ih == &adev->irq.ih1)
-                       reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING1);
-               else if (ih == &adev->irq.ih2)
-                       reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING2);
-               else
-                       BUG();
-
-               tmp = RREG32_NO_KIQ(reg);
-               tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
-               WREG32_NO_KIQ(reg, tmp);
-       }
+       if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+               goto out;
+
+       /* Double check that the overflow wasn't already cleared. */
+       if (ih == &adev->irq.ih)
+               reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR);
+       else if (ih == &adev->irq.ih1)
+               reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING1);
+       else if (ih == &adev->irq.ih2)
+               reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING2);
+       else
+               BUG();
+
+       wptr = RREG32_NO_KIQ(reg);
+       if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+               goto out;
+
+       wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+
+       /* When a ring buffer overflow happen start parsing interrupt
+        * from the last not overwritten vector (wptr + 32). Hopefully
+        * this should allow us to catchup.
+        */
+       tmp = (wptr + 32) & ih->ptr_mask;
+       dev_warn(adev->dev, "IH ring buffer overflow "
+                "(0x%08X, 0x%08X, 0x%08X)\n",
+                wptr, ih->rptr, tmp);
+       ih->rptr = tmp;
+
+       if (ih == &adev->irq.ih)
+               reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL);
+       else if (ih == &adev->irq.ih1)
+               reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING1);
+       else if (ih == &adev->irq.ih2)
+               reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING2);
+       else
+               BUG();
+
+       tmp = RREG32_NO_KIQ(reg);
+       tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+       WREG32_NO_KIQ(reg, tmp);
+
+out:
         return (wptr & ih->ptr_mask);
 }

--
2.17.1

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20181214/dfaa226c/attachment.html>


More information about the amd-gfx mailing list