[PATCH 2/2] drm/amdgpu: handle IH ring1 overflow

Philip Yang Philip.Yang at amd.com
Sat Nov 13 00:05:09 UTC 2021


IH ring1 is used to process GPU retry faults. Overflow is enabled on
this ring to drain retry faults, because we want to receive other
interrupts while handling a retry fault to recover the range. No
overflow flag is set when wptr passes rptr, so amdgpu_ih_process checks
whether rptr equals wptr and exits; as a result, not all faults are
processed if ring1 overflows.

Detect a ring1 overflow by checking whether the timestamp of the IV
before rptr is larger than the timestamp of the IV at rptr. If so, set
rptr to wptr+32 to try to catch up and continue processing all faults.

Signed-off-by: Philip Yang <Philip.Yang at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c |  3 +--
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 30 ++++++++++++++++++++++++--
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index 0c7963dfacad..5f12df80cd35 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -178,8 +178,7 @@ static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev,
 	*prev_rptr = cur_rptr;
 
 	/* check ring is empty to workaround missing wptr overflow flag */
-	return cur_rptr >= checkpoint_wptr ||
-	       (cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih);
+	return cur_rptr >= checkpoint_wptr;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index a9ca6988009e..32b3d171427a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -317,6 +317,29 @@ static void vega10_ih_irq_disable(struct amdgpu_device *adev)
 	mdelay(1);
 }
 
+static bool vega10_ih_ring_overflow(struct amdgpu_ih_ring *ih, u32 wptr)
+{
+	unsigned pre_index, cur_index;
+	uint64_t pre_ts, ts;
+	uint32_t dw1, dw2;
+
+	if (ih->rptr >= wptr)
+		return false;
+
+	cur_index = ih->rptr >> 2;
+	pre_index = ((ih->rptr - 32) & ih->ptr_mask) >> 2;
+	
+	dw1 = le32_to_cpu(ih->ring[pre_index + 1]);
+	dw2 = le32_to_cpu(ih->ring[pre_index + 2]);
+	pre_ts = dw1 | ((u64)(dw2 & 0xffff) << 32);
+
+	dw1 = le32_to_cpu(ih->ring[cur_index + 1]);
+	dw2 = le32_to_cpu(ih->ring[cur_index + 2]);
+	ts = dw1 | ((u64)(dw2 & 0xffff) << 32);
+
+	return pre_ts > ts;
+}
+
 /**
  * vega10_ih_get_wptr - get the IH ring buffer wptr
  *
@@ -348,7 +371,10 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
 
 	/* Double check that the overflow wasn't already cleared. */
 	wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
-	if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+	if (ih == &adev->irq.ih && !REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+		goto out;
+
+	if (ih == &adev->irq.ih1 && !vega10_ih_ring_overflow(ih, wptr))
 		goto out;
 
 	wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
@@ -358,7 +384,7 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
 	 * this should allow us to catchup.
 	 */
 	tmp = (wptr + 32) & ih->ptr_mask;
-	dev_warn(adev->dev, "IH ring buffer overflow "
+	DRM_DEV_DEBUG(adev->dev, "IH ring buffer overflow "
 		 "(0x%08X, 0x%08X, 0x%08X)\n",
 		 wptr, ih->rptr, tmp);
 	ih->rptr = tmp;
-- 
2.17.1



More information about the amd-gfx mailing list