[PATCH 1/5] drm/amdgpu: handle IH ring1 overflow

Philip Yang Philip.Yang at amd.com
Tue Nov 9 23:04:28 UTC 2021


IH ring1 is used to process GPU retry faults. Overflow is enabled on it
so that retry faults can be drained before the range is unmapped, which
means wptr may pass rptr. amdgpu_ih_process should therefore check that
rptr equals the latest wptr before exiting; otherwise it will continue
to recover outdated retry faults after the retry fault drain is done,
and generate false GPU VM faults because the range is already unmapped
from the CPU.

Signed-off-by: Philip Yang <Philip.Yang at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index f3d62e196901..d1ef61811169 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -223,7 +223,7 @@ int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
  */
 int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 {
-	unsigned int count = AMDGPU_IH_MAX_NUM_IVS;
+	unsigned int count;
 	u32 wptr;
 
 	if (!ih->enabled || adev->shutdown)
@@ -232,6 +232,8 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 	wptr = amdgpu_ih_get_wptr(adev, ih);
 
 restart_ih:
+	count = AMDGPU_IH_MAX_NUM_IVS;
+
 	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
 
 	/* Order reading of wptr vs. reading of IH ring data */
@@ -240,6 +242,9 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 	while (ih->rptr != wptr && --count) {
 		amdgpu_irq_dispatch(adev, ih);
 		ih->rptr &= ih->ptr_mask;
+
+		if (ih == &adev->irq.ih1)
+			wptr = amdgpu_ih_get_wptr(adev, ih);
 	}
 
 	amdgpu_ih_set_rptr(adev, ih);
-- 
2.17.1



More information about the amd-gfx mailing list