[PATCH 6/6] drm/amdgpu: fix fence fallback timer expired error

Samuel Zhang guoqing.zhang at amd.com
Mon Apr 14 10:46:55 UTC 2025


IH does not work after switching to a new GPU index for the first time.
The IH handler function needs to be re-registered with the kernel after
switching to the new GPU index.

Signed-off-by: Samuel Zhang <guoqing.zhang at amd.com>
Change-Id: Idece1c8fce24032fd08f5a8b6ac23793c51e56dd
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c |  7 +++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h |  1 +
 drivers/gpu/drm/amd/amdgpu/vega20_ih.c  | 18 ++++++++++++++++--
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 19ce4da285e8..2292245a0c5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -326,7 +326,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
 	return r;
 }
 
-void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
+void amdgpu_irq_uninstall(struct amdgpu_device *adev)
 {
 	if (adev->irq.installed) {
 		free_irq(adev->irq.irq, adev_to_drm(adev));
@@ -334,7 +334,10 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
 		if (adev->irq.msi_enabled)
 			pci_free_irq_vectors(adev->pdev);
 	}
-
+}
+void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
+{
+	amdgpu_irq_uninstall(adev);
 	amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft);
 	amdgpu_ih_ring_fini(adev, &adev->irq.ih);
 	amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index 04c0b4fa17a4..c6e6681b4f71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -123,6 +123,7 @@ extern const int node_id_to_phys_map[NODEID_MAX];
 void amdgpu_irq_disable_all(struct amdgpu_device *adev);
 
 int amdgpu_irq_init(struct amdgpu_device *adev);
+void amdgpu_irq_uninstall(struct amdgpu_device *adev);
 void amdgpu_irq_fini_sw(struct amdgpu_device *adev);
 void amdgpu_irq_fini_hw(struct amdgpu_device *adev);
 int amdgpu_irq_add_id(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index faa0dd75dd6d..ef996505e4dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -643,12 +643,26 @@ static int vega20_ih_hw_fini(struct amdgpu_ip_block *ip_block)
 
 static int vega20_ih_suspend(struct amdgpu_ip_block *ip_block)
 {
-	return vega20_ih_hw_fini(ip_block);
+	struct amdgpu_device *adev = ip_block->adev;
+	int r = 0;
+
+	r = vega20_ih_hw_fini(ip_block);
+	amdgpu_irq_uninstall(adev);
+	return r;
 }
 
 static int vega20_ih_resume(struct amdgpu_ip_block *ip_block)
 {
-	return vega20_ih_hw_init(ip_block);
+	struct amdgpu_device *adev = ip_block->adev;
+	int r = 0;
+
+	r = amdgpu_irq_init(adev);
+	if (r) {
+		dev_err(adev->dev, "amdgpu_irq_init failed in %s, %d\n", __func__, r);
+		return r;
+	}
+	r = vega20_ih_hw_init(ip_block);
+	return r;
 }
 
 static bool vega20_ih_is_idle(struct amdgpu_ip_block *ip_block)
-- 
2.43.5



More information about the amd-gfx mailing list