[PATCH 02/23] drm/amdgpu: Fix the KCQ hang when binding back

Thu Mar 30 19:42:13 UTC 2023

From: Shiwu Zhang <shiwu.zhang at amd.com>

Just like the KIQ, KCQ need to clear the doorbell related regs as well
to avoid hangs when to load driver again after unloading.

Signed-off-by: Shiwu Zhang <shiwu.zhang at amd.com>
Reviewed-by: Le Ma <le.ma at amd.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 28 ++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 3bda7470b7ff..d652464e5ade 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1675,7 +1675,7 @@ static int gfx_v9_4_3_xcc_kiq_init_register(struct amdgpu_ring *ring,
 	return 0;
 }
 
-static int gfx_v9_4_3_xcc_kiq_fini_register(struct amdgpu_ring *ring,
+static int gfx_v9_4_3_xcc_q_fini_register(struct amdgpu_ring *ring,
 					    int xcc_id)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -1693,7 +1693,7 @@ static int gfx_v9_4_3_xcc_kiq_fini_register(struct amdgpu_ring *ring,
 		}
 
 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
-			DRM_DEBUG("KIQ dequeue request failed.\n");
+			DRM_DEBUG("%s dequeue request failed.\n", ring->name);
 
 			/* Manual disable if dequeue request times out */
 			WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, 0);
@@ -1801,6 +1801,27 @@ static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id)
 	return 0;
 }
 
+static int gfx_v9_4_3_xcc_kcq_fini_register(struct amdgpu_device *adev, int xcc_id)
+{
+	struct amdgpu_ring *ring;
+	int j;
+
+	for (j = 0; j < adev->gfx.num_compute_rings; j++) {
+		ring = &adev->gfx.compute_ring[j +  xcc_id * adev->gfx.num_compute_rings];
+		if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+			mutex_lock(&adev->srbm_mutex);
+			soc15_grbm_select(adev, ring->me,
+					ring->pipe,
+					ring->queue, 0, GET_INST(GC, xcc_id));
+			gfx_v9_4_3_xcc_q_fini_register(ring, xcc_id);
+			soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+			mutex_unlock(&adev->srbm_mutex);
+		}
+	}
+
+	return 0;
+}
+
 static int gfx_v9_4_3_xcc_kiq_resume(struct amdgpu_device *adev, int xcc_id)
 {
 	struct amdgpu_ring *ring;
@@ -1929,12 +1950,13 @@ static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id)
 				  adev->gfx.kiq[xcc_id].ring.pipe,
 				  adev->gfx.kiq[xcc_id].ring.queue, 0,
 				  GET_INST(GC, xcc_id));
-		gfx_v9_4_3_xcc_kiq_fini_register(&adev->gfx.kiq[xcc_id].ring,
+		gfx_v9_4_3_xcc_q_fini_register(&adev->gfx.kiq[xcc_id].ring,
 						 xcc_id);
 		soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
 		mutex_unlock(&adev->srbm_mutex);
 	}
 
+	gfx_v9_4_3_xcc_kcq_fini_register(adev, xcc_id);
 	gfx_v9_4_3_xcc_cp_enable(adev, false, xcc_id);
 
 	/* Skip suspend with A+A reset */
-- 
2.39.2