[PATCH 1/2] drm/amdgpu/gfx10: implement queue reset via MMIO
Jesse.zhang@amd.com
jesse.zhang at amd.com
Thu Jan 9 06:28:06 UTC 2025
Implement gfx10 KCQ reset via MMIO.

Factor the KCQ reset flow into a new gfx_v10_0_queue_reset() helper with
two paths: an MMIO path that selects the queue through SRBM, writes
CP_HQD_DEQUEUE_REQUEST and SPI_COMPUTE_QUEUE_RESET, and polls
CP_HQD_ACTIVE until the HQD deactivates; and the existing KIQ path,
which unmaps the queue with the RESET_QUEUES action and performs the
same deactivation poll. gfx_v10_0_reset_kcq() now calls the helper with
use_mmio set to false.
Signed-off-by: Jesse Zhang <jesse.zhang at amd.com>
---
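Note (after the cut line, not part of the commit message): the helper
takes a use_mmio flag so the KCQ reset can fall back to direct register
writes when the KIQ path is not usable. A minimal caller sketch,
assuming a hypothetical example_reset_kcq() wrapper (illustrative only,
not part of this patch):

	/* Hypothetical caller -- wrapper name and fallback check are
	 * illustrative, not introduced by this patch.
	 */
	static int example_reset_kcq(struct amdgpu_ring *ring, uint32_t vmid)
	{
		struct amdgpu_device *adev = ring->adev;
		struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
		/* Prefer the KIQ unmap path; fall back to MMIO when the
		 * KIQ packet manager funcs are unavailable.
		 */
		bool use_mmio = !kiq->pmf || !kiq->pmf->kiq_unmap_queues;

		return gfx_v10_0_queue_reset(ring, vmid, use_mmio);
	}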
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 121 ++++++++++++++++++-------
1 file changed, 88 insertions(+), 33 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 88393c2c08e4..89409cb7d195 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -9457,6 +9457,92 @@ static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
amdgpu_ring_insert_nop(ring, num_nop - 1);
}
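+/**
+ * gfx_v10_0_queue_reset - reset a compute queue via MMIO or the KIQ
+ * @ring: the compute ring to reset
+ * @vmid: VMID of the process that owns the queue (unused by this helper)
+ * @use_mmio: reset via direct MMIO register writes instead of a KIQ
+ * unmap request
+ */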
+static int gfx_v10_0_queue_reset(struct amdgpu_ring *ring, uint32_t vmid, bool use_mmio)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ uint32_t queue_type = ring->funcs->type;
+ unsigned long flags;
+ int i, r = 0;
+
+ if (use_mmio) {
+ if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
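+ /* Issue an HQD dequeue request, then reset the queue via SPI */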
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ /* Make sure dequeue is complete */
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
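+ /* Poll CP_HQD_ACTIVE until the HQD reports inactive */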
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -ETIMEDOUT;
+ dev_err(adev->dev, "fail to wait on hqd deactivate\n");
+ }
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ }
+ } else {
+ if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
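+ /* Ask the KIQ to unmap the queue with the RESET_QUEUES action */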
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 0, 0);
+
+ amdgpu_ring_commit(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_ring_test_ring(kiq_ring);
+ if (r)
+ return r;
+
+ /* Make sure dequeue is complete */
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ r = -ETIMEDOUT;
+ dev_err(adev->dev, "fail to wait on hqd deactivate\n");
+ }
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ }
+ }
+
+ return r;
+}
+
static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
{
struct amdgpu_device *adev = ring->adev;
@@ -9531,7 +9617,7 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *kiq_ring = &kiq->ring;
unsigned long flags;
- int i, r;
+ int r;
if (amdgpu_sriov_vf(adev))
return -EINVAL;
@@ -9539,41 +9625,10 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
- spin_lock_irqsave(&kiq->ring_lock, flags);
-
- if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
- return -ENOMEM;
- }
-
- kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
- 0, 0);
- amdgpu_ring_commit(kiq_ring);
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
-
- r = amdgpu_ring_test_ring(kiq_ring);
+ r = gfx_v10_0_queue_reset(ring, vmid, false);
if (r)
return r;
- /* make sure dequeue is complete*/
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
- mutex_lock(&adev->srbm_mutex);
- nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- for (i = 0; i < adev->usec_timeout; i++) {
- if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
- break;
- udelay(1);
- }
- if (i >= adev->usec_timeout)
- r = -ETIMEDOUT;
- nv_grbm_select(adev, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
- if (r) {
- dev_err(adev->dev, "fail to wait on hqd deactivate\n");
- return r;
- }
-
r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0)) {
dev_err(adev->dev, "fail to resv mqd_obj\n");
--
2.25.1