[PATCH 18/53] drm/amdgpu/gfx10: wait for reset done before remap
Alex Deucher
alexander.deucher at amd.com
Thu Jul 25 15:00:20 UTC 2024
From: Jiadong Zhu <Jiadong.Zhu at amd.com>
There is a racing condition that cp firmware modifies
MQD in reset sequence after driver updates it for
remapping. We have to wait till CP_HQD_ACTIVE becoming
false then remap the queue.
v2: fix KIQ locking (Alex)
v3: fix KIQ locking harder (Jessie)
Signed-off-by: Jiadong Zhu <Jiadong.Zhu at amd.com>
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 43 +++++++++++++++++++-------
1 file changed, 31 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index ee0fd555c284..7b534966a297 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -9513,7 +9513,7 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *kiq_ring = &kiq->ring;
unsigned long flags;
- int r;
+ int i, r;
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
@@ -9521,22 +9521,42 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
spin_lock_irqsave(&kiq->ring_lock, flags);
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
- r = -ENOMEM;
- goto out;
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
}
kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
0, 0);
amdgpu_ring_commit(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
r = amdgpu_ring_test_ring(kiq_ring);
if (r)
- goto out;
+ return r;
+
+ /* make sure dequeue is complete*/
+ gfx_v10_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v10_0_unset_safe_mode(adev, 0);
+ if (r) {
+ dev_err(adev->dev, "fail to wait on hqd deactive\n");
+ return r;
+ }
r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0)){
dev_err(adev->dev, "fail to resv mqd_obj\n");
- goto out;
+ return r;
}
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
if (!r) {
@@ -9545,22 +9565,21 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
ring->mqd_ptr = NULL;
}
amdgpu_bo_unreserve(ring->mqd_obj);
- if (r){
+ if (r) {
dev_err(adev->dev, "fail to unresv mqd_obj\n");
- goto out;
+ return r;
}
+ spin_lock_irqsave(&kiq->ring_lock, flags);
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) {
- r = -ENOMEM;
- goto out;
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
}
kiq->pmf->kiq_map_queues(kiq_ring, ring);
amdgpu_ring_commit(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
r = amdgpu_ring_test_ring(kiq_ring);
-
-out:
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
if (r)
return r;
--
2.45.2
More information about the amd-gfx
mailing list