[PATCH 13/29] drm/amdgpu/gfx10: re-emit unprocessed state on ring reset
Alex Deucher
alexander.deucher at amd.com
Fri Jun 6 06:43:38 UTC 2025
Re-emit the unprocessed state after resetting the queue.
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 49 ++++++++++++--------------
1 file changed, 23 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index b57a21c0874c8..1faa8c6a90d9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -9046,21 +9046,6 @@ static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
ref, mask);
}
-static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring,
- unsigned int vmid)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t value = 0;
-
- value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
- value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
- value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
- value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
- WREG32_SOC15(GC, 0, mmSQ_CMD, value);
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
-}
-
static void
gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
uint32_t me, uint32_t pipe,
@@ -9539,6 +9524,8 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring,
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
+ amdgpu_ring_backup_unprocessed_commands(ring, &job->hw_fence.base, true);
+
spin_lock_irqsave(&kiq->ring_lock, flags);
if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7 + kiq->pmf->map_queues_size)) {
@@ -9563,10 +9550,8 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring,
SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffffffff);
kiq->pmf->kiq_map_queues(kiq_ring, ring);
amdgpu_ring_commit(kiq_ring);
-
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
-
r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
if (r)
return r;
@@ -9579,9 +9564,16 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring,
r = amdgpu_ring_test_ring(ring);
if (r)
return r;
+
dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
- amdgpu_fence_driver_force_completion(ring);
+ /* signal the fence of the bad job */
+ amdgpu_fence_driver_guilty_force_completion(&job->hw_fence.base);
atomic_inc(&ring->adev->gpu_reset_counter);
+ r = amdgpu_ring_reemit_unprocessed_commands(ring);
+ if (r)
+ /* if we fail to reemit, force complete all fences */
+ amdgpu_fence_driver_force_completion(ring);
+
return 0;
}
@@ -9600,6 +9592,8 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
+ amdgpu_ring_backup_unprocessed_commands(ring, &job->hw_fence.base, true);
+
spin_lock_irqsave(&kiq->ring_lock, flags);
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
@@ -9610,9 +9604,8 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
0, 0);
amdgpu_ring_commit(kiq_ring);
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
-
r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
if (r)
return r;
@@ -9648,18 +9641,24 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
}
kiq->pmf->kiq_map_queues(kiq_ring, ring);
amdgpu_ring_commit(kiq_ring);
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
-
r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
if (r)
return r;
r = amdgpu_ring_test_ring(ring);
if (r)
return r;
+
dma_fence_set_error(&job->base.s_fence->finished, -ETIME);
- amdgpu_fence_driver_force_completion(ring);
+ /* signal the fence of the bad job */
+ amdgpu_fence_driver_guilty_force_completion(&job->hw_fence.base);
atomic_inc(&ring->adev->gpu_reset_counter);
+ r = amdgpu_ring_reemit_unprocessed_commands(ring);
+ if (r)
+ /* if we fail to reemit, force complete all fences */
+ amdgpu_fence_driver_force_completion(ring);
+
return 0;
}
@@ -9895,7 +9894,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
- .soft_recovery = gfx_v10_0_ring_soft_recovery,
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
.reset = gfx_v10_0_reset_kgq,
.emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
@@ -9936,7 +9934,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
- .soft_recovery = gfx_v10_0_ring_soft_recovery,
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
.reset = gfx_v10_0_reset_kcq,
.emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
--
2.49.0
More information about the amd-gfx
mailing list