[PATCH 14/27] drm/amdgpu/sdma6: re-emit unprocessed state on ring reset
Alex Deucher
alexander.deucher at amd.com
Fri Jun 13 21:47:35 UTC 2025
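Re-emit the unprocessed state after resetting the queue: back up the ring's unprocessed commands before the reset, force-complete only the guilty job's fence when queue0 was the selected queue, then re-emit the backed-up commands. If re-emitting fails, fall back to force-completing all fences on the ring.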
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 29 ++++++++++++++++++++++++--
1 file changed, 27 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 595e90a5274ea..00c7f440a6ba0 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -1537,11 +1537,23 @@ static int sdma_v6_0_ring_preempt_ib(struct amdgpu_ring *ring)
return r;
}
+static bool sdma_v6_0_is_queue_selected(struct amdgpu_device *adev,
+ u32 instance_id)
+{
+ /* KGD always uses queue0, so only the QUEUE0 context status register of this instance is read */
+ u32 context_status = RREG32(sdma_v6_0_get_reg_offset(adev, instance_id,
+ regSDMA0_QUEUE0_CONTEXT_STATUS));
+
+ /* Return true only when the SELECTED bit is set in the value just read */
+ return (context_status & SDMA0_QUEUE0_CONTEXT_STATUS__SELECTED_MASK) != 0;
+}
+
static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
unsigned int vmid,
struct amdgpu_fence *guilty_fence)
{
struct amdgpu_device *adev = ring->adev;
+ bool is_guilty;
int i, r;
if (amdgpu_sriov_vf(adev))
@@ -1557,6 +1569,10 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
return -EINVAL;
}
+ is_guilty = sdma_v6_0_is_queue_selected(adev, i);
+
+ amdgpu_ring_backup_unprocessed_commands(ring, is_guilty ? guilty_fence : NULL);
+
r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true);
if (r)
return r;
@@ -1564,8 +1580,17 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
r = sdma_v6_0_gfx_resume_instance(adev, i, true);
if (r)
return r;
- amdgpu_fence_driver_force_completion(ring);
- atomic_inc(&ring->adev->gpu_reset_counter);
+
+ if (is_guilty) {
+ /* signal the fence of the bad job */
+ amdgpu_fence_driver_guilty_force_completion(guilty_fence);
+ atomic_inc(&ring->adev->gpu_reset_counter);
+ }
+ r = amdgpu_ring_reemit_unprocessed_commands(ring);
+ if (r)
+ /* if we fail to reemit, force complete all fences */
+ amdgpu_fence_driver_force_completion(ring);
+
return 0;
}
--
2.49.0
More information about the amd-gfx mailing list