[v6 12/12] drm/amdgpu: Implement queue preemption using suspend/resume API
Alexander.Deucher at amd.com
Alexander.Deucher at amd.com
Fri Aug 1 06:21:10 UTC 2025
From: Alex Deucher <alexander.deucher at amd.com>
Replace the queue remove/add approach with suspend/resume semantics
for user queue preemption. This change:
1. Maintains queue scheduling registration while only preempting execution
   - The previously used remove_queue/add_queue approach would fully deregister queues
   - The new suspend/resume approach keeps scheduler state while preempting
2. Introduces proper preemption helpers:
   - amdgpu_userqueue_preempt_helper(): Suspends queue execution
     - Transitions MAPPED→UNMAPPED state on success
     - Marks the queue as HUNG and triggers queue reset detection on failure
   - amdgpu_userqueue_restore_helper(): Resumes queue execution
     - Transitions UNMAPPED→MAPPED state on success
     - Marks the queue as HUNG and triggers a GPU reset on failure
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 53 +++++++++++++++++++++--
1 file changed, 50 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 0c91302162fa..af0ac4b73ddf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -185,6 +185,54 @@ amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr,
return r;
}
+static int
+amdgpu_userqueue_preempt_helper(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *userq_funcs =
+ adev->userq_funcs[queue->queue_type];
+ int r = 0;
+
+ if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
+ r = userq_funcs->preempt(uq_mgr, queue);
+ if (r) {
+ amdgpu_userq_detect_and_reset_queues(uq_mgr);
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ } else {
+ queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
+ }
+ }
+
+ return r;
+}
+
+static int
+amdgpu_userqueue_restore_helper(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *userq_funcs =
+ adev->userq_funcs[queue->queue_type];
+ bool gpu_reset = false;
+ int r = 0;
+
+ if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) {
+ r = userq_funcs->restore(uq_mgr, queue);
+ if (r) {
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ gpu_reset = true;
+ } else {
+ queue->state = AMDGPU_USERQ_STATE_MAPPED;
+ }
+ }
+
+ if (gpu_reset)
+ amdgpu_userq_gpu_reset(adev);
+
+ return r;
+}
+
static void
amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_usermode_queue *queue)
@@ -639,7 +687,7 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
/* Resume all the queues for this process */
idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
- r = amdgpu_userq_map_helper(uq_mgr, queue);
+ r = amdgpu_userqueue_restore_helper(uq_mgr, queue);
if (r)
ret = r;
}
@@ -794,10 +842,9 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
int queue_id;
int ret = 0, r;
- amdgpu_userq_detect_and_reset_queues(uq_mgr);
/* Try to unmap all the queues in this process ctx */
idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
- r = amdgpu_userq_unmap_helper(uq_mgr, queue);
+ r = amdgpu_userqueue_preempt_helper(uq_mgr, queue);
if (r)
ret = r;
}
--
2.49.0
More information about the amd-gfx
mailing list