[v10 02/11] drm/amdgpu: adjust MES API used for suspend and resume
Jesse.Zhang
Jesse.Zhang at amd.com
Fri Aug 22 08:51:05 UTC 2025
From: Alex Deucher <alexander.deucher at amd.com>
Use the suspend and resume API rather than the remove queue
and add queue API. The former just preempts the queue while
the latter removes it from the scheduler completely. There
is no need to do that; we only need preemption in this case.
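To make the difference concrete, here is a minimal caller-side
sketch (illustrative only, not part of this patch; the real call
sites live in the generic amdgpu_userq layer):

static int userq_quiesce_example(struct amdgpu_userq_mgr *uq_mgr,
                                 struct amdgpu_usermode_queue *queue,
                                 const struct amdgpu_userq_funcs *uq_funcs)
{
        int r;

        /* preempt(): the queue stays registered with MES and is
         * only taken off the hardware scheduler until resumed.
         */
        r = uq_funcs->preempt(uq_mgr, queue);
        if (r)
                return r;

        /* ... do whatever required the queue to be quiescent ... */

        /* restore(): no MQD re-creation or remapping is needed,
         * unlike the unmap()/map() pair, which tears down and
         * rebuilds the scheduler state.
         */
        return uq_funcs->restore(uq_mgr, queue);
}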
v2: replace queue_active with queue state
v3: set the suspend_fence_addr
v4: allocate another per-queue buffer for the suspend fence and set
    the sequence number; also wait for the suspend fence. (Alex)
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 2 +
drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 94 ++++++++++++++++++++++
2 files changed, 96 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 5111d7dce86f..d58cf48b456b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -65,6 +65,8 @@ struct amdgpu_usermode_queue {
struct dma_fence *last_fence;
u32 xcp_id;
int priority;
+ struct amdgpu_userq_obj suspend_fence;
+ uint64_t suspend_fence_seq;
};
struct amdgpu_userq_funcs {
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index d6f50b13e2ba..ce5ac8c2f7e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -28,6 +28,8 @@
#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
+#define AMDGPU_USERQ_SUSPEND_FENCE_SZ PAGE_SIZE
+#define AMDGPU_USERQ_FENCE_TIMEOUT (1000000000) /* 1 second, in ns */
static int
mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo)
@@ -347,9 +349,101 @@ mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
}
+static int mes_userq_wait_fence(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *fence_obj,
+ uint64_t fence_value,
+ uint64_t timeout)
+{
+ /* Poll the fence value written by MES until it matches the
+ * expected value or the timeout (in ns) expires.
+ */
+ uint64_t start_time = ktime_get_ns();
+ uint64_t *fence_addr = fence_obj->cpu_ptr;
+
+ while (READ_ONCE(*fence_addr) != fence_value) {
+ if (ktime_get_ns() - start_time > timeout) {
+ return -ETIMEDOUT;
+ }
+ udelay(10);
+ }
+ return 0;
+}
+
+static int mes_userq_preempt(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct mes_suspend_gang_input queue_input;
+ struct amdgpu_userq_obj *ctx = &queue->fw_obj;
+ int r;
+
+ if (queue->state != AMDGPU_USERQ_STATE_MAPPED)
+ return 0;
+
+ /* Allocate suspend fence buffer if not already allocated */
+ if (!queue->suspend_fence.obj) {
+ r = amdgpu_userq_create_object(uq_mgr, &queue->suspend_fence, AMDGPU_USERQ_SUSPEND_FENCE_SZ);
+ if (r) {
+ DRM_ERROR("Failed to allocate suspend fence buffer\n");
+ return r;
+ }
+ queue->suspend_fence_seq = 0;
+ }
+
+ memset(&queue_input, 0x0, sizeof(struct mes_suspend_gang_input));
+ queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+ queue_input.suspend_fence_addr = queue->suspend_fence.gpu_addr;
+ queue_input.suspend_fence_value = ++queue->suspend_fence_seq;
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->suspend_gang(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+
+ if (r) {
+ DRM_ERROR("Failed to suspend gang: %d\n", r);
+ return r;
+ }
+
+ /* Wait for suspend fence to be signaled */
+ r = mes_userq_wait_fence(uq_mgr, &queue->suspend_fence,
+ queue_input.suspend_fence_value,
+ AMDGPU_USERQ_FENCE_TIMEOUT);
+ if (r) {
+ DRM_ERROR("Suspend fence timeout\n");
+ return r;
+ }
+
+ return 0;
+}
+
+static int mes_userq_restore(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct mes_resume_gang_input queue_input;
+ struct amdgpu_userq_obj *ctx = &queue->fw_obj;
+ int r;
+
+ if (queue->state == AMDGPU_USERQ_STATE_HUNG)
+ return -EINVAL;
+ if (queue->state != AMDGPU_USERQ_STATE_PREEMPTED)
+ return 0;
+
+ memset(&queue_input, 0x0, sizeof(struct mes_resume_gang_input));
+ queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->resume_gang(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "Failed to resume queue, err (%d)\n", r);
+ return r;
+}
+
const struct amdgpu_userq_funcs userq_mes_funcs = {
.mqd_create = mes_userq_mqd_create,
.mqd_destroy = mes_userq_mqd_destroy,
.unmap = mes_userq_unmap,
.map = mes_userq_map,
+ .preempt = mes_userq_preempt,
+ .restore = mes_userq_restore,
};
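As an aside, the open-coded poll in mes_userq_wait_fence() could also
be written with the kernel's generic polling helper from
<linux/iopoll.h>. A rough equivalent, assuming the caller may sleep
(illustrative only, not part of this patch):

static int mes_userq_wait_fence_poll(struct amdgpu_userq_obj *fence_obj,
                                     uint64_t fence_value,
                                     uint64_t timeout_ns)
{
        uint64_t *fence_addr = fence_obj->cpu_ptr;
        uint64_t val;

        /* Re-read the MES-written fence value every ~10us until it
         * matches, or return -ETIMEDOUT once timeout_ns (converted
         * to us) has elapsed.
         */
        return read_poll_timeout(READ_ONCE, val, val == fence_value,
                                 10, div_u64(timeout_ns, 1000), false,
                                 *fence_addr);
}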
--
2.49.0