[v8 02/11] drm/amdgpu: adjust MES API used for suspend and resume
Alex Deucher
alexdeucher at gmail.com
Wed Aug 20 20:52:20 UTC 2025
On Tue, Aug 12, 2025 at 3:36 PM Alex Deucher <alexdeucher at gmail.com> wrote:
>
> On Mon, Aug 11, 2025 at 6:18 AM Jesse.Zhang <Jesse.Zhang at amd.com> wrote:
> >
> > From: Alex Deucher <alexander.deucher at amd.com>
> >
> > Use the suspend and resume API rather than the remove queue
> > and add queue API. The former just preempts the queue,
> > while the latter removes it from the scheduler completely.
> > There is no need to do that; we only need preemption
> > in this case.
> >
> > V2: replace queue_active with queue state
> > v3: set the suspend_fence_addr
> >
> > Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> > Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
> > ---
> > drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 51 ++++++++++++++++++++++
> > 1 file changed, 51 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > index d6f50b13e2ba..46b24035e14c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > @@ -347,9 +347,60 @@ mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
> > amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
> > }
> >
> > +static int mes_userq_preempt(struct amdgpu_userq_mgr *uq_mgr,
> > + struct amdgpu_usermode_queue *queue)
> > +{
> > + struct amdgpu_device *adev = uq_mgr->adev;
> > + struct mes_suspend_gang_input queue_input;
> > + struct amdgpu_userq_obj *ctx = &queue->fw_obj;
> > + int r;
> > +
> > + if (queue->state != AMDGPU_USERQ_STATE_MAPPED)
> > + return 0;
> > + if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)
> > + return 0;
> > +
> > + memset(&queue_input, 0x0, sizeof(struct mes_suspend_gang_input));
> > + queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
> > + queue_input.suspend_fence_addr = queue->fence_drv->gpu_addr;
>
> This will overwrite the user fence value with 0. You need a separate
> allocation for this. You might also need to wait for this value to
> verify if the suspend completed successfully.
This still needs to be handled.
Alex
>
> Alex
>
>
> > +
> > + amdgpu_mes_lock(&adev->mes);
> > + r = adev->mes.funcs->suspend_gang(&adev->mes, &queue_input);
> > + amdgpu_mes_unlock(&adev->mes);
> > + if (r)
> > + dev_err(adev->dev, "Failed to suspend queue, err (%d)\n", r);
> > + return r;
> > +}
> > +
> > +static int mes_userq_restore(struct amdgpu_userq_mgr *uq_mgr,
> > + struct amdgpu_usermode_queue *queue)
> > +{
> > + struct amdgpu_device *adev = uq_mgr->adev;
> > + struct mes_resume_gang_input queue_input;
> > + struct amdgpu_userq_obj *ctx = &queue->fw_obj;
> > + int r;
> > +
> > + if (queue->state == AMDGPU_USERQ_STATE_HUNG)
> > + return -EINVAL;
> > + if (queue->state != AMDGPU_USERQ_STATE_PREEMPTED)
> > + return 0;
> > +
> > + memset(&queue_input, 0x0, sizeof(struct mes_resume_gang_input));
> > + queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
> > +
> > + amdgpu_mes_lock(&adev->mes);
> > + r = adev->mes.funcs->resume_gang(&adev->mes, &queue_input);
> > + amdgpu_mes_unlock(&adev->mes);
> > + if (r)
> > + dev_err(adev->dev, "Failed to resume queue, err (%d)\n", r);
> > + return r;
> > + }
> > +
> > const struct amdgpu_userq_funcs userq_mes_funcs = {
> > .mqd_create = mes_userq_mqd_create,
> > .mqd_destroy = mes_userq_mqd_destroy,
> > .unmap = mes_userq_unmap,
> > .map = mes_userq_map,
> > + .preempt = mes_userq_preempt,
> > + .restore = mes_userq_restore,
> > };
> > --
> > 2.49.0
> >
More information about the amd-gfx
mailing list