[v6 08/13] drm/amdgpu: adjust MES API used for suspend and resume

Alex Deucher alexdeucher at gmail.com
Mon Aug 4 17:16:58 UTC 2025


On Mon, Aug 4, 2025 at 4:41 AM Jesse.Zhang <Jesse.Zhang at amd.com> wrote:
>
> Use the suspend and resume API rather than remove queue
> and add queue API.  The former just preempts the queue
> while the latter remove it from the scheduler completely.
> There is no need to do that, we only need preemption
> in this case.
>
> V2: replace queue_active with queue state
> v3: set the suspend_fence_addr
>
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 51 ++++++++++++++++++++++
>  1 file changed, 51 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> index a871bac71e1e..8934d7113d58 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> @@ -395,10 +395,61 @@ mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
>         amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
>  }
>
> +static int mes_userq_preempt(struct amdgpu_userq_mgr *uq_mgr,
> +                            struct amdgpu_usermode_queue *queue)
> +{
> +       struct amdgpu_device *adev = uq_mgr->adev;
> +       struct mes_suspend_gang_input queue_input;
> +       struct amdgpu_userq_obj *ctx = &queue->fw_obj;
> +       int r;
> +
> +       if (queue->state != AMDGPU_USERQ_STATE_MAPPED)
> +               return 0;
> +       if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)
> +               return 0;
> +
> +       memset(&queue_input, 0x0, sizeof(struct mes_suspend_gang_input));
> +       queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
> +       queue_input.suspend_fence_addr = queue->fence_drv->gpu_addr;

You need to allocate a separate buffer for this.  You should also set
the fence value we are waiting for.  The driver then needs to wait on
this fence location to make sure the suspend is complete.  You can use
a wb allocation similar to the status stuff in
mes_v11_0_submit_pkt_and_poll_completion().

I think you also need to specify the doorbell offset for the queue you
want to target since this function is per queue.

Alex

> +
> +       amdgpu_mes_lock(&adev->mes);
> +       r = adev->mes.funcs->suspend_gang(&adev->mes, &queue_input);
> +       amdgpu_mes_unlock(&adev->mes);
> +       if (r)
> +               dev_err(adev->dev, "Failed to suspend queue, err (%d)\n", r);
> +       return r;
> +}
> +
> +static int mes_userq_restore(struct amdgpu_userq_mgr *uq_mgr,
> +                           struct amdgpu_usermode_queue *queue)
> +{
> +       struct amdgpu_device *adev = uq_mgr->adev;
> +       struct mes_resume_gang_input queue_input;
> +       struct amdgpu_userq_obj *ctx = &queue->fw_obj;
> +       int r;
> +
> +       if (queue->state == AMDGPU_USERQ_STATE_HUNG)
> +               return -EINVAL;
> +       if (queue->state != AMDGPU_USERQ_STATE_PREEMPTED)
> +               return 0;
> +
> +       memset(&queue_input, 0x0, sizeof(struct mes_resume_gang_input));
> +       queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
> +
> +       amdgpu_mes_lock(&adev->mes);
> +       r = adev->mes.funcs->resume_gang(&adev->mes, &queue_input);
> +       amdgpu_mes_unlock(&adev->mes);
> +       if (r)
> +               dev_err(adev->dev, "Failed to resume queue, err (%d)\n", r);
> +       return r;
> + }
> +
>  const struct amdgpu_userq_funcs userq_mes_funcs = {
>         .mqd_create = mes_userq_mqd_create,
>         .mqd_destroy = mes_userq_mqd_destroy,
>         .unmap = mes_userq_unmap,
>         .map = mes_userq_map,
> +       .preempt = mes_userq_preempt,
> +       .restore = mes_userq_restore,
>         .detect_and_reset = mes_userq_detect_and_reset,
>  };
> --
> 2.49.0
>


More information about the amd-gfx mailing list