[PATCH 3/7] drm/amdgpu: switch from queue_active to queue state

Liang, Prike Prike.Liang at amd.com
Fri Apr 18 13:34:40 UTC 2025


[Public]

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Alex Deucher
> Sent: Thursday, April 17, 2025 6:21 AM
> To: amd-gfx at lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>
> Subject: [PATCH 3/7] drm/amdgpu: switch from queue_active to queue state
>
> Track the state of the queue rather than simply active vs. not.  This is needed for
> other states (hung, preempted, etc.).
> While we are at it, move the state tracking into the user queue front end code.
>
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 101 ++++++++++++------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h |   9 +-
>  drivers/gpu/drm/amd/amdgpu/mes_userqueue.c    |   8 --
>  3 files changed, 77 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> index 04583f9d134f1..8e703dc9dfbbc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> @@ -44,6 +44,45 @@ u32 amdgpu_userqueue_get_supported_ip_mask(struct amdgpu_device *adev)
>       return userq_ip_mask;
>  }
>
> +static int
> +amdgpu_userqueue_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
> +                           struct amdgpu_usermode_queue *queue)
> +{
> +     struct amdgpu_device *adev = uq_mgr->adev;
> +     const struct amdgpu_userq_funcs *userq_funcs =
> +             adev->userq_funcs[queue->queue_type];
> +     int r = 0;
> +
> +     if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
> +             r = userq_funcs->unmap(uq_mgr, queue);
> +             if (r)
> +                     queue->state = AMDGPU_USERQ_STATE_HUNG;
> +             else
> +                     queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
> +     }
> +     return r;
> +}
> +
> +static int
> +amdgpu_userqueue_map_helper(struct amdgpu_userq_mgr *uq_mgr,
> +                         struct amdgpu_usermode_queue *queue)
> +{
> +     struct amdgpu_device *adev = uq_mgr->adev;
> +     const struct amdgpu_userq_funcs *userq_funcs =
> +             adev->userq_funcs[queue->queue_type];
> +     int r = 0;
> +
> +     if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) {
> +             r = userq_funcs->map(uq_mgr, queue);
> +             if (r) {
> +                     queue->state = AMDGPU_USERQ_STATE_HUNG;
> +             } else {
> +                     queue->state = AMDGPU_USERQ_STATE_MAPPED;
> +             }
> +     }
> +     return r;
> +}
> +
>  static void
>  amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr,
>                        struct amdgpu_usermode_queue *queue,
> @@ -79,7 +118,7 @@ amdgpu_userqueue_active(struct amdgpu_userq_mgr *uq_mgr)
>       mutex_lock(&uq_mgr->userq_mutex);
>       /* Resume all the queues for this process */
>       idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id)
> -             ret += queue->queue_active;
> +             ret += queue->state == AMDGPU_USERQ_STATE_MAPPED;
>
>       mutex_unlock(&uq_mgr->userq_mutex);
>       return ret;
> @@ -254,9 +293,8 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
>       struct amdgpu_fpriv *fpriv = filp->driver_priv;
>       struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
>       struct amdgpu_device *adev = uq_mgr->adev;
> -     const struct amdgpu_userq_funcs *uq_funcs;
>       struct amdgpu_usermode_queue *queue;
> -     int r;
> +     int r = 0;
>
>       cancel_delayed_work(&uq_mgr->resume_work);
>       mutex_lock(&uq_mgr->userq_mutex);
> @@ -267,8 +305,7 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
>               mutex_unlock(&uq_mgr->userq_mutex);
>               return -EINVAL;
>       }
> -     uq_funcs = adev->userq_funcs[queue->queue_type];
> -     r = uq_funcs->unmap(uq_mgr, queue);
> +     r = amdgpu_userqueue_unmap_helper(uq_mgr, queue);
>       amdgpu_bo_unpin(queue->db_obj.obj);
>       amdgpu_bo_unref(&queue->db_obj.obj);
>       amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id);
> @@ -414,7 +451,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args)
>       else
>               skip_map_queue = false;
>       if (!skip_map_queue) {
> -             r = uq_funcs->map(uq_mgr, queue);
> +             r = amdgpu_userqueue_map_helper(uq_mgr, queue);
>               if (r) {
>                       mutex_unlock(&adev->userq_mutex);
>                       DRM_ERROR("Failed to map Queue\n");
> @@ -489,19 +526,19 @@ static int
>  amdgpu_userqueue_resume_all(struct amdgpu_userq_mgr *uq_mgr)
>  {
>       struct amdgpu_device *adev = uq_mgr->adev;
> -     const struct amdgpu_userq_funcs *userq_funcs;
>       struct amdgpu_usermode_queue *queue;
>       int queue_id;
> -     int ret = 0;
> +     int ret = 0, r;
>
>       /* Resume all the queues for this process */
>       idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
> -             userq_funcs = adev->userq_funcs[queue->queue_type];
> -             ret |= userq_funcs->map(uq_mgr, queue);
> +             r = amdgpu_userqueue_map_helper(uq_mgr, queue);
> +             if (r)
> +                     ret = r;
Could the error return code be simplified to a single variable, ret, by OR-ing it (|=) with the map/unmap return value?
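For illustration, here is a minimal standalone sketch of the two accumulation styles being compared (fake_map_helper is a hypothetical stand-in for the map/unmap helpers, not driver code). OR-ing negative errno values can merge the bits of two different errors into a third, unrelated value, whereas the "if (r) ret = r;" form used in the patch always carries a real errno (the last failure):

#include <errno.h>
#include <stdio.h>

/* Hypothetical stand-in for amdgpu_userqueue_map_helper():
 * returns 0 on success or a negative errno on failure. */
static int fake_map_helper(int queue_id)
{
	if (queue_id == 1)
		return -EINVAL;
	if (queue_id == 2)
		return -ENOMEM;
	return 0;
}

int main(void)
{
	int ret_or = 0;   /* "ret |= r;" pattern */
	int ret_last = 0; /* "if (r) ret = r;" pattern from the patch */
	int r, queue_id;

	for (queue_id = 0; queue_id < 3; queue_id++) {
		r = fake_map_helper(queue_id);
		ret_or |= r;          /* mixes bits of -EINVAL and -ENOMEM */
		if (r)
			ret_last = r; /* keeps the last errno returned */
	}

	/* Prints ret_or=-2 (neither of the original errors) and ret_last=-12 (-ENOMEM). */
	printf("ret_or=%d ret_last=%d\n", ret_or, ret_last);
	return 0;
}

So the |= form would still be non-zero on any failure, but the value it ends up holding may not be a meaningful errno for the caller.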
Anyway, this patch is Reviewed-by: Prike Liang <Prike.Liang at amd.com>

Thanks,
Prike
>       }
>
>       if (ret)
> -             DRM_ERROR("Failed to map all the queues\n");
> +             dev_err(adev->dev, "Failed to map all the queues\n");
>       return ret;
>  }
>
> @@ -647,19 +684,19 @@ static int
>  amdgpu_userqueue_suspend_all(struct amdgpu_userq_mgr *uq_mgr)
>  {
>       struct amdgpu_device *adev = uq_mgr->adev;
> -     const struct amdgpu_userq_funcs *userq_funcs;
>       struct amdgpu_usermode_queue *queue;
>       int queue_id;
> -     int ret = 0;
> +     int ret = 0, r;
>
>       /* Try to unmap all the queues in this process ctx */
>       idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
> -             userq_funcs = adev->userq_funcs[queue->queue_type];
> -             ret += userq_funcs->unmap(uq_mgr, queue);
> +             r = amdgpu_userqueue_unmap_helper(uq_mgr, queue);
> +             if (r)
> +                     ret = r;
>       }
>
>       if (ret)
> -             DRM_ERROR("Couldn't unmap all the queues\n");
> +             dev_err(adev->dev, "Couldn't unmap all the queues\n");
>       return ret;
>  }
>
> @@ -759,11 +796,10 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
>  int amdgpu_userq_suspend(struct amdgpu_device *adev)
>  {
>       u32 ip_mask = amdgpu_userqueue_get_supported_ip_mask(adev);
> -     const struct amdgpu_userq_funcs *userq_funcs;
>       struct amdgpu_usermode_queue *queue;
>       struct amdgpu_userq_mgr *uqm, *tmp;
>       int queue_id;
> -     int ret = 0;
> +     int ret = 0, r;
>
>       if (!ip_mask)
>               return 0;
> @@ -772,8 +808,9 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev)
>       list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
>               cancel_delayed_work_sync(&uqm->resume_work);
>               idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
> -                     userq_funcs = adev->userq_funcs[queue->queue_type];
> -                     ret |= userq_funcs->unmap(uqm, queue);
> +                     r = amdgpu_userqueue_unmap_helper(uqm, queue);
> +                     if (r)
> +                             ret = r;
>               }
>       }
>       mutex_unlock(&adev->userq_mutex);
> @@ -783,11 +820,10 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev)
>  int amdgpu_userq_resume(struct amdgpu_device *adev)
>  {
>       u32 ip_mask = amdgpu_userqueue_get_supported_ip_mask(adev);
> -     const struct amdgpu_userq_funcs *userq_funcs;
>       struct amdgpu_usermode_queue *queue;
>       struct amdgpu_userq_mgr *uqm, *tmp;
>       int queue_id;
> -     int ret = 0;
> +     int ret = 0, r;
>
>       if (!ip_mask)
>               return 0;
> @@ -795,8 +831,9 @@ int amdgpu_userq_resume(struct amdgpu_device *adev)
>       mutex_lock(&adev->userq_mutex);
>       list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
>               idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
> -                     userq_funcs = adev->userq_funcs[queue->queue_type];
> -                     ret |= userq_funcs->map(uqm, queue);
> +                     r = amdgpu_userqueue_map_helper(uqm, queue);
> +                     if (r)
> +                             ret = r;
>               }
>       }
>       mutex_unlock(&adev->userq_mutex);
> @@ -807,11 +844,10 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
>                                                 u32 idx)
>  {
>       u32 ip_mask = amdgpu_userqueue_get_supported_ip_mask(adev);
> -     const struct amdgpu_userq_funcs *userq_funcs;
>       struct amdgpu_usermode_queue *queue;
>       struct amdgpu_userq_mgr *uqm, *tmp;
>       int queue_id;
> -     int ret = 0;
> +     int ret = 0, r;
>
>       /* only need to stop gfx/compute */
>       if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
> @@ -827,8 +863,9 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
>                       if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
>                            (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
>                           (queue->xcp_id == idx)) {
> -                             userq_funcs = adev->userq_funcs[queue->queue_type];
> -                             ret |= userq_funcs->unmap(uqm, queue);
> +                             r = amdgpu_userqueue_unmap_helper(uqm, queue);
> +                             if (r)
> +                                     ret = r;
>                       }
>               }
>       }
> @@ -840,11 +877,10 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
>                                                  u32 idx)
>  {
>       u32 ip_mask = amdgpu_userqueue_get_supported_ip_mask(adev);
> -     const struct amdgpu_userq_funcs *userq_funcs;
>       struct amdgpu_usermode_queue *queue;
>       struct amdgpu_userq_mgr *uqm, *tmp;
>       int queue_id;
> -     int ret = 0;
> +     int ret = 0, r;
>
>       /* only need to stop gfx/compute */
>       if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
> @@ -859,8 +895,9 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
>                       if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
>                            (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
>                           (queue->xcp_id == idx)) {
> -                             userq_funcs = adev->userq_funcs[queue->queue_type];
> -                             ret |= userq_funcs->map(uqm, queue);
> +                             r = amdgpu_userqueue_map_helper(uqm, queue);
> +                             if (r)
> +                                     ret = r;
>                       }
>               }
>       }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h
> index b49f147eb69cb..8f392a0947a29 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h
> @@ -32,6 +32,13 @@
>  #define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr)
>  #define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr, name)
>
> +enum amdgpu_userqueue_state {
> +     AMDGPU_USERQ_STATE_UNMAPPED = 0,
> +     AMDGPU_USERQ_STATE_MAPPED,
> +     AMDGPU_USERQ_STATE_PREEMPTED,
> +     AMDGPU_USERQ_STATE_HUNG,
> +};
> +
>  struct amdgpu_mqd_prop;
>
>  struct amdgpu_userq_obj {
> @@ -42,7 +49,7 @@ struct amdgpu_userq_obj {
>
>  struct amdgpu_usermode_queue {
>       int                     queue_type;
> -     uint8_t                 queue_active;
> +     enum amdgpu_userqueue_state state;
>       uint64_t                doorbell_handle;
>       uint64_t                doorbell_index;
>       uint64_t                flags;
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> index b3157df8ae107..4c01c3a030956 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> @@ -120,9 +120,6 @@ static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr,
>       struct mes_add_queue_input queue_input;
>       int r;
>
> -     if (queue->queue_active)
> -             return 0;
> -
>       memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
>
>       queue_input.process_va_start = 0;
> @@ -155,7 +152,6 @@ static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr,
>               return r;
>       }
>
> -     queue->queue_active = true;
>       DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n",
>                        userq_props->doorbell_index);
>       return 0;
>  }
> @@ -168,9 +164,6 @@ static int mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr,
>       struct amdgpu_userq_obj *ctx = &queue->fw_obj;
>       int r;
>
> -     if (!queue->queue_active)
> -             return 0;
> -
>       memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
>       queue_input.doorbell_offset = queue->doorbell_index;
>       queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
> @@ -180,7 +173,6 @@ static int mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr,
>       amdgpu_mes_unlock(&adev->mes);
>       if (r)
>               DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", r);
> -     queue->queue_active = false;
>       return r;
>  }
>
> --
> 2.49.0


