[PATCH 3/7] drm/amdgpu: switch from queue_active to queue state
Liang, Prike
Prike.Liang at amd.com
Fri Apr 18 13:34:40 UTC 2025
[Public]
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Alex
> Deucher
> Sent: Thursday, April 17, 2025 6:21 AM
> To: amd-gfx at lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>
> Subject: [PATCH 3/7] drm/amdgpu: switch from queue_active to queue state
>
> Track the state of the queue rather than simple active vs not. This is needed for
> other states (hung, preempted, etc.).
> While we are at it, move the state tracking into the user queue front end code.
>
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 101 ++++++++++++------
> drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h | 9 +-
> drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 8 --
> 3 files changed, 77 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> index 04583f9d134f1..8e703dc9dfbbc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> @@ -44,6 +44,45 @@ u32 amdgpu_userqueue_get_supported_ip_mask(struct
> amdgpu_device *adev)
> return userq_ip_mask;
> }
>
> +static int
> +amdgpu_userqueue_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
> + struct amdgpu_usermode_queue *queue) {
> + struct amdgpu_device *adev = uq_mgr->adev;
> + const struct amdgpu_userq_funcs *userq_funcs =
> + adev->userq_funcs[queue->queue_type];
> + int r = 0;
> +
> + if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
> + r = userq_funcs->unmap(uq_mgr, queue);
> + if (r)
> + queue->state = AMDGPU_USERQ_STATE_HUNG;
> + else
> + queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
> + }
> + return r;
> +}
> +
> +static int
> +amdgpu_userqueue_map_helper(struct amdgpu_userq_mgr *uq_mgr,
> + struct amdgpu_usermode_queue *queue) {
> + struct amdgpu_device *adev = uq_mgr->adev;
> + const struct amdgpu_userq_funcs *userq_funcs =
> + adev->userq_funcs[queue->queue_type];
> + int r = 0;
> +
> + if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) {
> + r = userq_funcs->map(uq_mgr, queue);
> + if (r) {
> + queue->state = AMDGPU_USERQ_STATE_HUNG;
> + } else {
> + queue->state = AMDGPU_USERQ_STATE_MAPPED;
> + }
> + }
> + return r;
> +}
> +
> static void
> amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr,
> struct amdgpu_usermode_queue *queue, @@ -79,7 +118,7
> @@ amdgpu_userqueue_active(struct amdgpu_userq_mgr *uq_mgr)
> mutex_lock(&uq_mgr->userq_mutex);
> /* Resume all the queues for this process */
> idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id)
> - ret += queue->queue_active;
> + ret += queue->state == AMDGPU_USERQ_STATE_MAPPED;
>
> mutex_unlock(&uq_mgr->userq_mutex);
> return ret;
> @@ -254,9 +293,8 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int
> queue_id)
> struct amdgpu_fpriv *fpriv = filp->driver_priv;
> struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
> struct amdgpu_device *adev = uq_mgr->adev;
> - const struct amdgpu_userq_funcs *uq_funcs;
> struct amdgpu_usermode_queue *queue;
> - int r;
> + int r = 0;
>
> cancel_delayed_work(&uq_mgr->resume_work);
> mutex_lock(&uq_mgr->userq_mutex);
> @@ -267,8 +305,7 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int
> queue_id)
> mutex_unlock(&uq_mgr->userq_mutex);
> return -EINVAL;
> }
> - uq_funcs = adev->userq_funcs[queue->queue_type];
> - r = uq_funcs->unmap(uq_mgr, queue);
> + r = amdgpu_userqueue_unmap_helper(uq_mgr, queue);
> amdgpu_bo_unpin(queue->db_obj.obj);
> amdgpu_bo_unref(&queue->db_obj.obj);
> amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id); @@ -414,7
> +451,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union
> drm_amdgpu_userq *args)
> else
> skip_map_queue = false;
> if (!skip_map_queue) {
> - r = uq_funcs->map(uq_mgr, queue);
> + r = amdgpu_userqueue_map_helper(uq_mgr, queue);
> if (r) {
> mutex_unlock(&adev->userq_mutex);
> DRM_ERROR("Failed to map Queue\n");
> @@ -489,19 +526,19 @@ static int
> amdgpu_userqueue_resume_all(struct amdgpu_userq_mgr *uq_mgr) {
> struct amdgpu_device *adev = uq_mgr->adev;
> - const struct amdgpu_userq_funcs *userq_funcs;
> struct amdgpu_usermode_queue *queue;
> int queue_id;
> - int ret = 0;
> + int ret = 0, r;
>
> /* Resume all the queues for this process */
> idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
> - userq_funcs = adev->userq_funcs[queue->queue_type];
> - ret |= userq_funcs->map(uq_mgr, queue);
> + r = amdgpu_userqueue_map_helper(uq_mgr, queue);
> + if (r)
> + ret = r;
Could the error return code be simplified to a single variable, i.e. accumulate the map/unmap return value into ret with |= instead of using a separate r?
Anyway, this patch is Reviewed-by: Prike Liang <Prike.Liang at amd.com>
Thanks,
Prike
> }
>
> if (ret)
> - DRM_ERROR("Failed to map all the queues\n");
> + dev_err(adev->dev, "Failed to map all the queues\n");
> return ret;
> }
>
> @@ -647,19 +684,19 @@ static int
> amdgpu_userqueue_suspend_all(struct amdgpu_userq_mgr *uq_mgr) {
> struct amdgpu_device *adev = uq_mgr->adev;
> - const struct amdgpu_userq_funcs *userq_funcs;
> struct amdgpu_usermode_queue *queue;
> int queue_id;
> - int ret = 0;
> + int ret = 0, r;
>
> /* Try to unmap all the queues in this process ctx */
> idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
> - userq_funcs = adev->userq_funcs[queue->queue_type];
> - ret += userq_funcs->unmap(uq_mgr, queue);
> + r = amdgpu_userqueue_unmap_helper(uq_mgr, queue);
> + if (r)
> + ret = r;
> }
>
> if (ret)
> - DRM_ERROR("Couldn't unmap all the queues\n");
> + dev_err(adev->dev, "Couldn't unmap all the queues\n");
> return ret;
> }
>
> @@ -759,11 +796,10 @@ void amdgpu_userq_mgr_fini(struct
> amdgpu_userq_mgr *userq_mgr) int amdgpu_userq_suspend(struct
> amdgpu_device *adev) {
> u32 ip_mask = amdgpu_userqueue_get_supported_ip_mask(adev);
> - const struct amdgpu_userq_funcs *userq_funcs;
> struct amdgpu_usermode_queue *queue;
> struct amdgpu_userq_mgr *uqm, *tmp;
> int queue_id;
> - int ret = 0;
> + int ret = 0, r;
>
> if (!ip_mask)
> return 0;
> @@ -772,8 +808,9 @@ int amdgpu_userq_suspend(struct amdgpu_device
> *adev)
> list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
> cancel_delayed_work_sync(&uqm->resume_work);
> idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
> - userq_funcs = adev->userq_funcs[queue->queue_type];
> - ret |= userq_funcs->unmap(uqm, queue);
> + r = amdgpu_userqueue_unmap_helper(uqm, queue);
> + if (r)
> + ret = r;
> }
> }
> mutex_unlock(&adev->userq_mutex);
> @@ -783,11 +820,10 @@ int amdgpu_userq_suspend(struct amdgpu_device
> *adev) int amdgpu_userq_resume(struct amdgpu_device *adev) {
> u32 ip_mask = amdgpu_userqueue_get_supported_ip_mask(adev);
> - const struct amdgpu_userq_funcs *userq_funcs;
> struct amdgpu_usermode_queue *queue;
> struct amdgpu_userq_mgr *uqm, *tmp;
> int queue_id;
> - int ret = 0;
> + int ret = 0, r;
>
> if (!ip_mask)
> return 0;
> @@ -795,8 +831,9 @@ int amdgpu_userq_resume(struct amdgpu_device *adev)
> mutex_lock(&adev->userq_mutex);
> list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
> idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
> - userq_funcs = adev->userq_funcs[queue->queue_type];
> - ret |= userq_funcs->map(uqm, queue);
> + r = amdgpu_userqueue_map_helper(uqm, queue);
> + if (r)
> + ret = r;
> }
> }
> mutex_unlock(&adev->userq_mutex);
> @@ -807,11 +844,10 @@ int
> amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
> u32 idx)
> {
> u32 ip_mask = amdgpu_userqueue_get_supported_ip_mask(adev);
> - const struct amdgpu_userq_funcs *userq_funcs;
> struct amdgpu_usermode_queue *queue;
> struct amdgpu_userq_mgr *uqm, *tmp;
> int queue_id;
> - int ret = 0;
> + int ret = 0, r;
>
> /* only need to stop gfx/compute */
> if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 <<
> AMDGPU_HW_IP_COMPUTE)))) @@ -827,8 +863,9 @@ int
> amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
> if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
> (queue->queue_type == AMDGPU_HW_IP_COMPUTE))
> &&
> (queue->xcp_id == idx)) {
> - userq_funcs = adev->userq_funcs[queue-
> >queue_type];
> - ret |= userq_funcs->unmap(uqm, queue);
> + r = amdgpu_userqueue_unmap_helper(uqm, queue);
> + if (r)
> + ret = r;
> }
> }
> }
> @@ -840,11 +877,10 @@ int
> amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
> u32 idx)
> {
> u32 ip_mask = amdgpu_userqueue_get_supported_ip_mask(adev);
> - const struct amdgpu_userq_funcs *userq_funcs;
> struct amdgpu_usermode_queue *queue;
> struct amdgpu_userq_mgr *uqm, *tmp;
> int queue_id;
> - int ret = 0;
> + int ret = 0, r;
>
> /* only need to stop gfx/compute */
> if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 <<
> AMDGPU_HW_IP_COMPUTE)))) @@ -859,8 +895,9 @@ int
> amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
> if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
> (queue->queue_type == AMDGPU_HW_IP_COMPUTE))
> &&
> (queue->xcp_id == idx)) {
> - userq_funcs = adev->userq_funcs[queue-
> >queue_type];
> - ret |= userq_funcs->map(uqm, queue);
> + r = amdgpu_userqueue_map_helper(uqm, queue);
> + if (r)
> + ret = r;
> }
> }
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h
> index b49f147eb69cb..8f392a0947a29 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h
> @@ -32,6 +32,13 @@
> #define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr)
> #define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr,
> name)
>
> +enum amdgpu_userqueue_state {
> + AMDGPU_USERQ_STATE_UNMAPPED = 0,
> + AMDGPU_USERQ_STATE_MAPPED,
> + AMDGPU_USERQ_STATE_PREEMPTED,
> + AMDGPU_USERQ_STATE_HUNG,
> +};
> +
> struct amdgpu_mqd_prop;
>
> struct amdgpu_userq_obj {
> @@ -42,7 +49,7 @@ struct amdgpu_userq_obj {
>
> struct amdgpu_usermode_queue {
> int queue_type;
> - uint8_t queue_active;
> + enum amdgpu_userqueue_state state;
> uint64_t doorbell_handle;
> uint64_t doorbell_index;
> uint64_t flags;
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> index b3157df8ae107..4c01c3a030956 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> @@ -120,9 +120,6 @@ static int mes_userq_map(struct amdgpu_userq_mgr
> *uq_mgr,
> struct mes_add_queue_input queue_input;
> int r;
>
> - if (queue->queue_active)
> - return 0;
> -
> memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
>
> queue_input.process_va_start = 0;
> @@ -155,7 +152,6 @@ static int mes_userq_map(struct amdgpu_userq_mgr
> *uq_mgr,
> return r;
> }
>
> - queue->queue_active = true;
> DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n",
> userq_props->doorbell_index);
> return 0;
> }
> @@ -168,9 +164,6 @@ static int mes_userq_unmap(struct amdgpu_userq_mgr
> *uq_mgr,
> struct amdgpu_userq_obj *ctx = &queue->fw_obj;
> int r;
>
> - if (!queue->queue_active)
> - return 0;
> -
> memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
> queue_input.doorbell_offset = queue->doorbell_index;
> queue_input.gang_context_addr = ctx->gpu_addr +
> AMDGPU_USERQ_PROC_CTX_SZ; @@ -180,7 +173,6 @@ static int
> mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr,
> amdgpu_mes_unlock(&adev->mes);
> if (r)
> DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", r);
> - queue->queue_active = false;
> return r;
> }
>
> --
> 2.49.0
More information about the amd-gfx
mailing list