[PATCH v4 05/11] drm/amdgpu: add userq object va track helpers
Liang, Prike
Prike.Liang at amd.com
Tue Jul 1 13:38:32 UTC 2025
[Public]
Regards,
Prike
> -----Original Message-----
> From: Koenig, Christian <Christian.Koenig at amd.com>
> Sent: Wednesday, June 25, 2025 4:02 PM
> To: Alex Deucher <alexdeucher at gmail.com>; Liang, Prike
> <Prike.Liang at amd.com>
> Cc: amd-gfx at lists.freedesktop.org; Deucher, Alexander
> <Alexander.Deucher at amd.com>
> Subject: Re: [PATCH v4 05/11] drm/amdgpu: add userq object va track helpers
>
> On 24.06.25 19:15, Alex Deucher wrote:
> > On Tue, Jun 24, 2025 at 4:54 AM Prike Liang <Prike.Liang at amd.com> wrote:
> >>
> >> Add the userq object virtual address get(),mapped() and put() helpers
> >> for tracking the userq obj va address usage.
> >>
> >> Signed-off-by: Prike Liang <Prike.Liang at amd.com>
> >> ---
> >> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 132
> >> ++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h |
> 14 +++
> >> drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 4 +
> >> 3 files changed, 149 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >> index a20f7ccbe98e..cbbd1860a69a 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >> @@ -76,6 +76,134 @@ int amdgpu_userq_input_va_validate(struct amdgpu_vm
> *vm, u64 addr,
> >> return -EINVAL;
> >> }
> >>
> >> +int amdgpu_userq_buffer_va_get(struct amdgpu_vm *vm, u64 addr) {
> >> + struct amdgpu_bo_va_mapping *mapping;
> >> + u64 user_addr;
> >> + int r;
> >> +
> >> + user_addr = amdgpu_userq_va_align(addr);
> >> + r = amdgpu_bo_reserve(vm->root.bo, false);
> >> + if (r)
> >> + return r;
> >> +
> >> + mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
> >> + if (!mapping)
> >> + goto out_err;
> >> +
> >> + /*
> >> + * Need to unify the following userq va reference.
> >> + * mqd bo
> >> + * rptr bo
> >> + * wptr bo
> >> + * eop bo
> >> + * doorbell bo
> >> + */
> >> + /*amdgpu_bo_ref(mapping->bo_va->base.bo);*/
> >> + mapping->bo_va->queue_refcount++;
> >> +
> >> + amdgpu_bo_unreserve(vm->root.bo);
> >> + return 0;
> >> +
> >> +out_err:
> >> + amdgpu_bo_unreserve(vm->root.bo);
> >> + return -EINVAL;
> >> +}
> >> +
> >> +bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr) {
> >> + struct amdgpu_bo_va_mapping *mapping;
> >> + u64 user_addr;
> >> + bool r;
> >> +
> >> + user_addr = amdgpu_userq_va_align(addr);
> >> +
> >> + if (amdgpu_bo_reserve(vm->root.bo, false))
> >> + return false;
> >> +
> >> + mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
> >> + if (!IS_ERR_OR_NULL(mapping) && mapping->bo_va->queue_refcount >
> 0)
> >> + r = true;
> >> + else
> >> + r = false;
> >> + amdgpu_bo_unreserve(vm->root.bo);
> >> +
> >> + return r;
> >> +}
> >> +
> >> +bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_vm *vm,
> >> + struct amdgpu_usermode_queue *queue) {
> >> +
> >> + if (amdgpu_userq_buffer_va_mapped(vm, queue->queue_va) ||
> >> + amdgpu_userq_buffer_va_mapped(vm, queue->rptr_va) ||
> >> + amdgpu_userq_buffer_va_mapped(vm, queue->wptr_va) ||
> >> + amdgpu_userq_buffer_va_mapped(vm, queue->eop_va) ||
> >> + amdgpu_userq_buffer_va_mapped(vm, queue->shadow_va) ||
> >> + amdgpu_userq_buffer_va_mapped(vm, queue->csa_va))
> >
> > Only some of these are relevant for each queue type. Rather than
> > checking all of the critical buffers in all of your helper functions,
> > it might be cleaner to add new userq callbacks that only check/update
> > the relevant buffers for the queue type.
Yes, validating on a per-queue basis will be clearer for each userq type. But that will
be more complicated and require more checkpoints. In this current proposal, the irrelevant
userq virtual address will be invalid and return early at amdgpu_vm_bo_lookup_mapping(),
and the current implementation seems to be more uniform and simpler. Thoughts?
> >> + return true;
> >> + else
> >> + return false;
> >> +}
> >> +
> >> +int amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm, u64 addr) {
> >> + struct amdgpu_bo_va_mapping *mapping;
> >> + u64 user_addr;
> >> + int r;
> >> +
> >> + user_addr = amdgpu_userq_va_align(addr);
> >> + r = amdgpu_bo_reserve(vm->root.bo, false);
> >> + if (r)
> >> + return r;
> >> +
> >> + mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
> >> + if (!mapping)
> >> + goto out_err;
> >> + /*
> >> + * If here refer to the userq VM BOs and keep the usage count by
> >> + * amdgpu_bo_ref(mapping->bo_va->base.bo) at creating the userq IOCTL,
> >> + * this reference and usage counter will be kept until
> amdgpu_userq_destroy(),
> >> + * while the userq VA is unmapped at amdgpu_vm_bo_unmap(), which is
> ahead of
> >> + * amdgpu_userq_destroy(). So, when amdgpu_vm_bo_unmap() tries to
> unmap the
> >> + * userq VA mapping, it will result in an unmap error caused by the BO's
> reference.
> >> + */
> >> + /*amdgpu_bo_unref(&mapping->bo_va->base.bo);*/
> >
> > I still don't follow this. Why can't we get a reference in userq
> > create and put the reference in userq destroy. If the app is freeing
> > the buffers before the queue, that's an app bug.
>
> We should probably just signal the eviction fence when the application tries to
> unmap a BO which is vital for an user queue.
>
> On restoring the evicted queue we then check again if all the VAs needed for this
> queue are valid and if they aren't we deny resuming the queue.
>
> Christian.
>
> >
> > Alex
> >
> >> +
> >> + mapping->bo_va->queue_refcount--;
> >> +
> >> + amdgpu_bo_unreserve(vm->root.bo);
> >> + return 0;
> >> +
> >> +out_err:
> >> + amdgpu_bo_unreserve(vm->root.bo);
> >> + return -EINVAL;
> >> +}
> >> +
> >> +static void amdgpu_userq_buffer_vas_get(struct amdgpu_vm *vm,
> >> + struct amdgpu_usermode_queue *queue) {
> >> + amdgpu_userq_buffer_va_get(vm, queue->queue_va);
> >> + amdgpu_userq_buffer_va_get(vm, queue->rptr_va);
> >> + amdgpu_userq_buffer_va_get(vm, queue->wptr_va);
> >> + amdgpu_userq_buffer_va_get(vm, queue->eop_va);
> >> + amdgpu_userq_buffer_va_get(vm, queue->shadow_va);
> >> + amdgpu_userq_buffer_va_get(vm, queue->csa_va); }
> >> +
> >> +int amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm,
> >> + struct amdgpu_usermode_queue *queue) {
> >> + amdgpu_userq_buffer_va_put(vm, queue->queue_va);
> >> + amdgpu_userq_buffer_va_put(vm, queue->rptr_va);
> >> + amdgpu_userq_buffer_va_put(vm, queue->wptr_va);
> >> + amdgpu_userq_buffer_va_put(vm, queue->eop_va);
> >> + amdgpu_userq_buffer_va_put(vm, queue->shadow_va);
> >> + amdgpu_userq_buffer_va_put(vm, queue->csa_va);
> >> +
> >> + return 0;
> >> +}
> >> +
> >> static int
> >> amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
> >> struct amdgpu_usermode_queue *queue) @@
> >> -442,6 +570,9 @@ amdgpu_userq_create(struct drm_file *filp, union
> drm_amdgpu_userq *args)
> >> queue->queue_type = args->in.ip_type;
> >> queue->vm = &fpriv->vm;
> >> queue->priority = priority;
> >> + queue->queue_va = args->in.queue_va;
> >> + queue->rptr_va = args->in.rptr_va;
> >> + queue->wptr_va = args->in.wptr_va;
> >>
> >> db_info.queue_type = queue->queue_type;
> >> db_info.doorbell_handle = queue->doorbell_handle; @@ -472,7
> >> +603,6 @@ amdgpu_userq_create(struct drm_file *filp, union
> drm_amdgpu_userq *args)
> >> goto unlock;
> >> }
> >>
> >> -
> >> qid = idr_alloc(&uq_mgr->userq_idr, queue, 1,
> AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL);
> >> if (qid < 0) {
> >> drm_file_err(uq_mgr->file, "Failed to allocate a
> >> queue id\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> >> index 704935ca0c36..194ec7a6b3b2 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> >> @@ -52,6 +52,13 @@ struct amdgpu_usermode_queue {
> >> enum amdgpu_userq_state state;
> >> uint64_t doorbell_handle;
> >> uint64_t doorbell_index;
> >> + uint64_t queue_va;
> >> + uint64_t rptr_va;
> >> + uint64_t wptr_va;
> >> + uint64_t eop_va;
> >> + uint64_t shadow_va;
> >> + uint64_t csa_va;
> >> +
> >> uint64_t flags;
> >> struct amdgpu_mqd_prop *userq_prop;
> >> struct amdgpu_userq_mgr *userq_mgr; @@ -134,4 +141,11 @@ int
> >> amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device
> >> *adev,
> >>
> >> int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
> >> u64 expected_size);
> >> +int amdgpu_userq_buffer_va_get(struct amdgpu_vm *vm, u64 addr); bool
> >> +amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr); bool
> >> +amdgpu_userq_buffer_vas_mapped(struct amdgpu_vm *vm,
> >> + struct amdgpu_usermode_queue *queue); int
> >> +amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm, u64 addr); int
> >> +amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm,
> >> + struct amdgpu_usermode_queue *queue);
> >> #endif
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> >> b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> >> index 4615d3fba530..c9cde14064d1 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> >> @@ -263,6 +263,7 @@ static int mes_userq_mqd_create(struct
> amdgpu_userq_mgr *uq_mgr,
> >> userq_props->hqd_active = false;
> >> userq_props->tmz_queue =
> >> mqd_user->flags &
> >> AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
> >> + queue->eop_va = compute_mqd->eop_va;
> >> kfree(compute_mqd);
> >> } else if (queue->queue_type == AMDGPU_HW_IP_GFX) {
> >> struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11; @@
> >> -284,6 +285,8 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr
> *uq_mgr,
> >> userq_props->csa_addr = mqd_gfx_v11->csa_va;
> >> userq_props->tmz_queue =
> >> mqd_user->flags &
> >> AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
> >> + queue->shadow_va = mqd_gfx_v11->shadow_va;
> >> + queue->csa_va = mqd_gfx_v11->csa_va;
> >>
> >> if (amdgpu_userq_input_va_validate(queue->vm, mqd_gfx_v11-
> >shadow_va,
> >> shadow_info.shadow_size)) {
> >> @@ -317,6 +320,7 @@ static int mes_userq_mqd_create(struct
> amdgpu_userq_mgr *uq_mgr,
> >> }
> >>
> >> userq_props->csa_addr = mqd_sdma_v11->csa_va;
> >> + queue->csa_va = mqd_sdma_v11->csa_va;
> >> kfree(mqd_sdma_v11);
> >> }
> >>
> >> --
> >> 2.34.1
> >>
More information about the amd-gfx
mailing list