[PATCH v4 05/11] drm/amdgpu: add userq object va track helpers

Tue Jul 1 13:43:21 UTC 2025

[Public]

Regards,
      Prike

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Liang, Prike
> Sent: Tuesday, July 1, 2025 9:39 PM
> To: Koenig, Christian <Christian.Koenig at amd.com>; Alex Deucher
> <alexdeucher at gmail.com>
> Cc: amd-gfx at lists.freedesktop.org; Deucher, Alexander
> <Alexander.Deucher at amd.com>
> Subject: RE: [PATCH v4 05/11] drm/amdgpu: add userq object va track helpers
>
> [Public]
>
> Regards,
>       Prike
>
> > -----Original Message-----
> > From: Koenig, Christian <Christian.Koenig at amd.com>
> > Sent: Wednesday, June 25, 2025 4:02 PM
> > To: Alex Deucher <alexdeucher at gmail.com>; Liang, Prike
> > <Prike.Liang at amd.com>
> > Cc: amd-gfx at lists.freedesktop.org; Deucher, Alexander
> > <Alexander.Deucher at amd.com>
> > Subject: Re: [PATCH v4 05/11] drm/amdgpu: add userq object va track
> > helpers
> >
> > On 24.06.25 19:15, Alex Deucher wrote:
> > > On Tue, Jun 24, 2025 at 4:54 AM Prike Liang <Prike.Liang at amd.com> wrote:
> > >>
> > >> Add the userq object virtual address get(),mapped() and put()
> > >> helpers for tracking the userq obj va address usage.
> > >>
> > >> Signed-off-by: Prike Liang <Prike.Liang at amd.com>
> > >> ---
> > >>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c  | 132
> > >> ++++++++++++++++++++-  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h  |
> > 14 +++
> > >>  drivers/gpu/drm/amd/amdgpu/mes_userqueue.c |   4 +
> > >>  3 files changed, 149 insertions(+), 1 deletion(-)
> > >>
> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > >> index a20f7ccbe98e..cbbd1860a69a 100644
> > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > >> @@ -76,6 +76,134 @@ int amdgpu_userq_input_va_validate(struct
> > >> amdgpu_vm
> > *vm, u64 addr,
> > >>         return -EINVAL;
> > >>  }
> > >>
> > >> +int amdgpu_userq_buffer_va_get(struct amdgpu_vm *vm, u64 addr) {
> > >> +       struct amdgpu_bo_va_mapping *mapping;
> > >> +       u64 user_addr;
> > >> +       int r;
> > >> +
> > >> +       user_addr = amdgpu_userq_va_align(addr);
> > >> +       r = amdgpu_bo_reserve(vm->root.bo, false);
> > >> +       if (r)
> > >> +               return r;
> > >> +
> > >> +       mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
> > >> +       if (!mapping)
> > >> +               goto out_err;
> > >> +
> > >> +       /*
> > >> +        * Need to unify the following userq va reference.
> > >> +        *  mqd  bo
> > >> +        *  rptr bo
> > >> +        *  wptr bo
> > >> +        *  eop  bo
> > >> +        *  doorbell bo
> > >> +        */
> > >> +       /*amdgpu_bo_ref(mapping->bo_va->base.bo);*/
> > >> +       mapping->bo_va->queue_refcount++;
> > >> +
> > >> +       amdgpu_bo_unreserve(vm->root.bo);
> > >> +       return 0;
> > >> +
> > >> +out_err:
> > >> +       amdgpu_bo_unreserve(vm->root.bo);
> > >> +       return -EINVAL;
> > >> +}
> > >> +
> > >> +bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr) {
> > >> +       struct amdgpu_bo_va_mapping *mapping;
> > >> +       u64 user_addr;
> > >> +       bool r;
> > >> +
> > >> +       user_addr = amdgpu_userq_va_align(addr);
> > >> +
> > >> +       if (amdgpu_bo_reserve(vm->root.bo, false))
> > >> +               return false;
> > >> +
> > >> +       mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
> > >> +       if (!IS_ERR_OR_NULL(mapping) &&
> > >> + mapping->bo_va->queue_refcount >
> > 0)
> > >> +               r = true;
> > >> +       else
> > >> +               r = false;
> > >> +       amdgpu_bo_unreserve(vm->root.bo);
> > >> +
> > >> +       return r;
> > >> +}
> > >> +
> > >> +bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_vm *vm,
> > >> +                       struct amdgpu_usermode_queue *queue) {
> > >> +
> > >> +       if (amdgpu_userq_buffer_va_mapped(vm, queue->queue_va) ||
> > >> +           amdgpu_userq_buffer_va_mapped(vm, queue->rptr_va) ||
> > >> +           amdgpu_userq_buffer_va_mapped(vm, queue->wptr_va) ||
> > >> +           amdgpu_userq_buffer_va_mapped(vm, queue->eop_va) ||
> > >> +           amdgpu_userq_buffer_va_mapped(vm, queue->shadow_va) ||
> > >> +           amdgpu_userq_buffer_va_mapped(vm, queue->csa_va))
> > >
> > > Only some of these are relevant for each queue type.  Rather than
> > > checking all of the critical buffers in all of your helper
> > > functions, it might be cleaner to add new userq callbacks that only
> > > check/update the relevant buffers for the queue type.
> Yes, validating on a per-queue basis will be clearer for each userq type. But that will
> be more complicated and require more checkpoints. In this current proposal, the
> irrelevant userq virtual address will be invalid and return early place at
> amdgpu_vm_bo_lookup_mapping(), and the current implement seems to be more
> uniform and simpler. Thought?
Idea? If someone still prefer to do the per queue validation, then I will do that.

> > >> +               return true;
> > >> +       else
> > >> +               return false;
> > >> +}
> > >> +
> > >> +int amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm, u64 addr) {
> > >> +       struct amdgpu_bo_va_mapping *mapping;
> > >> +       u64 user_addr;
> > >> +       int r;
> > >> +
> > >> +       user_addr = amdgpu_userq_va_align(addr);
> > >> +       r = amdgpu_bo_reserve(vm->root.bo, false);
> > >> +       if (r)
> > >> +               return r;
> > >> +
> > >> +       mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
> > >> +       if (!mapping)
> > >> +               goto out_err;
> > >> +       /*
> > >> +        * If here refer to the userq VM BOs and keep the usage count by
> > >> +        * amdgpu_bo_ref(mapping->bo_va->base.bo) at creating the userq
> IOCTL,
> > >> +        * this reference and usage counter will be kept until
> > amdgpu_userq_destroy(),
> > >> +        * while the userq VA is unmapped at amdgpu_vm_bo_unmap(),
> > >> + which is
> > ahead of
> > >> +        * amdgpu_userq_destroy(). So, when amdgpu_vm_bo_unmap()
> > >> + tries to
> > unmap the
> > >> +        * userq VA mapping, it will result in an unmap error
> > >> + caused by the BO's
> > reference.
> > >> +        */
> > >> +       /*amdgpu_bo_unref(&mapping->bo_va->base.bo);*/
> > >
> > > I still don't follow this.  Why can't we get a reference in userq
> > > create and put the reference in userq destroy.  If the app is
> > > freeing the buffers before the queue, that's an app bug.
> >
> > We should probably just signal the eviction fence when the application
> > tries to unmap a BO which is vital for an user queue.
> >
> > On restoring the evicted queue we then check again if all the VAs
> > needed for this queue are valid and if they aren't we deny resuming the queue.
> >
> > Christian.
> >
> > >
> > > Alex
> > >
> > >> +
> > >> +       mapping->bo_va->queue_refcount--;
> > >> +
> > >> +       amdgpu_bo_unreserve(vm->root.bo);
> > >> +       return 0;
> > >> +
> > >> +out_err:
> > >> +       amdgpu_bo_unreserve(vm->root.bo);
> > >> +       return -EINVAL;
> > >> +}
> > >> +
> > >> +static void amdgpu_userq_buffer_vas_get(struct amdgpu_vm *vm,
> > >> +                       struct amdgpu_usermode_queue *queue) {
> > >> +       amdgpu_userq_buffer_va_get(vm, queue->queue_va);
> > >> +       amdgpu_userq_buffer_va_get(vm, queue->rptr_va);
> > >> +       amdgpu_userq_buffer_va_get(vm, queue->wptr_va);
> > >> +       amdgpu_userq_buffer_va_get(vm, queue->eop_va);
> > >> +       amdgpu_userq_buffer_va_get(vm, queue->shadow_va);
> > >> +       amdgpu_userq_buffer_va_get(vm, queue->csa_va); }
> > >> +
> > >> +int amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm,
> > >> +                       struct amdgpu_usermode_queue *queue) {
> > >> +       amdgpu_userq_buffer_va_put(vm, queue->queue_va);
> > >> +       amdgpu_userq_buffer_va_put(vm, queue->rptr_va);
> > >> +       amdgpu_userq_buffer_va_put(vm, queue->wptr_va);
> > >> +       amdgpu_userq_buffer_va_put(vm, queue->eop_va);
> > >> +       amdgpu_userq_buffer_va_put(vm, queue->shadow_va);
> > >> +       amdgpu_userq_buffer_va_put(vm, queue->csa_va);
> > >> +
> > >> +       return 0;
> > >> +}
> > >> +
> > >>  static int
> > >>  amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
> > >>                           struct amdgpu_usermode_queue *queue) @@
> > >> -442,6 +570,9 @@ amdgpu_userq_create(struct drm_file *filp, union
> > drm_amdgpu_userq *args)
> > >>         queue->queue_type = args->in.ip_type;
> > >>         queue->vm = &fpriv->vm;
> > >>         queue->priority = priority;
> > >> +       queue->queue_va = args->in.queue_va;
> > >> +       queue->rptr_va = args->in.rptr_va;
> > >> +       queue->wptr_va = args->in.wptr_va;
> > >>
> > >>         db_info.queue_type = queue->queue_type;
> > >>         db_info.doorbell_handle = queue->doorbell_handle; @@ -472,7
> > >> +603,6 @@ amdgpu_userq_create(struct drm_file *filp, union
> > drm_amdgpu_userq *args)
> > >>                 goto unlock;
> > >>         }
> > >>
> > >> -
> > >>         qid = idr_alloc(&uq_mgr->userq_idr, queue, 1,
> > AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL);
> > >>         if (qid < 0) {
> > >>                 drm_file_err(uq_mgr->file, "Failed to allocate a
> > >> queue id\n"); diff --git
> > >> a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > >> index 704935ca0c36..194ec7a6b3b2 100644
> > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> > >> @@ -52,6 +52,13 @@ struct amdgpu_usermode_queue {
> > >>         enum amdgpu_userq_state state;
> > >>         uint64_t                doorbell_handle;
> > >>         uint64_t                doorbell_index;
> > >> +       uint64_t                queue_va;
> > >> +       uint64_t                rptr_va;
> > >> +       uint64_t                wptr_va;
> > >> +       uint64_t                eop_va;
> > >> +       uint64_t                shadow_va;
> > >> +       uint64_t                csa_va;
> > >> +
> > >>         uint64_t                flags;
> > >>         struct amdgpu_mqd_prop  *userq_prop;
> > >>         struct amdgpu_userq_mgr *userq_mgr; @@ -134,4 +141,11 @@
> > >> int amdgpu_userq_start_sched_for_enforce_isolation(struct
> > >> amdgpu_device *adev,
> > >>
> > >>  int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
> > >>                         u64 expected_size);
> > >> +int amdgpu_userq_buffer_va_get(struct amdgpu_vm *vm, u64 addr);
> > >> +bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64
> > >> +addr); bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_vm *vm,
> > >> +                       struct amdgpu_usermode_queue *queue); int
> > >> +amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm, u64 addr); int
> > >> +amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm,
> > >> +                       struct amdgpu_usermode_queue *queue);
> > >>  #endif
> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > >> b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > >> index 4615d3fba530..c9cde14064d1 100644
> > >> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > >> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> > >> @@ -263,6 +263,7 @@ static int mes_userq_mqd_create(struct
> > amdgpu_userq_mgr *uq_mgr,
> > >>                 userq_props->hqd_active = false;
> > >>                 userq_props->tmz_queue =
> > >>                         mqd_user->flags &
> > >> AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
> > >> +               queue->eop_va = compute_mqd->eop_va;
> > >>                 kfree(compute_mqd);
> > >>         } else if (queue->queue_type == AMDGPU_HW_IP_GFX) {
> > >>                 struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11; @@
> > >> -284,6 +285,8 @@ static int mes_userq_mqd_create(struct
> > >> amdgpu_userq_mgr
> > *uq_mgr,
> > >>                 userq_props->csa_addr = mqd_gfx_v11->csa_va;
> > >>                 userq_props->tmz_queue =
> > >>                         mqd_user->flags &
> > >> AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
> > >> +               queue->shadow_va = mqd_gfx_v11->shadow_va;
> > >> +               queue->csa_va = mqd_gfx_v11->csa_va;
> > >>
> > >>                 if (amdgpu_userq_input_va_validate(queue->vm,
> > >> mqd_gfx_v11-
> > >shadow_va,
> > >>                                         shadow_info.shadow_size)) {
> > >> @@ -317,6 +320,7 @@ static int mes_userq_mqd_create(struct
> > amdgpu_userq_mgr *uq_mgr,
> > >>                 }
> > >>
> > >>                 userq_props->csa_addr = mqd_sdma_v11->csa_va;
> > >> +               queue->csa_va = mqd_sdma_v11->csa_va;
> > >>                 kfree(mqd_sdma_v11);
> > >>         }
> > >>
> > >> --
> > >> 2.34.1
> > >>