[PATCH v5 7/9] drm/amdgpu: add userq va unmap validated helper

Tue Jul 8 12:33:21 UTC 2025

[Public]

Regards,
      Prike

> -----Original Message-----
> From: Koenig, Christian <Christian.Koenig at amd.com>
> Sent: Tuesday, July 8, 2025 5:18 PM
> To: Liang, Prike <Prike.Liang at amd.com>; amd-gfx at lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>
> Subject: Re: [PATCH v5 7/9] drm/amdgpu: add userq va unmap validated helper
>
> On 08.07.25 09:32, Liang, Prike wrote:
> > [Public]
> >
> > Regards,
> >       Prike
> >
> >> -----Original Message-----
> >> From: Koenig, Christian <Christian.Koenig at amd.com>
> >> Sent: Monday, July 7, 2025 5:37 PM
> >> To: Liang, Prike <Prike.Liang at amd.com>; amd-gfx at lists.freedesktop.org
> >> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>
> >> Subject: Re: [PATCH v5 7/9] drm/amdgpu: add userq va unmap validated
> >> helper
> >>
> >>
> >>
> >> On 04.07.25 12:33, Prike Liang wrote:
> >>> This helper can validate the userq whether can be unmapped prior to
> >>> the userq VA GEM unmap.
> >>>
> >>> Signed-off-by: Prike Liang <Prike.Liang at amd.com>
> >>> ---
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 78
> >>> +++++++++++++++++++++++  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> |
> >>> 3 +
> >>>  2 files changed, 81 insertions(+)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >>> index 25a35ab7395b..30838e5279bd 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> >>> @@ -1180,3 +1180,81 @@ int
> >> amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device
> >> *adev,
> >>>     mutex_unlock(&adev->userq_mutex);
> >>>     return ret;
> >>>  }
> >>> +
> >>> +static bool amdgpu_userq_gem_va_unmap_queue_retrieve(struct
> >> amdgpu_usermode_queue *queue,
> >>> +                                                   uint64_t va) {
> >>> +   va = va << AMDGPU_GPU_PAGE_SHIFT | AMDGPU_GMC_HOLE_END;
> >>
> >> Still clear NAK to doing that here.
> > It seems there's no existing VM helper for aligning the userspace VA into gem
> mapping range.
> > Without such alignment change, could you please give more detail how to covert
> the VA between the user input and GEM mapping range?
>
> Ok sounds like I wasn't clear enough what the problem is: Don't convert the VA
> between the user input and the GEM mapping ranges!
>
> Instead when the queue is resumed validate the VAs and make sure that all of them
> are mapped. See amdgpu_cs_find_mapping() for an example.
Thank you for the explanation. Yes, in the patch #5 amdgpu_userq_buffer_va_mapped() is using the userq VA to find out
its GEM mapping. But to use amdgpu_vm_bo_lookup_mapping() to find out the userq VA mapping, which requires masking
and aligning the useq input VA to the GEM mapping range first.

> When any of them are not mapped correctly then don't resume the queue.
>
> As second step we then need to figure out how to suspend the queue when any of
> those mappings change.

> Regards,
> Christian.
>
> >
> >>> +
> >>> +   switch (queue->queue_type) {
> >>> +   case AMDGPU_HW_IP_GFX:
> >>> +           if (queue->queue_va == va ||
> >>> +               queue->wptr_va  == va ||
> >>> +               queue->rptr_va  == va ||
> >>> +               queue->shadow_va == va ||
> >>> +               queue->csa_va  == va)
> >>> +                   return true;
> >>> +           break;
> >>> +   case AMDGPU_HW_IP_COMPUTE:
> >>> +             if (queue->queue_va == va ||
> >>> +                 queue->wptr_va == va ||
> >>> +                 queue->rptr_va  == va ||
> >>> +                 queue->eop_va  == va)
> >>> +                   return true;
> >>> +           break;
> >>> +   case AMDGPU_HW_IP_DMA:
> >>> +           if (queue->queue_va == va ||
> >>> +               queue->wptr_va == va ||
> >>> +               queue->rptr_va == va ||
> >>> +               queue->csa_va == va)
> >>> +                   return true;
> >>> +           break;
> >>> +   default:
> >>> +           break;
> >>> +   }
> >>> +
> >>> +   return false;
> >>> +}
> >>> +
> >>> +int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_vm *vm,
> >>> +                           uint64_t va)
> >>
> >> I don't see an user of this function?
> >>
> >>> +{
> >>> +   struct amdgpu_fpriv *fpriv = vm_to_fpriv(vm);
> >>> +   struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
> >>> +
> >>> +   if (&fpriv->vm == vm) {
> >>
> >> This check is complete nosense.
> >>
> >>> +           struct amdgpu_usermode_queue *queue;
> >>> +           int queue_id, r = 0;
> >>> +
> >>> +           if (mutex_trylock(&uq_mgr->userq_mutex)) {
> >>> +                   /* If here the userq bo is busy and needs to
> >>> + deactivate and
> >> prevent reusing it.*/
> >>> +                   idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
> >>> +                           struct dma_fence *f = queue->last_fence;
> >>> +
> >>> +
> >>> +                           if
> >> (!amdgpu_userq_gem_va_unmap_queue_retrieve(queue, va)) {
> >>> +                                   dev_dbg(uq_mgr->adev->dev,
> >>> + "queue(id:%d)
> >> not belond to vm:%p\n",
> >>> +                                           queue_id,vm);
> >>> +                                   continue;
> >>> +                           }
> >>> +
> >>> +                           if (f && !dma_fence_is_signaled(f)) {
> >>> +
> >>> +                                   dev_warn(uq_mgr->adev->dev, "try
> >>> + to unmap
> >> the busy queue(id:%d):%p under vm:%p\n",
> >>> +                                           queue_id, queue, vm);
> >>> +                                   /* Need to set a resonable state
> >>> + for avoiding
> >> reusing this queue*/
> >>> +                                   queue->state =
> >> AMDGPU_USERQ_STATE_HUNG;
> >>> +                                   r++;
> >>> +                           }
> >>> +                   }
> >>> +                   mutex_unlock(&uq_mgr->userq_mutex);
> >>> +                   return r;
> >>> +           } else {
> >>> +                   /* do we need a try lock again before return*/
> >>> +                   return -EBUSY;
> >>> +           }
> >>> +
> >>> +   }
> >>> +
> >>> +   return 0;
> >>> +}
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> >>> index 194ec7a6b3b2..08c49d738ec1 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> >>> @@ -31,6 +31,7 @@
> >>>  #define to_ev_fence(f) container_of(f, struct
> >>> amdgpu_eviction_fence,
> >>> base)  #define uq_mgr_to_fpriv(u) container_of(u, struct
> >>> amdgpu_fpriv,
> >>> userq_mgr)  #define work_to_uq_mgr(w, name) container_of(w, struct
> >>> amdgpu_userq_mgr, name)
> >>> +#define vm_to_fpriv(v)  container_of(v, struct amdgpu_fpriv, vm)
> >>
> >> NAK to that, the VM should not be casted to fpriv.
> >>
> >>>
> >>>  enum amdgpu_userq_state {
> >>>     AMDGPU_USERQ_STATE_UNMAPPED = 0, @@ -148,4 +149,6 @@ bool
> >>> amdgpu_userq_buffer_vas_mapped(struct
> >>> amdgpu_vm *vm,  int amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm,
> >>> u64 addr);  int amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm,
> >>>                     struct amdgpu_usermode_queue *queue);
> >>> +int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_vm *vm,
> >>> +                           uint64_t va);
> >>>  #endif
> >