[PATCH v10 4/6] drm/amdgpu: Implement userqueue signal/wait IOCTL
Christian König
christian.koenig at amd.com
Mon May 13 15:22:58 UTC 2024
Am 10.05.24 um 10:50 schrieb Arunpravin Paneer Selvam:
> This patch introduces new IOCTL for userqueue secure semaphore.
>
> The signal IOCTL called from userspace application creates a drm
> syncobj and array of bo GEM handles and passed in as parameter to
> the driver to install the fence into it.
>
> The wait IOCTL gets an array of drm syncobjs, finds the fences
> attached to the drm syncobjs and obtain the array of
> memory_address/fence_value combintion which are returned to
> userspace.
>
> v2: (Christian)
> - Install fence into GEM BO object.
> - Lock all BO's using the dma resv subsystem
> - Reorder the sequence in signal IOCTL function.
> - Get write pointer from the shadow wptr
> - use userq_fence to fetch the va/value in wait IOCTL.
>
> v3: (Christian)
> - Use drm_exec helper for the proper BO drm reserve and avoid BO
> lock/unlock issues.
> - fence/fence driver reference count logic for signal/wait IOCTLs.
>
> v4: (Christian)
> - Fixed the drm_exec calling sequence
> - use dma_resv_for_each_fence_unlock if BO's are not locked
> - Modified the fence_info array storing logic.
>
> v5: (Christian)
> - Keep fence_drv until wait queue execution.
> - Add dma_fence_wait for other fences.
> - Lock BO's using drm_exec as the number of fences in them could
> change.
> - Install signaled fences as well into BO/Syncobj.
> - Move Syncobj fence installation code after the drm_exec_prepare_array.
> - Directly add dma_resv_usage_rw(args->bo_flags....
> - remove unnecessary dma_fence_put.
>
> v6: (Christian)
> - Add xarray stuff to store the fence_drv
> - Implement a function to iterate over the xarray and drop
> the fence_drv references.
> - Add drm_exec_until_all_locked() wrapper
> - Add a check that if we haven't exceeded the user allocated num_fences
> before adding dma_fence to the fences array.
>
> v7: (Christian)
> - Use memdup_user() for kmalloc_array + copy_from_user
> - Move the fence_drv references from the xarray into the newly created fence
> and drop the fence_drv references when we signal this fence.
> - Move this locking of BOs before the "if (!wait_info->num_fences)",
> this way you need this code block only once.
> - Merge the error handling code and the cleanup + return 0 code.
> - Initializing the xa should probably be done in the userq code.
> - Remove the userq back pointer stored in fence_drv.
> - Pass xarray as parameter in amdgpu_userq_walk_and_drop_fence_drv()
>
> v8: (Christian)
> - Move fence_drv references must come before adding the fence to the list.
> - Use xa_lock_irqsave_nested for nested spinlock operations.
> - userq_mgr should be per fpriv and not one per device.
> - Restructure the interrupt process code for the early exit of the loop.
> - The reference acquired in the syncobj fence replace code needs to be
> kept around.
> - Modify the dma_fence acquire placement in wait IOCTL.
> - Move USERQ_BO_WRITE flag to UAPI header file.
> - drop the fence drv reference after telling the hw to stop accessing it.
> - Add multi sync object support to userq signal IOCTL.
>
> v9: (Christian)
> - Store all the fence_drv ref to other drivers and not ourself.
> - Iterate over uq_fence_drv_xa without holding a lock.
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam at amd.com>
> Suggested-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +
> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 431 +++++++++++++++++-
> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 6 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 29 +-
> .../gpu/drm/amd/include/amdgpu_userqueue.h | 1 +
> 5 files changed, 462 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 844f7b5f90db..5892a4c1a92e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2918,6 +2918,8 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
> DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> };
>
> static const struct drm_driver amdgpu_kms_driver = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> index f7baea2c67ab..339d82d5808f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> @@ -25,6 +25,7 @@
> #include <linux/kref.h>
> #include <linux/slab.h>
>
> +#include <drm/drm_exec.h>
> #include <drm/drm_syncobj.h>
>
> #include "amdgpu.h"
> @@ -92,6 +93,7 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
> spin_lock_init(&fence_drv->fence_list_lock);
>
> fence_drv->adev = adev;
> + fence_drv->uq_fence_drv_xa_ref = &userq->uq_fence_drv_xa;
> fence_drv->context = dma_fence_context_alloc(1);
> get_task_comm(fence_drv->timeline_name, current);
>
> @@ -102,8 +104,11 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
>
> void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
> {
> + struct amdgpu_userq_fence_driver *stored_fence_drv;
> struct amdgpu_userq_fence *userq_fence, *tmp;
> + unsigned long index, flags;
> struct dma_fence *fence;
> + struct xarray *xa;
> u64 rptr;
>
> if (!fence_drv)
> @@ -114,15 +119,25 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d
> spin_lock(&fence_drv->fence_list_lock);
> list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) {
> fence = &userq_fence->base;
> + xa = &userq_fence->fence_drv_xa;
>
> - if (rptr >= fence->seqno) {
> - dma_fence_signal(fence);
> - list_del(&userq_fence->link);
> -
> - dma_fence_put(fence);
> - } else {
> + if (rptr < fence->seqno)
> break;
> +
> + dma_fence_signal(fence);
> + /*
> + * Walk over the fence_drv xarray and drop the old wait ioctl
> + * fence_drv references.
> + */
> + xa_lock_irqsave_nested(xa, flags, SINGLE_DEPTH_NESTING);
You can just use xa_lock_irq() here, no need for the _nested() variant.
Otherwise you just eventually confuse lockdep into printing false negatives.
> + xa_for_each(xa, index, stored_fence_drv) {
> + __xa_erase(xa, index);
> + amdgpu_userq_fence_driver_put(stored_fence_drv);
> }
> + xa_unlock_irqrestore(xa, flags);
> +
> + list_del(&userq_fence->link);
> + dma_fence_put(fence);
> }
> spin_unlock(&fence_drv->fence_list_lock);
> }
> @@ -188,9 +203,29 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
> dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
> fence_drv->context, seq);
>
> + xa_init_flags(&userq_fence->fence_drv_xa, XA_FLAGS_LOCK_IRQ);
> +
> amdgpu_userq_fence_driver_get(fence_drv);
> dma_fence_get(fence);
>
> + if (!xa_empty(&userq->uq_fence_drv_xa)) {
> + struct amdgpu_userq_fence_driver *stored_fence_drv;
> + unsigned long index;
> +
> + /*
> + * Move fence_drv references of old signal IOCTL calls to the
> + * newly created userq fence xarray.
> + */
> + xa_lock(&userq->uq_fence_drv_xa);
> + xa_for_each(&userq->uq_fence_drv_xa, index, stored_fence_drv) {
> + __xa_store(&userq_fence->fence_drv_xa, index,
> + stored_fence_drv, GFP_KERNEL);
Again that won't work like this.
__xa_store() can't allocate memory using GFP_KERNEL and also can't drop
the lock of uq_fence_drv_xa.
> + /* Erase fence_drv reference entry from userq xarray */
> + __xa_erase(&userq->uq_fence_drv_xa, index);
> + }
> + xa_unlock(&userq->uq_fence_drv_xa);
> + }
> +
> spin_lock(&fence_drv->fence_list_lock);
> /* Check if hardware has already processed the job */
> if (!dma_fence_is_signaled(fence))
> @@ -240,6 +275,8 @@ static void amdgpu_userq_fence_free(struct rcu_head *rcu)
>
> /* Release the fence driver reference */
> amdgpu_userq_fence_driver_put(fence_drv);
> +
> + xa_destroy(&userq_fence->fence_drv_xa);
> kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
> }
>
> @@ -255,3 +292,385 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops = {
> .signaled = amdgpu_userq_fence_signaled,
> .release = amdgpu_userq_fence_release,
> };
> +
> +/**
> + * amdgpu_userq_fence_read_wptr - Read the userq wptr value
> + *
> + * @filp: drm file private data structure
> + * @queue: user mode queue structure pointer
> + * @wptr: write pointer value
> + *
> + * Read the wptr value from userq's MQD. The userq signal IOCTL
> + * creates a dma_fence for the shared buffers that expects the
> + * RPTR value written to seq64 memory >= WPTR.
> + *
> + * Returns wptr value on success, error on failure.
> + */
> +static int amdgpu_userq_fence_read_wptr(struct drm_file *filp,
> + struct amdgpu_usermode_queue *queue,
> + u64 *wptr)
> +{
> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
> + struct amdgpu_bo_va_mapping *mapping;
> + struct amdgpu_vm *vm = &fpriv->vm;
> + struct amdgpu_bo *bo;
> + u64 addr, *ptr;
> + int r;
> +
> + addr = queue->userq_prop->wptr_gpu_addr;
> + addr &= AMDGPU_GMC_HOLE_MASK;
> +
> + mapping = amdgpu_vm_bo_lookup_mapping(vm, addr >> PAGE_SHIFT);
> + if (!mapping)
> + return -EINVAL;
> +
> + bo = mapping->bo_va->base.bo;
> + r = amdgpu_bo_reserve(bo, true);
> + if (r) {
> + DRM_ERROR("Failed to reserve userqueue wptr bo");
> + return r;
> + }
> +
> + r = amdgpu_bo_kmap(bo, (void **)&ptr);
> + if (r) {
> + DRM_ERROR("Failed mapping the userqueue wptr bo");
> + goto map_error;
> + }
> +
> + *wptr = le64_to_cpu(*ptr);
> +
> + amdgpu_bo_kunmap(bo);
> + amdgpu_bo_unreserve(bo);
> +
> + return 0;
> +
> +map_error:
> + amdgpu_bo_unreserve(bo);
> + return r;
> +}
> +
> +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *filp)
> +{
> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
> + struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
> + struct drm_amdgpu_userq_signal *args = data;
> + struct amdgpu_usermode_queue *queue;
> + struct drm_gem_object **gobj = NULL;
> + struct drm_syncobj **syncobj = NULL;
> + u32 *syncobj_handles, num_syncobj_handles;
> + u32 *bo_handles, num_bo_handles;
> + struct dma_fence *fence;
> + struct drm_exec exec;
> + int r, i, entry;
> + u64 wptr;
> +
> + /* Array of syncobj handles */
> + num_syncobj_handles = args->num_syncobj_handles;
> + syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles_array),
> + sizeof(u32) * num_syncobj_handles);
> + if (IS_ERR(syncobj_handles))
> + return PTR_ERR(syncobj_handles);
> +
> + /* Array of syncobj object handles */
Drop that comment, it's actually incorrect.
The other "Array of ...." comments are not very valuable either.
Rather comment why you do something and not what you do.
> + syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL);
> + if (!syncobj) {
> + r = -ENOMEM;
> + goto free_syncobj_handles;
> + }
> +
> + for (entry = 0; entry < num_syncobj_handles; entry++) {
> + syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]);
> + if (!syncobj[entry]) {
> + r = -ENOENT;
> + goto free_syncobj_handles;
> + }
> + }
> +
> + /* Array of bo handles */
> + num_bo_handles = args->num_bo_handles;
> + bo_handles = memdup_user(u64_to_user_ptr(args->bo_handles_array),
> + sizeof(u32) * num_bo_handles);
> + if (IS_ERR(bo_handles))
> + goto free_syncobj_handles;
> +
> + /* Array of GEM object handles */
> + gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL);
> + if (!gobj) {
> + r = -ENOMEM;
> + goto free_bo_handles;
> + }
> +
> + for (entry = 0; entry < num_bo_handles; entry++) {
> + gobj[entry] = drm_gem_object_lookup(filp, bo_handles[entry]);
> + if (!gobj[entry]) {
> + r = -ENOENT;
> + goto put_gobj;
> + }
> + }
> +
> + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
E.g. here you can add something like /* Lock all BOs with retry handling */
And if you give num_bo_handles as last parameter it would making
allocation of the array more efficient.
> + drm_exec_until_all_locked(&exec) {
> + r = drm_exec_prepare_array(&exec, gobj, num_bo_handles, 1);
> + drm_exec_retry_on_contention(&exec);
> + if (r)
> + goto exec_fini;
> + }
> +
> + /*Retrieve the user queue */
> + queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
> + if (!queue) {
> + r = -ENOENT;
> + goto exec_fini;
> + }
> +
> + r = amdgpu_userq_fence_read_wptr(filp, queue, &wptr);
> + if (r)
> + goto exec_fini;
> +
> + /* Create a new fence */
> + r = amdgpu_userq_fence_create(queue, wptr, &fence);
> + if (r)
> + goto exec_fini;
> +
> + for (i = 0; i < num_bo_handles; i++)
> + dma_resv_add_fence(gobj[i]->resv, fence,
> + dma_resv_usage_rw(args->bo_flags &
> + AMDGPU_USERQ_BO_WRITE));
> +
> + /* Add the created fence to syncobj/BO's */
> + for (i = 0; i < num_syncobj_handles; i++)
> + drm_syncobj_replace_fence(syncobj[i], fence);
I think it would be really nice to have the ability have a timeline
point here.
> +
> + /* drop the reference acquired in fence creation function */
> + dma_fence_put(fence);
> +
> +exec_fini:
> + drm_exec_fini(&exec);
> +put_gobj:
> + while (entry-- > 0)
> + drm_gem_object_put(gobj[entry]);
> + kfree(gobj);
> +free_bo_handles:
> + kfree(bo_handles);
> +free_syncobj_handles:
> + while (i-- > 0)
> + drm_syncobj_put(syncobj[i]);
> + kfree(syncobj_handles);
> +
> + return r;
> +}
> +
> +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *filp)
> +{
> + struct drm_amdgpu_userq_fence_info *fence_info = NULL;
> + struct drm_amdgpu_userq_wait *wait_info = data;
> + u32 *syncobj_handles, *bo_handles;
> + struct dma_fence **fences = NULL;
> + u32 num_syncobj, num_bo_handles;
> + struct drm_gem_object **gobj;
> + struct drm_exec exec;
> + int r, i, entry, cnt;
> + u64 num_fences = 0;
> +
> + num_bo_handles = wait_info->num_bo_handles;
> + bo_handles = memdup_user(u64_to_user_ptr(wait_info->bo_handles_array),
> + sizeof(u32) * num_bo_handles);
> + if (IS_ERR(bo_handles))
> + return PTR_ERR(bo_handles);
> +
> + num_syncobj = wait_info->num_syncobj_handles;
> + syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles_array),
> + sizeof(u32) * num_syncobj);
> + if (IS_ERR(syncobj_handles)) {
> + r = PTR_ERR(syncobj_handles);
> + goto free_bo_handles;
> + }
> +
> + /* Array of GEM object handles */
> + gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL);
> + if (!gobj) {
> + r = -ENOMEM;
> + goto free_syncobj_handles;
> + }
> +
> + for (entry = 0; entry < num_bo_handles; entry++) {
> + gobj[entry] = drm_gem_object_lookup(filp, bo_handles[entry]);
> + if (!gobj[entry]) {
> + r = -ENOENT;
> + goto put_gobj;
> + }
> + }
> +
> + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> + drm_exec_until_all_locked(&exec) {
> + r = drm_exec_prepare_array(&exec, gobj, num_bo_handles, 0);
> + drm_exec_retry_on_contention(&exec);
> + if (r) {
> + drm_exec_fini(&exec);
> + goto put_gobj;
> + }
> + }
> +
> + if (!wait_info->num_fences) {
> + /* Count syncobj's fence */
> + for (i = 0; i < num_syncobj; i++) {
> + struct dma_fence *fence;
> +
> + r = drm_syncobj_find_fence(filp, syncobj_handles[i],
> + 0, 0, &fence);
> + dma_fence_put(fence);
> +
> + if (r || !fence)
> + continue;
> +
> + num_fences++;
> + }
> +
> + /* Count GEM objects fence */
> + for (i = 0; i < num_bo_handles; i++) {
> + struct dma_resv_iter resv_cursor;
> + struct dma_fence *fence;
> +
> + dma_resv_for_each_fence(&resv_cursor, gobj[i]->resv,
> + dma_resv_usage_rw(wait_info->bo_wait_flags &
> + AMDGPU_USERQ_BO_WRITE), fence)
> + num_fences++;
> + }
> +
> + /*
> + * Passing num_fences = 0 means that userspace doesn't want to
> + * retrieve userq_fence_info. If num_fences = 0 we skip filling
> + * userq_fence_info and return the actual number of fences on
> + * args->num_fences.
> + */
> + wait_info->num_fences = num_fences;
> + } else {
> + /* Array of fence info */
> + fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL);
> + if (!fence_info) {
> + r = -ENOMEM;
> + goto exec_fini;
> + }
> +
> + /* Array of fences */
> + fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL);
> + if (!fences) {
> + r = -ENOMEM;
> + goto free_fence_info;
> + }
> +
> + /* Retrieve GEM objects fence */
> + for (i = 0; i < num_bo_handles; i++) {
> + struct dma_resv_iter resv_cursor;
> + struct dma_fence *fence;
> +
> + dma_resv_for_each_fence(&resv_cursor, gobj[i]->resv,
> + dma_resv_usage_rw(wait_info->bo_wait_flags &
> + AMDGPU_USERQ_BO_WRITE), fence) {
> + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
> + r = -EINVAL;
> + goto free_fences;
> + }
> +
> + fences[num_fences++] = fence;
> + dma_fence_get(fence);
> + }
> + }
> +
> + /* Retrieve syncobj's fence */
> + for (i = 0; i < num_syncobj; i++) {
> + struct dma_fence *fence;
> +
> + r = drm_syncobj_find_fence(filp, syncobj_handles[i],
> + 0, 0, &fence);
> + if (r || !fence)
> + continue;
> +
> + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
> + r = -EINVAL;
> + goto free_fences;
> + }
> +
> + fences[num_fences++] = fence;
> + }
> +
> + for (i = 0, cnt = 0; i < wait_info->num_fences; i++) {
> + struct amdgpu_userq_fence_driver *fence_drv;
> + struct amdgpu_userq_fence *userq_fence;
> + u32 index;
> +
> + userq_fence = to_amdgpu_userq_fence(fences[i]);
> + if (!userq_fence) {
> + /*
> + * Just waiting on other driver fences should
> + * be good for now
> + */
> + dma_fence_wait(fences[i], false);
Maybe try to wait interruptible here.
Apart from that looks really good to me.
Regards,
Christian.
> + dma_fence_put(fences[i]);
> +
> + continue;
> + }
> +
> + fence_drv = userq_fence->fence_drv;
> + /*
> + * We need to make sure the user queue release their reference
> + * to the fence drivers at some point before queue destruction.
> + * Otherwise, we would gather those references until we don't
> + * have any more space left and crash.
> + */
> + r = xa_alloc(fence_drv->uq_fence_drv_xa_ref, &index, fence_drv,
> + xa_limit_32b, GFP_KERNEL);
> + if (r)
> + goto free_fences;
> +
> + amdgpu_userq_fence_driver_get(fence_drv);
> +
> + /* Store drm syncobj's gpu va address and value */
> + fence_info[cnt].va = fence_drv->gpu_addr;
> + fence_info[cnt].value = fences[i]->seqno;
> +
> + dma_fence_put(fences[i]);
> + /* Increment the actual userq fence count */
> + cnt++;
> + }
> +
> + wait_info->num_fences = cnt;
> + /* Copy userq fence info to user space */
> + if (copy_to_user(u64_to_user_ptr(wait_info->userq_fence_info),
> + fence_info, wait_info->num_fences * sizeof(*fence_info))) {
> + r = -EFAULT;
> + goto free_fences;
> + }
> +
> + kfree(fences);
> + kfree(fence_info);
> + }
> +
> + drm_exec_fini(&exec);
> + for (i = 0; i < num_bo_handles; i++)
> + drm_gem_object_put(gobj[i]);
> +
> + kfree(syncobj_handles);
> + kfree(bo_handles);
> +
> + return 0;
> +
> +free_fences:
> + kfree(fences);
> +free_fence_info:
> + kfree(fence_info);
> +exec_fini:
> + drm_exec_fini(&exec);
> +put_gobj:
> + while (entry-- > 0)
> + drm_gem_object_put(gobj[entry]);
> + kfree(gobj);
> +free_syncobj_handles:
> + kfree(syncobj_handles);
> +free_bo_handles:
> + kfree(bo_handles);
> +
> + return r;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> index c3e04cdbb9e7..9c5f72a88440 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> @@ -37,6 +37,7 @@ struct amdgpu_userq_fence {
> */
> spinlock_t lock;
> struct list_head link;
> + struct xarray fence_drv_xa;
> struct amdgpu_userq_fence_driver *fence_drv;
> };
>
> @@ -52,6 +53,7 @@ struct amdgpu_userq_fence_driver {
> spinlock_t fence_list_lock;
> struct list_head fences;
> struct amdgpu_device *adev;
> + struct xarray *uq_fence_drv_xa_ref;
> char timeline_name[TASK_COMM_LEN];
> };
>
> @@ -65,5 +67,9 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
> struct amdgpu_usermode_queue *userq);
> void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv);
> void amdgpu_userq_fence_driver_destroy(struct kref *ref);
> +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *filp);
> +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *filp);
>
> #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> index 73d225aa3e1e..404c39073661 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> @@ -28,6 +28,32 @@
> #include "amdgpu_userqueue.h"
> #include "amdgpu_userq_fence.h"
>
> +static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa)
> +{
> + struct amdgpu_userq_fence_driver *fence_drv;
> + unsigned long index;
> +
> + if (xa_empty(xa))
> + return;
> +
> + xa_lock(xa);
> + xa_for_each(xa, index, fence_drv) {
> + __xa_erase(xa, index);
> + amdgpu_userq_fence_driver_put(fence_drv);
> + }
> +
> + xa_unlock(xa);
> +}
> +
> +static void
> +amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
> +{
> + amdgpu_userq_walk_and_drop_fence_drv(&userq->uq_fence_drv_xa);
> + xa_destroy(&userq->uq_fence_drv_xa);
> + /* Drop the fence_drv reference held by user queue */
> + amdgpu_userq_fence_driver_put(userq->fence_drv);
> +}
> +
> static void
> amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr,
> struct amdgpu_usermode_queue *queue,
> @@ -37,7 +63,7 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr,
> const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type];
>
> uq_funcs->mqd_destroy(uq_mgr, queue);
> - amdgpu_userq_fence_driver_put(queue->fence_drv);
> + amdgpu_userq_fence_driver_free(queue);
> idr_remove(&uq_mgr->userq_idr, queue_id);
> kfree(queue);
> }
> @@ -405,6 +431,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args)
> }
> queue->doorbell_index = index;
>
> + xa_init_flags(&queue->uq_fence_drv_xa, XA_FLAGS_ALLOC);
> r = amdgpu_userq_fence_driver_alloc(adev, queue);
> if (r) {
> DRM_ERROR("Failed to alloc fence driver\n");
> diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
> index d3738f645adc..27e1617b234f 100644
> --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
> +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
> @@ -48,6 +48,7 @@ struct amdgpu_usermode_queue {
> struct amdgpu_userq_obj db_obj;
> struct amdgpu_userq_obj fw_obj;
> struct amdgpu_userq_obj wptr_obj;
> + struct xarray uq_fence_drv_xa;
> struct amdgpu_userq_fence_driver *fence_drv;
> };
>
More information about the amd-gfx
mailing list