[PATCH 5/6] drm/amdgpu: Implement userqueue signal/wait IOCTL functions

Christian König christian.koenig at amd.com
Mon Feb 27 12:59:47 UTC 2023


Am 26.02.23 um 17:54 schrieb Arunpravin Paneer Selvam:
> This patch introduces new IOCTL for userqueue secure semaphore.
>
> The signal IOCTL called from userspace application creates a drm
> syncobj and array of bo GEM handles and passed in as parameter to
> the driver to install the fence into it.
>
> The wait IOCTL gets an array of drm syncobjs, finds the fences
> attached to the drm syncobjs and obtain the array of
> memory_address/fence_value combintion which are returned to
> userspace.
>
> v2: Worked on review comments from Christian for the following
>      modifications
>
>      - Install fence into GEM BO object.
>      - Lock all BO's using the dma resv subsystem
>      - Reorder the sequence in signal IOCTL function.
>      - Get write pointer from the shadow wptr
>      - use userq_fence to fetch the va/value in wait IOCTL.
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   2 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       |   3 +
>   .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 258 ++++++++++++++++++
>   .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h   |   6 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |   1 +
>   5 files changed, 270 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 1c3eba2d0390..255d73795493 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -964,6 +964,8 @@ struct amdgpu_device {
>   	struct amdgpu_mes               mes;
>   	struct amdgpu_mqd               mqds[AMDGPU_HW_IP_NUM];
>   
> +	struct amdgpu_userq_mgr         *userq_mgr;
> +
>   	/* df */
>   	struct amdgpu_df                df;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 6b7ac1ebd04c..66a7304fabe3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2752,6 +2752,9 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
>   	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_DOORBELL_RING, amdgpu_userq_doorbell_ring_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> +
>   };
>   
>   static const struct drm_driver amdgpu_kms_driver = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> index 609a7328e9a6..26fd1d4f758a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> @@ -249,3 +249,261 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops = {
>   	.signaled = amdgpu_userq_fence_signaled,
>   	.release = amdgpu_userq_fence_release,
>   };
> +
> +static int amdgpu_userq_fence_read_wptr(struct drm_file *filp,
> +					struct amdgpu_usermode_queue *queue,
> +					u64 *wptr)
> +{
> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +	struct amdgpu_bo_va_mapping *mapping;
> +	struct amdgpu_vm *vm = &fpriv->vm;
> +	struct amdgpu_bo *bo;
> +	u64 *ptr;
> +	int r;
> +
> +	mapping = amdgpu_vm_bo_lookup_mapping(vm, queue->wptr_gpu_addr >> PAGE_SHIFT);
> +	if (!mapping)
> +		return -EINVAL;
> +
> +	bo = mapping->bo_va->base.bo;
> +	r = amdgpu_bo_kmap(bo, (void **)&ptr);

Oh, that's not something you can do that easily.

The BO must be reserved (locked) first if you want to call 
amdgpu_bo_kmap() on it.

> +	if (r) {
> +		DRM_ERROR("Failed mapping the userqueue wptr bo");
> +		return r;
> +	}
> +
> +	*wptr = le64_to_cpu(*ptr);
> +
> +	return 0;
> +}
> +
> +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
> +			      struct drm_file *filp)
> +{
> +	struct drm_amdgpu_userq_signal *args = data;
> +	struct amdgpu_device *adev = drm_to_adev(dev);
> +	struct amdgpu_userq_mgr *userq_mgr = adev->userq_mgr;
> +	struct amdgpu_usermode_queue *queue;
> +	struct drm_syncobj *syncobj = NULL;
> +	struct drm_gem_object **gobj;
> +	u64 num_bo_handles, wptr;
> +	struct dma_fence *fence;
> +	u32 *bo_handles;
> +	bool shared;
> +	int r, i;
> +
> +	/* Retrieve the user queue */
> +	queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
> +	if (!queue)
> +		return -ENOENT;
> +
> +	r = amdgpu_userq_fence_read_wptr(filp, queue, &wptr);
> +	if (r)
> +		return -EINVAL;
> +
> +	/* Find Syncobj if any */
> +	syncobj = drm_syncobj_find(filp, args->handle);
> +
> +	/* Array of bo handles */
> +	num_bo_handles = args->num_bo_handles;
> +	bo_handles = kmalloc_array(num_bo_handles, sizeof(*bo_handles), GFP_KERNEL);
> +	if (bo_handles == NULL)
> +		return -ENOMEM;
> +
> +	if (copy_from_user(bo_handles, u64_to_user_ptr(args->bo_handles_array),
> +			   sizeof(u32) * num_bo_handles)) {
> +		r = -EFAULT;
> +		goto err_free_handles;
> +	}
> +
> +	/* Array of GEM object handles */
> +	gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL);
> +	if (gobj == NULL) {
> +		r = -ENOMEM;
> +		goto err_free_handles;
> +	}
> +
> +	for (i = 0; i < num_bo_handles; i++) {
> +		/* Retrieve GEM object */
> +		gobj[i] = drm_gem_object_lookup(filp, bo_handles[i]);
> +		if (!gobj[i]) {
> +			r = -ENOENT;
> +			goto err_put_gobj;
> +		}
> +
> +		dma_resv_lock(gobj[i]->resv, NULL);
> +		r = dma_resv_reserve_fences(gobj[i]->resv, 1);
> +		if (r) {
> +			dma_resv_unlock(gobj[i]->resv);
> +			goto err_put_gobj;
> +		}
> +		dma_resv_unlock(gobj[i]->resv);
> +	}
> +
> +	/* Create a new fence */
> +	r = amdgpu_userq_fence_create(queue, wptr, &fence);
> +	if (!fence)
> +		goto err_put_gobj;
> +
> +	/* Add the created fence to syncobj/BO's */
> +	if (syncobj)
> +		drm_syncobj_replace_fence(syncobj, fence);
> +
> +	shared = args->bo_flags & AMDGPU_USERQ_BO_READ;
> +	for (i = 0; i < num_bo_handles; i++) {
> +		dma_resv_lock(gobj[i]->resv, NULL);
> +		dma_resv_add_fence(gobj[i]->resv, fence, shared ?
> +				   DMA_RESV_USAGE_READ :
> +				   DMA_RESV_USAGE_WRITE);
> +		dma_resv_unlock(gobj[i]->resv);
> +	}

That will still not work correctly. You've forgotten to call 
dma_resv_reserve_fences().

The tricky part is that you need to do this for all BOs at the same time.

I will work on my drm_exec() patch set once more. That one should make 
this job much easier.

Similar applies to the _wait function, but let's get _signal working first.

Regards,
Christian.

> +
> +	for (i = 0; i < num_bo_handles; i++)
> +		drm_gem_object_put(gobj[i]);
> +
> +	dma_fence_put(fence);
> +
> +	/* Free all handles */
> +	kfree(bo_handles);
> +	kfree(gobj);
> +
> +	return 0;
> +
> +err_put_gobj:
> +	while (i-- > 0)
> +		drm_gem_object_put(gobj[i]);
> +	kfree(gobj);
> +err_free_handles:
> +	kfree(bo_handles);
> +
> +	return r;
> +}
> +
> +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
> +			    struct drm_file *filp)
> +{
> +	struct drm_amdgpu_userq_fence_info *fence_info = NULL;
> +	struct drm_amdgpu_userq_wait *args = data;
> +	struct amdgpu_userq_fence *userq_fence;
> +	u32 *syncobj_handles, *bo_handles;
> +	u64 num_syncobj, num_bo_handles;
> +	struct drm_gem_object **gobj;
> +	struct dma_fence *fence;
> +	bool wait_flag;
> +	int r, i, tmp;
> +
> +	/* Array of Syncobj handles */
> +	num_syncobj = args->count_handles;
> +	syncobj_handles = kmalloc_array(num_syncobj, sizeof(*syncobj_handles), GFP_KERNEL);
> +	if (!syncobj_handles)
> +		return -ENOMEM;
> +
> +	if (copy_from_user(syncobj_handles, u64_to_user_ptr(args->handles),
> +			   sizeof(u32) * num_syncobj)) {
> +		r = -EFAULT;
> +		goto err_free_syncobj_handles;
> +	}
> +
> +	/* Array of bo handles */
> +	num_bo_handles = args->num_bo_handles;
> +	bo_handles = kmalloc_array(num_bo_handles, sizeof(*bo_handles), GFP_KERNEL);
> +	if (!bo_handles)
> +		goto err_free_syncobj_handles;
> +
> +	if (copy_from_user(bo_handles, u64_to_user_ptr(args->bo_handles_array),
> +			   sizeof(u32) * num_bo_handles)) {
> +		r = -EFAULT;
> +		goto err_free_bo_handles;
> +	}
> +
> +	/* Array of fence gpu address */
> +	fence_info = kmalloc_array(num_syncobj + num_bo_handles, sizeof(*fence_info), GFP_KERNEL);
> +	if (!fence_info) {
> +		r = -ENOMEM;
> +		goto err_free_bo_handles;
> +	}
> +
> +	/* Retrieve syncobj's fence */
> +	for (i = 0; i < num_syncobj; i++) {
> +		r = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 0, &fence);
> +		if (r) {
> +			DRM_ERROR("syncobj %u failed to find the fence!\n", syncobj_handles[i]);
> +			r = -ENOENT;
> +			goto err_free_fence_info;
> +		}
> +
> +		/* Store drm syncobj's gpu va address and value */
> +		userq_fence = to_amdgpu_userq_fence(fence);
> +		fence_info[i].va = userq_fence->fence_drv->gpu_addr;
> +		fence_info[i].value = fence->seqno;
> +		dma_fence_put(fence);
> +	}
> +
> +	tmp = i;
> +
> +	/* Array of GEM object handles */
> +	gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL);
> +	if (gobj == NULL) {
> +		r = -ENOMEM;
> +		goto err_free_fence_info;
> +	}
> +
> +	/* Retrieve GEM objects's fence */
> +	wait_flag = args->bo_wait_flags & AMDGPU_USERQ_BO_READ;
> +	for (i = 0; i < num_bo_handles; i++, tmp++) {
> +		struct dma_fence *bo_fence;
> +
> +		gobj[i] = drm_gem_object_lookup(filp, bo_handles[i]);
> +		if (!gobj[i]) {
> +			r = -ENOENT;
> +			goto err_put_gobj;
> +		}
> +
> +		dma_resv_lock(gobj[i]->resv, NULL);
> +		r = dma_resv_get_singleton(gobj[i]->resv,
> +					   wait_flag ?
> +					   DMA_RESV_USAGE_READ :
> +					   DMA_RESV_USAGE_WRITE,
> +					   &bo_fence);
> +		if (r) {
> +			dma_resv_unlock(gobj[i]->resv);
> +			goto err_put_gobj;
> +		}
> +		dma_resv_unlock(gobj[i]->resv);
> +		drm_gem_object_put(gobj[i]);
> +
> +		/* Store GEM objects's gpu va address and value */
> +		userq_fence = to_amdgpu_userq_fence(bo_fence);
> +		fence_info[tmp].va = userq_fence->fence_drv->gpu_addr;
> +		fence_info[tmp].value = bo_fence->seqno;
> +		dma_fence_put(bo_fence);
> +	}
> +
> +	if (copy_to_user(u64_to_user_ptr(args->userq_fence_info),
> +	    fence_info, sizeof(fence_info))) {
> +		r = -EFAULT;
> +		goto err_free_gobj;
> +	}
> +
> +	/* Free all handles */
> +	kfree(syncobj_handles);
> +	kfree(bo_handles);
> +	kfree(fence_info);
> +	kfree(gobj);
> +
> +	return 0;
> +
> +err_put_gobj:
> +	while (i-- > 0)
> +		drm_gem_object_put(gobj[i]);
> +err_free_gobj:
> +	kfree(gobj);
> +err_free_fence_info:
> +	kfree(fence_info);
> +err_free_bo_handles:
> +	kfree(bo_handles);
> +err_free_syncobj_handles:
> +	kfree(syncobj_handles);
> +
> +	return r;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> index 230dd54b4cd3..999d1e2baff5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
> @@ -27,6 +27,8 @@
>   
>   #include <linux/types.h>
>   
> +#define AMDGPU_USERQ_BO_READ	0x1
> +
>   struct amdgpu_userq_fence {
>   	struct dma_fence base;
>   	/* userq fence lock */
> @@ -57,5 +59,9 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
>   			      u64 seq, struct dma_fence **f);
>   bool amdgpu_userq_fence_process(struct amdgpu_userq_fence_driver *fence_drv);
>   void amdgpu_userq_fence_driver_destroy(struct kref *ref);
> +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
> +			      struct drm_file *filp);
> +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
> +			    struct drm_file *filp);
>   
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> index d4317b0f6487..4d3d6777a178 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> @@ -457,6 +457,7 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_devi
>       idr_init_base(&userq_mgr->userq_idr, 1);
>       INIT_LIST_HEAD(&userq_mgr->userq_list);
>       userq_mgr->adev = adev;
> +    adev->userq_mgr = userq_mgr;
>   
>       r = amdgpu_userqueue_setup_mqd_funcs(userq_mgr);
>       if (r) {



More information about the amd-gfx mailing list