[PATCH v3 03/09] drm/amdgpu: Add wait IOCTL timeline syncobj support

Mon Sep 30 12:21:50 UTC 2024

Am 30.09.24 um 13:59 schrieb Arunpravin Paneer Selvam:
> Add user fence wait IOCTL timeline syncobj support.
>
> v2:(Christian)
>    - handle dma_fence_wait() return value.
>    - shorten the variable name syncobj_timeline_points a bit.
>    - move num_points up to avoid padding issues.
>
> v3:(Christian)
>    - Handle timeline drm_syncobj_find_fence() call error
>      handling
>    - Use dma_fence_unwrap_for_each() in timeline fence as
>      there could be more than one fence.
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam at amd.com>
> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 99 +++++++++++++++++--
>   include/uapi/drm/amdgpu_drm.h                 | 16 ++-
>   2 files changed, 107 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> index 8f9d2427d380..76552eed6d86 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> @@ -24,6 +24,7 @@
>   
>   #include <linux/kref.h>
>   #include <linux/slab.h>
> +#include <linux/dma-fence-unwrap.h>
>   
>   #include <drm/drm_exec.h>
>   #include <drm/drm_syncobj.h>
> @@ -474,11 +475,11 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
>   int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
>   			    struct drm_file *filp)
>   {
> +	u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles;
> +	u32 num_syncobj, num_bo_handles, num_points;
>   	struct drm_amdgpu_userq_fence_info *fence_info = NULL;
>   	struct drm_amdgpu_userq_wait *wait_info = data;
> -	u32 *syncobj_handles, *bo_handles;
>   	struct dma_fence **fences = NULL;
> -	u32 num_syncobj, num_bo_handles;
>   	struct drm_gem_object **gobj;
>   	struct drm_exec exec;
>   	int r, i, entry, cnt;
> @@ -498,11 +499,26 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
>   		goto free_bo_handles;
>   	}
>   
> +	num_points = wait_info->num_points;
> +	timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles),
> +				       sizeof(u32) * num_points);
> +	if (IS_ERR(timeline_handles)) {
> +		r = PTR_ERR(timeline_handles);
> +		goto free_syncobj_handles;
> +	}
> +
> +	timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points),
> +				      sizeof(u32) * num_points);
> +	if (IS_ERR(timeline_points)) {
> +		r = PTR_ERR(timeline_points);
> +		goto free_timeline_handles;
> +	}
> +
>   	/* Array of GEM object handles */
>   	gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL);
>   	if (!gobj) {
>   		r = -ENOMEM;
> -		goto free_syncobj_handles;
> +		goto free_timeline_points;
>   	}
>   
>   	for (entry = 0; entry < num_bo_handles; entry++) {
> @@ -524,15 +540,40 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
>   	}
>   
>   	if (!wait_info->num_fences) {
> +		if (num_points) {
> +			struct dma_fence_unwrap iter;
> +			struct dma_fence *fence;
> +			struct dma_fence *f;
> +
> +			for (i = 0; i < num_points; i++) {
> +				r = drm_syncobj_find_fence(filp, timeline_handles[i],
> +							   timeline_points[i],
> +							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
> +							   &fence);
> +				if (r)
> +					goto exec_fini;
> +
> +				num_fences++;
> +
> +				dma_fence_unwrap_for_each(f, &iter, fence)
> +					num_fences++;

This duplicates the num_fences increment, just drop the first one since 
fence is always included in the iteration as well.

> +
> +				dma_fence_put(fence);
> +			}
> +		}
> +
>   		/* Count syncobj's fence */
>   		for (i = 0; i < num_syncobj; i++) {
>   			struct dma_fence *fence;
>   
>   			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
> -						   0, 0, &fence);
> +						   0,
> +						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
> +						   &fence);
>   			if (r)
>   				goto exec_fini;
>   
> +			dma_fence_put(fence);
>   			num_fences++;
>   			dma_fence_put(fence);
>   		}
> @@ -588,12 +629,46 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
>   			}
>   		}
>   
> +		if (num_points) {
> +			struct dma_fence_unwrap iter;
> +			struct dma_fence *fence;
> +			struct dma_fence *f;
> +
> +			for (i = 0; i < num_points; i++) {
> +				r = drm_syncobj_find_fence(filp, timeline_handles[i],
> +							   timeline_points[i],
> +							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
> +							   &fence);
> +				if (r)
> +					goto free_fences;
> +
> +				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
> +					r = -EINVAL;
> +					goto free_fences;
> +				}
> +
> +				fences[num_fences++] = fence;

Same here.

Apart from that looks good to me. We could cleanup the code a bit more, 
but that can come later on.

Regards,
Christian.

> +
> +				dma_fence_unwrap_for_each(f, &iter, fence) {
> +					if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
> +						r = -EINVAL;
> +						goto free_fences;
> +					}
> +
> +					dma_fence_get(f);
> +					fences[num_fences++] = f;
> +				}
> +			}
> +		}
> +
>   		/* Retrieve syncobj's fence */
>   		for (i = 0; i < num_syncobj; i++) {
>   			struct dma_fence *fence;
>   
>   			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
> -						   0, 0, &fence);
> +						   0,
> +						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
> +						   &fence);
>   			if (r)
>   				goto free_fences;
>   
> @@ -616,9 +691,13 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
>   				 * Just waiting on other driver fences should
>   				 * be good for now
>   				 */
> -				dma_fence_wait(fences[i], false);
> -				dma_fence_put(fences[i]);
> +				r = dma_fence_wait(fences[i], true);
> +				if (r) {
> +					dma_fence_put(fences[i]);
> +					goto free_fences;
> +				}
>   
> +				dma_fence_put(fences[i]);
>   				continue;
>   			}
>   
> @@ -664,6 +743,8 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
>   		drm_gem_object_put(gobj[i]);
>   	kfree(gobj);
>   
> +	kfree(timeline_points);
> +	kfree(timeline_handles);
>   	kfree(syncobj_handles);
>   	kfree(bo_handles);
>   
> @@ -681,6 +762,10 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
>   	while (entry-- > 0)
>   		drm_gem_object_put(gobj[entry]);
>   	kfree(gobj);
> +free_timeline_points:
> +	kfree(timeline_points);
> +free_timeline_handles:
> +	kfree(timeline_handles);
>   free_syncobj_handles:
>   	kfree(syncobj_handles);
>   free_bo_handles:
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index af42798e901d..3b24e0cb1b51 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -521,12 +521,26 @@ struct drm_amdgpu_userq_wait {
>   	 * matching fence wait info pair in @userq_fence_info.
>   	 */
>   	__u32	bo_wait_flags;
> -	__u32	pad;
> +	/**
> +	 * @num_points: A count that represents the number of timeline syncobj handles in
> +	 * syncobj_handles_array.
> +	 */
> +	__u32	num_points;
>   	/**
>   	 * @syncobj_handles_array: An array of syncobj handles defined to get the
>   	 * fence wait information of every syncobj handles in the array.
>   	 */
>   	__u64	syncobj_handles_array;
> +	/**
> +	 * @syncobj_timeline_handles: An array of timeline syncobj handles defined to get the
> +	 * fence wait information of every timeline syncobj handles in the array.
> +	 */
> +	__u64   syncobj_timeline_handles;
> +	/**
> +	 * @syncobj_timeline_points: An array of timeline syncobj points defined to get the
> +	 * fence wait points of every timeline syncobj handles in the syncobj_handles_array.
> +	 */
> +	__u64	syncobj_timeline_points;
>   	/**
>   	 * @bo_handles_array: An array of GEM BO handles defined to fetch the fence
>   	 * wait information of every BO handles in the array.