[PATCH v3 03/09] drm/amdgpu: Add wait IOCTL timeline syncobj support
Christian König
christian.koenig at amd.com
Mon Sep 30 12:21:50 UTC 2024
Am 30.09.24 um 13:59 schrieb Arunpravin Paneer Selvam:
> Add user fence wait IOCTL timeline syncobj support.
>
> v2:(Christian)
> - handle dma_fence_wait() return value.
> - shorten the variable name syncobj_timeline_points a bit.
> - move num_points up to avoid padding issues.
>
> v3:(Christian)
> - Handle timeline drm_syncobj_find_fence() call error
> handling
> - Use dma_fence_unwrap_for_each() in timeline fence as
> there could be more than one fence.
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam at amd.com>
> ---
> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 99 +++++++++++++++++--
> include/uapi/drm/amdgpu_drm.h | 16 ++-
> 2 files changed, 107 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> index 8f9d2427d380..76552eed6d86 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
> @@ -24,6 +24,7 @@
>
> #include <linux/kref.h>
> #include <linux/slab.h>
> +#include <linux/dma-fence-unwrap.h>
>
> #include <drm/drm_exec.h>
> #include <drm/drm_syncobj.h>
> @@ -474,11 +475,11 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
> int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
> struct drm_file *filp)
> {
> + u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles;
> + u32 num_syncobj, num_bo_handles, num_points;
> struct drm_amdgpu_userq_fence_info *fence_info = NULL;
> struct drm_amdgpu_userq_wait *wait_info = data;
> - u32 *syncobj_handles, *bo_handles;
> struct dma_fence **fences = NULL;
> - u32 num_syncobj, num_bo_handles;
> struct drm_gem_object **gobj;
> struct drm_exec exec;
> int r, i, entry, cnt;
> @@ -498,11 +499,26 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
> goto free_bo_handles;
> }
>
> + num_points = wait_info->num_points;
> + timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles),
> + sizeof(u32) * num_points);
> + if (IS_ERR(timeline_handles)) {
> + r = PTR_ERR(timeline_handles);
> + goto free_syncobj_handles;
> + }
> +
> + timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points),
> + sizeof(u32) * num_points);
> + if (IS_ERR(timeline_points)) {
> + r = PTR_ERR(timeline_points);
> + goto free_timeline_handles;
> + }
> +
> /* Array of GEM object handles */
> gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL);
> if (!gobj) {
> r = -ENOMEM;
> - goto free_syncobj_handles;
> + goto free_timeline_points;
> }
>
> for (entry = 0; entry < num_bo_handles; entry++) {
> @@ -524,15 +540,40 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
> }
>
> if (!wait_info->num_fences) {
> + if (num_points) {
> + struct dma_fence_unwrap iter;
> + struct dma_fence *fence;
> + struct dma_fence *f;
> +
> + for (i = 0; i < num_points; i++) {
> + r = drm_syncobj_find_fence(filp, timeline_handles[i],
> + timeline_points[i],
> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
> + &fence);
> + if (r)
> + goto exec_fini;
> +
> + num_fences++;
> +
> + dma_fence_unwrap_for_each(f, &iter, fence)
> + num_fences++;
This duplicates the num_fences increment, just drop the first one since
fence is always included in the iteration as well.
> +
> + dma_fence_put(fence);
> + }
> + }
> +
> /* Count syncobj's fence */
> for (i = 0; i < num_syncobj; i++) {
> struct dma_fence *fence;
>
> r = drm_syncobj_find_fence(filp, syncobj_handles[i],
> - 0, 0, &fence);
> + 0,
> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
> + &fence);
> if (r)
> goto exec_fini;
>
> + dma_fence_put(fence);
> num_fences++;
> dma_fence_put(fence);
> }
> @@ -588,12 +629,46 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
> }
> }
>
> + if (num_points) {
> + struct dma_fence_unwrap iter;
> + struct dma_fence *fence;
> + struct dma_fence *f;
> +
> + for (i = 0; i < num_points; i++) {
> + r = drm_syncobj_find_fence(filp, timeline_handles[i],
> + timeline_points[i],
> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
> + &fence);
> + if (r)
> + goto free_fences;
> +
> + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
> + r = -EINVAL;
> + goto free_fences;
> + }
> +
> + fences[num_fences++] = fence;
Same here.
Apart from that looks good to me. We could cleanup the code a bit more,
but that can come later on.
Regards,
Christian.
> +
> + dma_fence_unwrap_for_each(f, &iter, fence) {
> + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
> + r = -EINVAL;
> + goto free_fences;
> + }
> +
> + dma_fence_get(f);
> + fences[num_fences++] = f;
> + }
> + }
> + }
> +
> /* Retrieve syncobj's fence */
> for (i = 0; i < num_syncobj; i++) {
> struct dma_fence *fence;
>
> r = drm_syncobj_find_fence(filp, syncobj_handles[i],
> - 0, 0, &fence);
> + 0,
> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
> + &fence);
> if (r)
> goto free_fences;
>
> @@ -616,9 +691,13 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
> * Just waiting on other driver fences should
> * be good for now
> */
> - dma_fence_wait(fences[i], false);
> - dma_fence_put(fences[i]);
> + r = dma_fence_wait(fences[i], true);
> + if (r) {
> + dma_fence_put(fences[i]);
> + goto free_fences;
> + }
>
> + dma_fence_put(fences[i]);
> continue;
> }
>
> @@ -664,6 +743,8 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
> drm_gem_object_put(gobj[i]);
> kfree(gobj);
>
> + kfree(timeline_points);
> + kfree(timeline_handles);
> kfree(syncobj_handles);
> kfree(bo_handles);
>
> @@ -681,6 +762,10 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
> while (entry-- > 0)
> drm_gem_object_put(gobj[entry]);
> kfree(gobj);
> +free_timeline_points:
> + kfree(timeline_points);
> +free_timeline_handles:
> + kfree(timeline_handles);
> free_syncobj_handles:
> kfree(syncobj_handles);
> free_bo_handles:
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index af42798e901d..3b24e0cb1b51 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -521,12 +521,26 @@ struct drm_amdgpu_userq_wait {
> * matching fence wait info pair in @userq_fence_info.
> */
> __u32 bo_wait_flags;
> - __u32 pad;
> + /**
> + * @num_points: A count that represents the number of timeline syncobj handles in
> + * syncobj_handles_array.
> + */
> + __u32 num_points;
> /**
> * @syncobj_handles_array: An array of syncobj handles defined to get the
> * fence wait information of every syncobj handles in the array.
> */
> __u64 syncobj_handles_array;
> + /**
> + * @syncobj_timeline_handles: An array of timeline syncobj handles defined to get the
> + * fence wait information of every timeline syncobj handles in the array.
> + */
> + __u64 syncobj_timeline_handles;
> + /**
> + * @syncobj_timeline_points: An array of timeline syncobj points defined to get the
> + * fence wait points of every timeline syncobj handles in the syncobj_handles_array.
> + */
> + __u64 syncobj_timeline_points;
> /**
> * @bo_handles_array: An array of GEM BO handles defined to fetch the fence
> * wait information of every BO handles in the array.
More information about the amd-gfx
mailing list