[PATCH 7/7] drm/syncobj: Add a fast path to drm_syncobj_array_find

Maíra Canal mcanal at igalia.com
Mon Mar 24 23:06:47 UTC 2025


Hi Tvrtko,

Some nits inline, mostly personal comments. In any case,

Reviewed-by: Maíra Canal <mcanal at igalia.com>


On 18/03/25 12:54, Tvrtko Ursulin wrote:
> Running the Cyberpunk 2077 benchmark we can observe that the lookup helper
> is relatively hot, but 97% of the calls are for a single object (~3% for
> two points, and never more than three points), while a more trivial
> workload like vkmark under Plasma is even more skewed to single point
> lookups.
> 
> Therefore let's add a fast path to bypass the kmalloc_array/kfree and use a
> pre-allocated stack array for those cases.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
> ---
>   drivers/gpu/drm/drm_syncobj.c | 53 +++++++++++++++++++++++++++--------
>   1 file changed, 41 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 94932b89298f..233bdef53c87 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -1223,6 +1223,8 @@ EXPORT_SYMBOL(drm_timeout_abs_to_jiffies);
>   static int drm_syncobj_array_find(struct drm_file *file_private,
>   				  u32 __user *handles,
>   				  uint32_t count,
> +				  struct drm_syncobj **stack_syncobjs,
> +				  u32 stack_count,
>   				  struct drm_syncobj ***syncobjs_out)
>   {
>   	struct drm_syncobj **syncobjs;
> @@ -1232,9 +1234,13 @@ static int drm_syncobj_array_find(struct drm_file *file_private,
>   	if (!access_ok(handles, count * sizeof(*handles)))
>   		return -EFAULT;
>   
> -	syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL);
> -	if (!syncobjs)
> -		return -ENOMEM;
> +	if (count > stack_count) {

I believe it's worth adding a comment mentioning that using the
stack-allocated syncobjs array is a fast path that covers most cases.

> +		syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL);
> +		if (!syncobjs)
> +			return -ENOMEM;
> +	} else {
> +		syncobjs = stack_syncobjs;
> +	}
>   
>   	for (i = 0; i < count; i++) {
>   		u64 handle;
> @@ -1260,25 +1266,31 @@ static int drm_syncobj_array_find(struct drm_file *file_private,
>   			drm_syncobj_put(syncobjs[i]);
>   		i--;
>   	}
> -	kfree(syncobjs);
> +
> +	if (syncobjs != stack_syncobjs)

Again, I have a slight preference for setting `syncobjs = NULL` to avoid
this if condition. But it's just a personal preference.

> +		kfree(syncobjs);
>   
>   	return ret;
>   }
>   
>   static void drm_syncobj_array_free(struct drm_syncobj **syncobjs,
> -				   uint32_t count)
> +				   uint32_t count,
> +				   struct drm_syncobj **stack_syncobjs)

IMO, the order `syncobjs, stack_syncobjs, count` is a bit more
intuitive.

>   {
>   	uint32_t i;
>   
>   	for (i = 0; i < count; i++)
>   		drm_syncobj_put(syncobjs[i]);
> -	kfree(syncobjs);
> +
> +	if (syncobjs != stack_syncobjs)
> +		kfree(syncobjs);
>   }
>   
>   int
>   drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>   		       struct drm_file *file_private)
>   {
> +	struct drm_syncobj *stack_syncobjs[4];
>   	struct drm_syncobj_wait *args = data;
>   	ktime_t deadline, *pdeadline = NULL;
>   	u32 count = args->count_handles;
> @@ -1304,6 +1316,8 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>   	ret = drm_syncobj_array_find(file_private,
>   				     u64_to_user_ptr(args->handles),
>   				     count,
> +				     stack_syncobjs,
> +				     ARRAY_SIZE(stack_syncobjs),
>   				     &syncobjs);
>   	if (ret < 0)
>   		return ret;
> @@ -1321,7 +1335,7 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>   						 &first,
>   						 pdeadline);
>   
> -	drm_syncobj_array_free(syncobjs, count);
> +	drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
>   
>   	if (timeout < 0)
>   		return timeout;
> @@ -1336,6 +1350,7 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
>   				struct drm_file *file_private)
>   {
>   	struct drm_syncobj_timeline_wait *args = data;
> +	struct drm_syncobj *stack_syncobjs[4];

Zero-initialize it?

Best Regards,
- Maíra

>   	ktime_t deadline, *pdeadline = NULL;
>   	u32 count = args->count_handles;
>   	struct drm_syncobj **syncobjs;
> @@ -1361,6 +1376,8 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
>   	ret = drm_syncobj_array_find(file_private,
>   				     u64_to_user_ptr(args->handles),
>   				     count,
> +				     stack_syncobjs,
> +				     ARRAY_SIZE(stack_syncobjs),
>   				     &syncobjs);
>   	if (ret < 0)
>   		return ret;
> @@ -1378,7 +1395,7 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
>   						 &first,
>   						 pdeadline);
>   
> -	drm_syncobj_array_free(syncobjs, count);
> +	drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
>   
>   	if (timeout < 0)
>   		return timeout;
> @@ -1496,6 +1513,7 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
>   			struct drm_file *file_private)
>   {
>   	struct drm_syncobj_array *args = data;
> +	struct drm_syncobj *stack_syncobjs[4];
>   	struct drm_syncobj **syncobjs;
>   	uint32_t i;
>   	int ret;
> @@ -1512,6 +1530,8 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
>   	ret = drm_syncobj_array_find(file_private,
>   				     u64_to_user_ptr(args->handles),
>   				     args->count_handles,
> +				     stack_syncobjs,
> +				     ARRAY_SIZE(stack_syncobjs),
>   				     &syncobjs);
>   	if (ret < 0)
>   		return ret;
> @@ -1519,7 +1539,7 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
>   	for (i = 0; i < args->count_handles; i++)
>   		drm_syncobj_replace_fence(syncobjs[i], NULL);
>   
> -	drm_syncobj_array_free(syncobjs, args->count_handles);
> +	drm_syncobj_array_free(syncobjs, args->count_handles, stack_syncobjs);
>   
>   	return 0;
>   }
> @@ -1529,6 +1549,7 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>   			 struct drm_file *file_private)
>   {
>   	struct drm_syncobj_array *args = data;
> +	struct drm_syncobj *stack_syncobjs[4];
>   	struct drm_syncobj **syncobjs;
>   	uint32_t i;
>   	int ret;
> @@ -1545,6 +1566,8 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>   	ret = drm_syncobj_array_find(file_private,
>   				     u64_to_user_ptr(args->handles),
>   				     args->count_handles,
> +				     stack_syncobjs,
> +				     ARRAY_SIZE(stack_syncobjs),
>   				     &syncobjs);
>   	if (ret < 0)
>   		return ret;
> @@ -1555,7 +1578,7 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>   			break;
>   	}
>   
> -	drm_syncobj_array_free(syncobjs, args->count_handles);
> +	drm_syncobj_array_free(syncobjs, args->count_handles, stack_syncobjs);
>   
>   	return ret;
>   }
> @@ -1567,6 +1590,7 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
>   	struct drm_syncobj_timeline_array *args = data;
>   	uint64_t __user *points = u64_to_user_ptr(args->points);
>   	uint32_t i, j, count = args->count_handles;
> +	struct drm_syncobj *stack_syncobjs[4];
>   	struct drm_syncobj **syncobjs;
>   	struct dma_fence_chain **chains;
>   	int ret;
> @@ -1586,6 +1610,8 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
>   	ret = drm_syncobj_array_find(file_private,
>   				     u64_to_user_ptr(args->handles),
>   				     count,
> +				     stack_syncobjs,
> +				     ARRAY_SIZE(stack_syncobjs),
>   				     &syncobjs);
>   	if (ret < 0)
>   		return ret;
> @@ -1622,7 +1648,7 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
>   err_chains:
>   	kfree(chains);
>   out:
> -	drm_syncobj_array_free(syncobjs, count);
> +	drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
>   
>   	return ret;
>   }
> @@ -1631,6 +1657,7 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>   			    struct drm_file *file_private)
>   {
>   	struct drm_syncobj_timeline_array *args = data;
> +	struct drm_syncobj *stack_syncobjs[4];
>   	struct drm_syncobj **syncobjs;
>   	uint64_t __user *points = u64_to_user_ptr(args->points);
>   	uint32_t i;
> @@ -1651,6 +1678,8 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>   	ret = drm_syncobj_array_find(file_private,
>   				     u64_to_user_ptr(args->handles),
>   				     args->count_handles,
> +				     stack_syncobjs,
> +				     ARRAY_SIZE(stack_syncobjs),
>   				     &syncobjs);
>   	if (ret < 0)
>   		return ret;
> @@ -1694,7 +1723,7 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>   			break;
>   		}
>   	}
> -	drm_syncobj_array_free(syncobjs, args->count_handles);
> +	drm_syncobj_array_free(syncobjs, args->count_handles, stack_syncobjs);
>   
>   	return ret;
>   }



More information about the dri-devel mailing list