[PATCH 7/7] drm/syncobj: Add a fast path to drm_syncobj_array_find
Tvrtko Ursulin
tvrtko.ursulin at igalia.com
Tue Mar 25 09:54:43 UTC 2025
On 24/03/2025 23:06, Maíra Canal wrote:
> Hi Tvrtko,
>
> Some nits inline, mostly personal comments. In any case,
>
> Reviewed-by: Maíra Canal <mcanal at igalia.com>
>
>
> On 18/03/25 12:54, Tvrtko Ursulin wrote:
>> Running the Cyberpunk 2077 benchmark we can observe that the lookup
>> helper
>> is relatively hot, but 97% of the calls are for a single object. (~3%
>> for two points, and never more than three points. While a more trivial
>> workload like vkmark under Plasma is even more skewed to single point
>> lookups.)
>>
>> Therefore let's add a fast path to bypass the kmalloc_array/kfree and
>> use a
>> pre-allocated stack array for those cases.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
>> ---
>> drivers/gpu/drm/drm_syncobj.c | 53 +++++++++++++++++++++++++++--------
>> 1 file changed, 41 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/
>> drm_syncobj.c
>> index 94932b89298f..233bdef53c87 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -1223,6 +1223,8 @@ EXPORT_SYMBOL(drm_timeout_abs_to_jiffies);
>> static int drm_syncobj_array_find(struct drm_file *file_private,
>> u32 __user *handles,
>> uint32_t count,
>> + struct drm_syncobj **stack_syncobjs,
>> + u32 stack_count,
>> struct drm_syncobj ***syncobjs_out)
>> {
>> struct drm_syncobj **syncobjs;
>> @@ -1232,9 +1234,13 @@ static int drm_syncobj_array_find(struct
>> drm_file *file_private,
>> if (!access_ok(handles, count * sizeof(*handles)))
>> return -EFAULT;
>> - syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL);
>> - if (!syncobjs)
>> - return -ENOMEM;
>> + if (count > stack_count) {
>
> I believe it's worth adding a comment mentioning that using the stack
> syncobj is a fast-path that covers most cases.
Yep. But this didn't feel like the right place for it, so I added comments
where the callers size the arrays. That, however, means there are two
duplicated comments. Okay with you?
>> + syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL);
>> + if (!syncobjs)
>> + return -ENOMEM;
>> + } else {
>> + syncobjs = stack_syncobjs;
>> + }
>> for (i = 0; i < count; i++) {
>> u64 handle;
>> @@ -1260,25 +1266,31 @@ static int drm_syncobj_array_find(struct
>> drm_file *file_private,
>> drm_syncobj_put(syncobjs[i]);
>> i--;
>> }
>> - kfree(syncobjs);
>> +
>> + if (syncobjs != stack_syncobjs)
>
> Again, I have a slight preference to make `syncobjs = NULL` and avoid
> this if condition. But it's just a personal preference.
Pending clarifications from the other patch.
>
>> + kfree(syncobjs);
>> return ret;
>> }
>> static void drm_syncobj_array_free(struct drm_syncobj **syncobjs,
>> - uint32_t count)
>> + uint32_t count,
>> + struct drm_syncobj **stack_syncobjs)
>
> IMO, I think the order `syncobjs, stack_syncobjs, count` is a bit more
> intuitive.
But count is not directly related to the size of the stack array in this
function. Perhaps I could make it a boolean, like this:
static void drm_syncobj_array_free(struct drm_syncobj **syncobjs,
uint32_t count,
bool free_array)
And then in the callers:
drm_syncobj_array_free(syncobjs, count, syncobjs != stack_syncobjs);
Would that be clearer?
>
>> {
>> uint32_t i;
>> for (i = 0; i < count; i++)
>> drm_syncobj_put(syncobjs[i]);
>> - kfree(syncobjs);
>> +
>> + if (syncobjs != stack_syncobjs)
>> + kfree(syncobjs);
>> }
>> int
>> drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>> struct drm_file *file_private)
>> {
>> + struct drm_syncobj *stack_syncobjs[4];
>> struct drm_syncobj_wait *args = data;
>> ktime_t deadline, *pdeadline = NULL;
>> u32 count = args->count_handles;
>> @@ -1304,6 +1316,8 @@ drm_syncobj_wait_ioctl(struct drm_device *dev,
>> void *data,
>> ret = drm_syncobj_array_find(file_private,
>> u64_to_user_ptr(args->handles),
>> count,
>> + stack_syncobjs,
>> + ARRAY_SIZE(stack_syncobjs),
>> &syncobjs);
>> if (ret < 0)
>> return ret;
>> @@ -1321,7 +1335,7 @@ drm_syncobj_wait_ioctl(struct drm_device *dev,
>> void *data,
>> &first,
>> pdeadline);
>> - drm_syncobj_array_free(syncobjs, count);
>> + drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
>> if (timeout < 0)
>> return timeout;
>> @@ -1336,6 +1350,7 @@ drm_syncobj_timeline_wait_ioctl(struct
>> drm_device *dev, void *data,
>> struct drm_file *file_private)
>> {
>> struct drm_syncobj_timeline_wait *args = data;
>> + struct drm_syncobj *stack_syncobjs[4];
>
> Zero initialize it?
Do you see it as required?
Regards,
Tvrtko
>> ktime_t deadline, *pdeadline = NULL;
>> u32 count = args->count_handles;
>> struct drm_syncobj **syncobjs;
>> @@ -1361,6 +1376,8 @@ drm_syncobj_timeline_wait_ioctl(struct
>> drm_device *dev, void *data,
>> ret = drm_syncobj_array_find(file_private,
>> u64_to_user_ptr(args->handles),
>> count,
>> + stack_syncobjs,
>> + ARRAY_SIZE(stack_syncobjs),
>> &syncobjs);
>> if (ret < 0)
>> return ret;
>> @@ -1378,7 +1395,7 @@ drm_syncobj_timeline_wait_ioctl(struct
>> drm_device *dev, void *data,
>> &first,
>> pdeadline);
>> - drm_syncobj_array_free(syncobjs, count);
>> + drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
>> if (timeout < 0)
>> return timeout;
>> @@ -1496,6 +1513,7 @@ drm_syncobj_reset_ioctl(struct drm_device *dev,
>> void *data,
>> struct drm_file *file_private)
>> {
>> struct drm_syncobj_array *args = data;
>> + struct drm_syncobj *stack_syncobjs[4];
>> struct drm_syncobj **syncobjs;
>> uint32_t i;
>> int ret;
>> @@ -1512,6 +1530,8 @@ drm_syncobj_reset_ioctl(struct drm_device *dev,
>> void *data,
>> ret = drm_syncobj_array_find(file_private,
>> u64_to_user_ptr(args->handles),
>> args->count_handles,
>> + stack_syncobjs,
>> + ARRAY_SIZE(stack_syncobjs),
>> &syncobjs);
>> if (ret < 0)
>> return ret;
>> @@ -1519,7 +1539,7 @@ drm_syncobj_reset_ioctl(struct drm_device *dev,
>> void *data,
>> for (i = 0; i < args->count_handles; i++)
>> drm_syncobj_replace_fence(syncobjs[i], NULL);
>> - drm_syncobj_array_free(syncobjs, args->count_handles);
>> + drm_syncobj_array_free(syncobjs, args->count_handles,
>> stack_syncobjs);
>> return 0;
>> }
>> @@ -1529,6 +1549,7 @@ drm_syncobj_signal_ioctl(struct drm_device *dev,
>> void *data,
>> struct drm_file *file_private)
>> {
>> struct drm_syncobj_array *args = data;
>> + struct drm_syncobj *stack_syncobjs[4];
>> struct drm_syncobj **syncobjs;
>> uint32_t i;
>> int ret;
>> @@ -1545,6 +1566,8 @@ drm_syncobj_signal_ioctl(struct drm_device *dev,
>> void *data,
>> ret = drm_syncobj_array_find(file_private,
>> u64_to_user_ptr(args->handles),
>> args->count_handles,
>> + stack_syncobjs,
>> + ARRAY_SIZE(stack_syncobjs),
>> &syncobjs);
>> if (ret < 0)
>> return ret;
>> @@ -1555,7 +1578,7 @@ drm_syncobj_signal_ioctl(struct drm_device *dev,
>> void *data,
>> break;
>> }
>> - drm_syncobj_array_free(syncobjs, args->count_handles);
>> + drm_syncobj_array_free(syncobjs, args->count_handles,
>> stack_syncobjs);
>> return ret;
>> }
>> @@ -1567,6 +1590,7 @@ drm_syncobj_timeline_signal_ioctl(struct
>> drm_device *dev, void *data,
>> struct drm_syncobj_timeline_array *args = data;
>> uint64_t __user *points = u64_to_user_ptr(args->points);
>> uint32_t i, j, count = args->count_handles;
>> + struct drm_syncobj *stack_syncobjs[4];
>> struct drm_syncobj **syncobjs;
>> struct dma_fence_chain **chains;
>> int ret;
>> @@ -1586,6 +1610,8 @@ drm_syncobj_timeline_signal_ioctl(struct
>> drm_device *dev, void *data,
>> ret = drm_syncobj_array_find(file_private,
>> u64_to_user_ptr(args->handles),
>> count,
>> + stack_syncobjs,
>> + ARRAY_SIZE(stack_syncobjs),
>> &syncobjs);
>> if (ret < 0)
>> return ret;
>> @@ -1622,7 +1648,7 @@ drm_syncobj_timeline_signal_ioctl(struct
>> drm_device *dev, void *data,
>> err_chains:
>> kfree(chains);
>> out:
>> - drm_syncobj_array_free(syncobjs, count);
>> + drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
>> return ret;
>> }
>> @@ -1631,6 +1657,7 @@ int drm_syncobj_query_ioctl(struct drm_device
>> *dev, void *data,
>> struct drm_file *file_private)
>> {
>> struct drm_syncobj_timeline_array *args = data;
>> + struct drm_syncobj *stack_syncobjs[4];
>> struct drm_syncobj **syncobjs;
>> uint64_t __user *points = u64_to_user_ptr(args->points);
>> uint32_t i;
>> @@ -1651,6 +1678,8 @@ int drm_syncobj_query_ioctl(struct drm_device
>> *dev, void *data,
>> ret = drm_syncobj_array_find(file_private,
>> u64_to_user_ptr(args->handles),
>> args->count_handles,
>> + stack_syncobjs,
>> + ARRAY_SIZE(stack_syncobjs),
>> &syncobjs);
>> if (ret < 0)
>> return ret;
>> @@ -1694,7 +1723,7 @@ int drm_syncobj_query_ioctl(struct drm_device
>> *dev, void *data,
>> break;
>> }
>> }
>> - drm_syncobj_array_free(syncobjs, args->count_handles);
>> + drm_syncobj_array_free(syncobjs, args->count_handles,
>> stack_syncobjs);
>> return ret;
>> }
>
More information about the dri-devel
mailing list