[PATCH 7/7] drm/syncobj: Add a fast path to drm_syncobj_array_find
Maíra Canal
mcanal at igalia.com
Tue Mar 25 20:12:50 UTC 2025
Hi Tvrtko,
On 25/03/25 06:54, Tvrtko Ursulin wrote:
>
> On 24/03/2025 23:06, Maíra Canal wrote:
>> Hi Tvrtko,
>>
>> Some nits inline, mostly personal comments. In any case,
>>
>> Reviewed-by: Maíra Canal <mcanal at igalia.com>
>>
>>
>> On 18/03/25 12:54, Tvrtko Ursulin wrote:
>>> Running the Cyberpunk 2077 benchmark we can observe that the lookup
>>> helper
>>> is relatively hot, but 97% of the calls are for a single object.
>>> (~3%
>>> for two points, and never more than three points. While a more trivial
>>> workload like vkmark under Plasma is even more skewed to single point
>>> lookups.)
>>>
>>> Therefore let's add a fast path to bypass the kmalloc_array/kfree and
>>> use a
>>> pre-allocated stack array for those cases.
>>>
>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
>>> ---
>>> drivers/gpu/drm/drm_syncobj.c | 53 +++++++++++++++++++++++++++--------
>>> 1 file changed, 41 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/
>>> drm_syncobj.c
>>> index 94932b89298f..233bdef53c87 100644
>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>> @@ -1223,6 +1223,8 @@ EXPORT_SYMBOL(drm_timeout_abs_to_jiffies);
>>> static int drm_syncobj_array_find(struct drm_file *file_private,
>>> u32 __user *handles,
>>> uint32_t count,
>>> + struct drm_syncobj **stack_syncobjs,
>>> + u32 stack_count,
>>> struct drm_syncobj ***syncobjs_out)
>>> {
>>> struct drm_syncobj **syncobjs;
>>> @@ -1232,9 +1234,13 @@ static int drm_syncobj_array_find(struct
>>> drm_file *file_private,
>>> if (!access_ok(handles, count * sizeof(*handles)))
>>> return -EFAULT;
>>> - syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL);
>>> - if (!syncobjs)
>>> - return -ENOMEM;
>>> + if (count > stack_count) {
>>
>> I believe it's worth adding a comment mentioning that using the stack
>> syncobj is a fast-path that covers most cases.
>
> Yep. But this didn't feel like the right place, so I added comments
> where the callers size the arrays. That, however, means there are two
> duplicated comments. Okay with you?
Sure.
>
>>> + syncobjs = kmalloc_array(count, sizeof(*syncobjs), GFP_KERNEL);
>>> + if (!syncobjs)
>>> + return -ENOMEM;
>>> + } else {
>>> + syncobjs = stack_syncobjs;
>>> + }
>>> for (i = 0; i < count; i++) {
>>> u64 handle;
>>> @@ -1260,25 +1266,31 @@ static int drm_syncobj_array_find(struct
>>> drm_file *file_private,
>>> drm_syncobj_put(syncobjs[i]);
>>> i--;
>>> }
>>> - kfree(syncobjs);
>>> +
>>> + if (syncobjs != stack_syncobjs)
>>
>> Again, I have a slight preference to make `syncobjs = NULL` and avoid
>> this if condition. But it's just a personal preference.
>
> Pending clarifications from the other patch.
Nvm, it wasn't a good idea.
>
>>
>>> + kfree(syncobjs);
>>> return ret;
>>> }
>>> static void drm_syncobj_array_free(struct drm_syncobj **syncobjs,
>>> - uint32_t count)
>>> + uint32_t count,
>>> + struct drm_syncobj **stack_syncobjs)
>>
>> IMO, I think the order `syncobjs, stack_syncobjs, count` is a bit more
>> intuitive.
>
> But count is not directly related to the size of the stack array in this
> function. I could make it a boolean perhaps like this:
>
> static void drm_syncobj_array_free(struct drm_syncobj **syncobjs,
> uint32_t count,
> bool free_array)
>
> And then in the callers:
>
> drm_syncobj_array_free(syncobjs, count, syncobjs != stack_syncobjs);
>
> Would that be clearer?
Yeah, it does.
>
>>
>>> {
>>> uint32_t i;
>>> for (i = 0; i < count; i++)
>>> drm_syncobj_put(syncobjs[i]);
>>> - kfree(syncobjs);
>>> +
>>> + if (syncobjs != stack_syncobjs)
>>> + kfree(syncobjs);
>>> }
>>> int
>>> drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>>> struct drm_file *file_private)
>>> {
>>> + struct drm_syncobj *stack_syncobjs[4];
>>> struct drm_syncobj_wait *args = data;
>>> ktime_t deadline, *pdeadline = NULL;
>>> u32 count = args->count_handles;
>>> @@ -1304,6 +1316,8 @@ drm_syncobj_wait_ioctl(struct drm_device *dev,
>>> void *data,
>>> ret = drm_syncobj_array_find(file_private,
>>> u64_to_user_ptr(args->handles),
>>> count,
>>> + stack_syncobjs,
>>> + ARRAY_SIZE(stack_syncobjs),
>>> &syncobjs);
>>> if (ret < 0)
>>> return ret;
>>> @@ -1321,7 +1335,7 @@ drm_syncobj_wait_ioctl(struct drm_device *dev,
>>> void *data,
>>> &first,
>>> pdeadline);
>>> - drm_syncobj_array_free(syncobjs, count);
>>> + drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
>>> if (timeout < 0)
>>> return timeout;
>>> @@ -1336,6 +1350,7 @@ drm_syncobj_timeline_wait_ioctl(struct
>>> drm_device *dev, void *data,
>>> struct drm_file *file_private)
>>> {
>>> struct drm_syncobj_timeline_wait *args = data;
>>> + struct drm_syncobj *stack_syncobjs[4];
>>
>> Zero initialize it?
>
> Do you see it is required?
Not required; I was just suggesting we double-check that it indeed
isn't needed.
Best Regards,
- Maíra
>
> Regards,
>
> Tvrtko
>
>>> ktime_t deadline, *pdeadline = NULL;
>>> u32 count = args->count_handles;
>>> struct drm_syncobj **syncobjs;
>>> @@ -1361,6 +1376,8 @@ drm_syncobj_timeline_wait_ioctl(struct
>>> drm_device *dev, void *data,
>>> ret = drm_syncobj_array_find(file_private,
>>> u64_to_user_ptr(args->handles),
>>> count,
>>> + stack_syncobjs,
>>> + ARRAY_SIZE(stack_syncobjs),
>>> &syncobjs);
>>> if (ret < 0)
>>> return ret;
>>> @@ -1378,7 +1395,7 @@ drm_syncobj_timeline_wait_ioctl(struct
>>> drm_device *dev, void *data,
>>> &first,
>>> pdeadline);
>>> - drm_syncobj_array_free(syncobjs, count);
>>> + drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
>>> if (timeout < 0)
>>> return timeout;
>>> @@ -1496,6 +1513,7 @@ drm_syncobj_reset_ioctl(struct drm_device *dev,
>>> void *data,
>>> struct drm_file *file_private)
>>> {
>>> struct drm_syncobj_array *args = data;
>>> + struct drm_syncobj *stack_syncobjs[4];
>>> struct drm_syncobj **syncobjs;
>>> uint32_t i;
>>> int ret;
>>> @@ -1512,6 +1530,8 @@ drm_syncobj_reset_ioctl(struct drm_device *dev,
>>> void *data,
>>> ret = drm_syncobj_array_find(file_private,
>>> u64_to_user_ptr(args->handles),
>>> args->count_handles,
>>> + stack_syncobjs,
>>> + ARRAY_SIZE(stack_syncobjs),
>>> &syncobjs);
>>> if (ret < 0)
>>> return ret;
>>> @@ -1519,7 +1539,7 @@ drm_syncobj_reset_ioctl(struct drm_device *dev,
>>> void *data,
>>> for (i = 0; i < args->count_handles; i++)
>>> drm_syncobj_replace_fence(syncobjs[i], NULL);
>>> - drm_syncobj_array_free(syncobjs, args->count_handles);
>>> + drm_syncobj_array_free(syncobjs, args->count_handles,
>>> stack_syncobjs);
>>> return 0;
>>> }
>>> @@ -1529,6 +1549,7 @@ drm_syncobj_signal_ioctl(struct drm_device
>>> *dev, void *data,
>>> struct drm_file *file_private)
>>> {
>>> struct drm_syncobj_array *args = data;
>>> + struct drm_syncobj *stack_syncobjs[4];
>>> struct drm_syncobj **syncobjs;
>>> uint32_t i;
>>> int ret;
>>> @@ -1545,6 +1566,8 @@ drm_syncobj_signal_ioctl(struct drm_device
>>> *dev, void *data,
>>> ret = drm_syncobj_array_find(file_private,
>>> u64_to_user_ptr(args->handles),
>>> args->count_handles,
>>> + stack_syncobjs,
>>> + ARRAY_SIZE(stack_syncobjs),
>>> &syncobjs);
>>> if (ret < 0)
>>> return ret;
>>> @@ -1555,7 +1578,7 @@ drm_syncobj_signal_ioctl(struct drm_device
>>> *dev, void *data,
>>> break;
>>> }
>>> - drm_syncobj_array_free(syncobjs, args->count_handles);
>>> + drm_syncobj_array_free(syncobjs, args->count_handles,
>>> stack_syncobjs);
>>> return ret;
>>> }
>>> @@ -1567,6 +1590,7 @@ drm_syncobj_timeline_signal_ioctl(struct
>>> drm_device *dev, void *data,
>>> struct drm_syncobj_timeline_array *args = data;
>>> uint64_t __user *points = u64_to_user_ptr(args->points);
>>> uint32_t i, j, count = args->count_handles;
>>> + struct drm_syncobj *stack_syncobjs[4];
>>> struct drm_syncobj **syncobjs;
>>> struct dma_fence_chain **chains;
>>> int ret;
>>> @@ -1586,6 +1610,8 @@ drm_syncobj_timeline_signal_ioctl(struct
>>> drm_device *dev, void *data,
>>> ret = drm_syncobj_array_find(file_private,
>>> u64_to_user_ptr(args->handles),
>>> count,
>>> + stack_syncobjs,
>>> + ARRAY_SIZE(stack_syncobjs),
>>> &syncobjs);
>>> if (ret < 0)
>>> return ret;
>>> @@ -1622,7 +1648,7 @@ drm_syncobj_timeline_signal_ioctl(struct
>>> drm_device *dev, void *data,
>>> err_chains:
>>> kfree(chains);
>>> out:
>>> - drm_syncobj_array_free(syncobjs, count);
>>> + drm_syncobj_array_free(syncobjs, count, stack_syncobjs);
>>> return ret;
>>> }
>>> @@ -1631,6 +1657,7 @@ int drm_syncobj_query_ioctl(struct drm_device
>>> *dev, void *data,
>>> struct drm_file *file_private)
>>> {
>>> struct drm_syncobj_timeline_array *args = data;
>>> + struct drm_syncobj *stack_syncobjs[4];
>>> struct drm_syncobj **syncobjs;
>>> uint64_t __user *points = u64_to_user_ptr(args->points);
>>> uint32_t i;
>>> @@ -1651,6 +1678,8 @@ int drm_syncobj_query_ioctl(struct drm_device
>>> *dev, void *data,
>>> ret = drm_syncobj_array_find(file_private,
>>> u64_to_user_ptr(args->handles),
>>> args->count_handles,
>>> + stack_syncobjs,
>>> + ARRAY_SIZE(stack_syncobjs),
>>> &syncobjs);
>>> if (ret < 0)
>>> return ret;
>>> @@ -1694,7 +1723,7 @@ int drm_syncobj_query_ioctl(struct drm_device
>>> *dev, void *data,
>>> break;
>>> }
>>> }
>>> - drm_syncobj_array_free(syncobjs, args->count_handles);
>>> + drm_syncobj_array_free(syncobjs, args->count_handles,
>>> stack_syncobjs);
>>> return ret;
>>> }
>>
>
More information about the dri-devel
mailing list