[PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence

Paneer Selvam, Arunpravin arunpravin.paneerselvam at amd.com
Fri Dec 20 10:45:03 UTC 2024




On 12/20/2024 4:08 PM, Christian König wrote:
> Hi Arun,
>
> Am 20.12.24 um 11:34 schrieb Paneer Selvam, Arunpravin:
>> Hi Christian,
>>
>>
>> On 12/19/2024 4:11 PM, Christian König wrote:
>>>
>>>
>>> Am 19.12.24 um 11:38 schrieb Arunpravin Paneer Selvam:
>>>> Fix out-of-bounds issue in userq fence create when
>>>> accessing the userq xa structure. Added a lock to
>>>> protect the race condition.
>>>>
>>>> v2:(Christian)
>>>>    - Acquire xa lock only for the xa_for_each blocks and
>>>>      not for the kvmalloc_array() memory allocation part.
>>>>
>>>> v3:
>>>>    - Replacing the kvmalloc_array() storage with xa_alloc()
>>>>      solves the problem.
>>>>
>>>> BUG: KASAN: slab-out-of-bounds in 
>>>> amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>>> [  +0.000006] Call Trace:
>>>> [  +0.000005]  <TASK>
>>>> [  +0.000005]  dump_stack_lvl+0x6c/0x90
>>>> [  +0.000011]  print_report+0xc4/0x5e0
>>>> [  +0.000009]  ? srso_return_thunk+0x5/0x5f
>>>> [  +0.000008]  ? kasan_complete_mode_report_info+0x26/0x1d0
>>>> [  +0.000007]  ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>>> [  +0.000405]  kasan_report+0xdf/0x120
>>>> [  +0.000009]  ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>>> [  +0.000405]  __asan_report_store8_noabort+0x17/0x20
>>>> [  +0.000007]  amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>>> [  +0.000406]  ? __pfx_amdgpu_userq_fence_create+0x10/0x10 [amdgpu]
>>>> [  +0.000408]  ? srso_return_thunk+0x5/0x5f
>>>> [  +0.000008]  ? ttm_resource_move_to_lru_tail+0x235/0x4f0 [ttm]
>>>> [  +0.000013]  ? srso_return_thunk+0x5/0x5f
>>>> [  +0.000008]  amdgpu_userq_signal_ioctl+0xd29/0x1c70 [amdgpu]
>>>> [  +0.000412]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>>>> [  +0.000404]  ? try_to_wake_up+0x165/0x1840
>>>> [  +0.000010]  ? __pfx_futex_wake_mark+0x10/0x10
>>>> [  +0.000011]  drm_ioctl_kernel+0x178/0x2f0 [drm]
>>>> [  +0.000050]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>>>> [  +0.000404]  ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm]
>>>> [  +0.000043]  ? __kasan_check_read+0x11/0x20
>>>> [  +0.000007]  ? srso_return_thunk+0x5/0x5f
>>>> [  +0.000007]  ? __kasan_check_write+0x14/0x20
>>>> [  +0.000008]  drm_ioctl+0x513/0xd20 [drm]
>>>> [  +0.000040]  ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>>>> [  +0.000407]  ? __pfx_drm_ioctl+0x10/0x10 [drm]
>>>> [  +0.000044]  ? srso_return_thunk+0x5/0x5f
>>>> [  +0.000007]  ? _raw_spin_lock_irqsave+0x99/0x100
>>>> [  +0.000007]  ? __pfx__raw_spin_lock_irqsave+0x10/0x10
>>>> [  +0.000006]  ? __rseq_handle_notify_resume+0x188/0xc30
>>>> [  +0.000008]  ? srso_return_thunk+0x5/0x5f
>>>> [  +0.000008]  ? srso_return_thunk+0x5/0x5f
>>>> [  +0.000006]  ? _raw_spin_unlock_irqrestore+0x27/0x50
>>>> [  +0.000010]  amdgpu_drm_ioctl+0xcd/0x1d0 [amdgpu]
>>>> [  +0.000388]  __x64_sys_ioctl+0x135/0x1b0
>>>> [  +0.000009]  x64_sys_call+0x1205/0x20d0
>>>> [  +0.000007]  do_syscall_64+0x4d/0x120
>>>> [  +0.000008]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
>>>> [  +0.000007] RIP: 0033:0x7f7c3d31a94f
>>>>
>>>> Signed-off-by: Arunpravin Paneer Selvam 
>>>> <Arunpravin.PaneerSelvam at amd.com>
>>>> ---
>>>>   .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 43 
>>>> +++++++------------
>>>>   .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h   |  3 +-
>>>>   2 files changed, 17 insertions(+), 29 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>>> index 2e7271362ace..4289bed6c1f7 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>>> @@ -122,10 +122,11 @@ int amdgpu_userq_fence_driver_alloc(struct 
>>>> amdgpu_device *adev,
>>>>     void amdgpu_userq_fence_driver_process(struct 
>>>> amdgpu_userq_fence_driver *fence_drv)
>>>>   {
>>>> +    struct amdgpu_userq_fence_driver *stored_fence_drv;
>>>>       struct amdgpu_userq_fence *userq_fence, *tmp;
>>>>       struct dma_fence *fence;
>>>> +    unsigned long index;
>>>>       u64 rptr;
>>>> -    int i;
>>>>         if (!fence_drv)
>>>>           return;
>>>> @@ -141,8 +142,8 @@ void amdgpu_userq_fence_driver_process(struct 
>>>> amdgpu_userq_fence_driver *fence_d
>>>>             dma_fence_signal(fence);
>>>>   -        for (i = 0; i < userq_fence->fence_drv_array_count; i++)
>>>> - amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
>>>> +        xa_for_each(&userq_fence->fence_drv_xa, index, 
>>>> stored_fence_drv)
>>>> + amdgpu_userq_fence_driver_put(stored_fence_drv);
>>>>             list_del(&userq_fence->link);
>>>>           dma_fence_put(fence);
>>>> @@ -221,34 +222,24 @@ int amdgpu_userq_fence_create(struct 
>>>> amdgpu_usermode_queue *userq,
>>>>       dma_fence_init(fence, &amdgpu_userq_fence_ops, 
>>>> &userq_fence->lock,
>>>>                  fence_drv->context, seq);
>>>>   +    xa_init_flags(&userq_fence->fence_drv_xa, XA_FLAGS_ALLOC);
>>>> +
>>>>       amdgpu_userq_fence_driver_get(fence_drv);
>>>>       dma_fence_get(fence);
>>>>         if (!xa_empty(&userq->fence_drv_xa)) {
>>>>           struct amdgpu_userq_fence_driver *stored_fence_drv;
>>>> -        unsigned long index, count = 0;
>>>> -        int i = 0;
>>>> -
>>>> -        xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
>>>> -            count++;
>>>> -
>>>> -        userq_fence->fence_drv_array =
>>>> -            kvmalloc_array(count,
>>>> -                       sizeof(struct amdgpu_userq_fence_driver *),
>>>> -                       GFP_KERNEL);
>>>> -
>>>> -        if (userq_fence->fence_drv_array) {
>>>> -            xa_for_each(&userq->fence_drv_xa, index, 
>>>> stored_fence_drv) {
>>>> -                userq_fence->fence_drv_array[i] = stored_fence_drv;
>>>> -                xa_erase(&userq->fence_drv_xa, index);
>>>> -                i++;
>>>> -            }
>>>> +        unsigned long index_uq;
>>>> +        u32 index_uf;
>>>> +        int err;
>>>> +
>>>> +        xa_for_each(&userq->fence_drv_xa, index_uq, 
>>>> stored_fence_drv) {
>>>> +            err = xa_alloc_irq(&userq_fence->fence_drv_xa, &index_uf,
>>>> +                       stored_fence_drv, xa_limit_32b, GFP_KERNEL);
>>>
>>> That is even worse than what was discussed before. Now you have two 
>>> XAs and the second is incorrectly using GFP_KERNEL.
>>
>> I think the problem here is, the WAIT IOCTL updates the 
>> userq->fence_drv_xa entries between the 2 xa_for_each blocks
>> exactly at kvmalloc_array memory allocation. Though, we are locking 
>> the first and second xa_for_each blocks and having the
>> GFP_ATOMIC in place didnt help to resolve the problem.
>
> Yeah, I agree on the problem. But I don't understand why using 
> GFP_ATOMIC didn't solved the issue.
>
>>
>> For example,
>> kvmalloc_array() is allocating the memory for the count value(say 5) 
>> and before we acquire the second
>> xa_for_each block lock, the count modified to (say 7) by the WAIT 
>> IOCTL xa_alloc() function (by acquiring the same lock),
>> and we would be iterating for the new count. But the memory allocated 
>> would be for 5 entries.
>>
>> xa_lock()
>> first xa_for_each block to count the entries
>> xa_unlock()
>
> When you use GFP_ATOMIC you can drop this xa_unlock().
>
>>
>> kvmalloc_array allocates for count 5
>>
>> xa_lock()
>
> And that xa_lock() and so make sure that the xa isn't modified in 
> between.
Got it. Thanks!

Regards,
Arun.
>
> Regards,
> Christian.
>
>> second xa_for_each block to move the entries to allocated memory
>> here the count increased to 7
>> xa_unlock
>>
>> Thanks,
>> Arun.
>>>
>>> Regards,
>>> Christian.
>>>
>>>> +            if (err)
>>>> +                return err;
>>>>           }
>>>> -
>>>> -        userq_fence->fence_drv_array_count = i;
>>>> -    } else {
>>>> -        userq_fence->fence_drv_array = NULL;
>>>> -        userq_fence->fence_drv_array_count = 0;
>>>> +        xa_destroy(&userq->fence_drv_xa);
>>>>       }
>>>>         /* Check if hardware has already processed the job */
>>>> @@ -300,8 +291,6 @@ static void amdgpu_userq_fence_free(struct 
>>>> rcu_head *rcu)
>>>>         /* Release the fence driver reference */
>>>>       amdgpu_userq_fence_driver_put(fence_drv);
>>>> -
>>>> -    kvfree(userq_fence->fence_drv_array);
>>>>       kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
>>>>   }
>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>>> index f1a90840ac1f..3283e5573d10 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>>> @@ -37,9 +37,8 @@ struct amdgpu_userq_fence {
>>>>        */
>>>>       spinlock_t lock;
>>>>       struct list_head link;
>>>> -    unsigned long fence_drv_array_count;
>>>> +    struct xarray fence_drv_xa;
>>>>       struct amdgpu_userq_fence_driver *fence_drv;
>>>> -    struct amdgpu_userq_fence_driver **fence_drv_array;
>>>>   };
>>>>     struct amdgpu_userq_fence_driver {
>>>
>>
>



More information about the amd-gfx mailing list