[PATCH v3 1/3] drm/amdgpu: Fix out-of-bounds issue in user fence
Christian König
christian.koenig at amd.com
Fri Dec 20 10:38:10 UTC 2024
Hi Arun,
Am 20.12.24 um 11:34 schrieb Paneer Selvam, Arunpravin:
> Hi Christian,
>
>
> On 12/19/2024 4:11 PM, Christian König wrote:
>>
>>
>> Am 19.12.24 um 11:38 schrieb Arunpravin Paneer Selvam:
>>> Fix out-of-bounds issue in userq fence create when
>>> accessing the userq xa structure. Added a lock to
>>> protect the race condition.
>>>
>>> v2:(Christian)
>>> - Acquire xa lock only for the xa_for_each blocks and
>>> not for the kvmalloc_array() memory allocation part.
>>>
>>> v3:
>>> - Replacing the kvmalloc_array() storage with xa_alloc()
>>> solves the problem.
>>>
>>> BUG: KASAN: slab-out-of-bounds in
>>> amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>> [ +0.000006] Call Trace:
>>> [ +0.000005] <TASK>
>>> [ +0.000005] dump_stack_lvl+0x6c/0x90
>>> [ +0.000011] print_report+0xc4/0x5e0
>>> [ +0.000009] ? srso_return_thunk+0x5/0x5f
>>> [ +0.000008] ? kasan_complete_mode_report_info+0x26/0x1d0
>>> [ +0.000007] ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>> [ +0.000405] kasan_report+0xdf/0x120
>>> [ +0.000009] ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>> [ +0.000405] __asan_report_store8_noabort+0x17/0x20
>>> [ +0.000007] amdgpu_userq_fence_create+0x726/0x880 [amdgpu]
>>> [ +0.000406] ? __pfx_amdgpu_userq_fence_create+0x10/0x10 [amdgpu]
>>> [ +0.000408] ? srso_return_thunk+0x5/0x5f
>>> [ +0.000008] ? ttm_resource_move_to_lru_tail+0x235/0x4f0 [ttm]
>>> [ +0.000013] ? srso_return_thunk+0x5/0x5f
>>> [ +0.000008] amdgpu_userq_signal_ioctl+0xd29/0x1c70 [amdgpu]
>>> [ +0.000412] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>>> [ +0.000404] ? try_to_wake_up+0x165/0x1840
>>> [ +0.000010] ? __pfx_futex_wake_mark+0x10/0x10
>>> [ +0.000011] drm_ioctl_kernel+0x178/0x2f0 [drm]
>>> [ +0.000050] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>>> [ +0.000404] ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm]
>>> [ +0.000043] ? __kasan_check_read+0x11/0x20
>>> [ +0.000007] ? srso_return_thunk+0x5/0x5f
>>> [ +0.000007] ? __kasan_check_write+0x14/0x20
>>> [ +0.000008] drm_ioctl+0x513/0xd20 [drm]
>>> [ +0.000040] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
>>> [ +0.000407] ? __pfx_drm_ioctl+0x10/0x10 [drm]
>>> [ +0.000044] ? srso_return_thunk+0x5/0x5f
>>> [ +0.000007] ? _raw_spin_lock_irqsave+0x99/0x100
>>> [ +0.000007] ? __pfx__raw_spin_lock_irqsave+0x10/0x10
>>> [ +0.000006] ? __rseq_handle_notify_resume+0x188/0xc30
>>> [ +0.000008] ? srso_return_thunk+0x5/0x5f
>>> [ +0.000008] ? srso_return_thunk+0x5/0x5f
>>> [ +0.000006] ? _raw_spin_unlock_irqrestore+0x27/0x50
>>> [ +0.000010] amdgpu_drm_ioctl+0xcd/0x1d0 [amdgpu]
>>> [ +0.000388] __x64_sys_ioctl+0x135/0x1b0
>>> [ +0.000009] x64_sys_call+0x1205/0x20d0
>>> [ +0.000007] do_syscall_64+0x4d/0x120
>>> [ +0.000008] entry_SYSCALL_64_after_hwframe+0x76/0x7e
>>> [ +0.000007] RIP: 0033:0x7f7c3d31a94f
>>>
>>> Signed-off-by: Arunpravin Paneer Selvam
>>> <Arunpravin.PaneerSelvam at amd.com>
>>> ---
>>> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 43
>>> +++++++------------
>>> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 3 +-
>>> 2 files changed, 17 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>> index 2e7271362ace..4289bed6c1f7 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>> @@ -122,10 +122,11 @@ int amdgpu_userq_fence_driver_alloc(struct
>>> amdgpu_device *adev,
>>> void amdgpu_userq_fence_driver_process(struct
>>> amdgpu_userq_fence_driver *fence_drv)
>>> {
>>> + struct amdgpu_userq_fence_driver *stored_fence_drv;
>>> struct amdgpu_userq_fence *userq_fence, *tmp;
>>> struct dma_fence *fence;
>>> + unsigned long index;
>>> u64 rptr;
>>> - int i;
>>> if (!fence_drv)
>>> return;
>>> @@ -141,8 +142,8 @@ void amdgpu_userq_fence_driver_process(struct
>>> amdgpu_userq_fence_driver *fence_d
>>> dma_fence_signal(fence);
>>> - for (i = 0; i < userq_fence->fence_drv_array_count; i++)
>>> - amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
>>> + xa_for_each(&userq_fence->fence_drv_xa, index,
>>> stored_fence_drv)
>>> + amdgpu_userq_fence_driver_put(stored_fence_drv);
>>> list_del(&userq_fence->link);
>>> dma_fence_put(fence);
>>> @@ -221,34 +222,24 @@ int amdgpu_userq_fence_create(struct
>>> amdgpu_usermode_queue *userq,
>>> dma_fence_init(fence, &amdgpu_userq_fence_ops,
>>> &userq_fence->lock,
>>> fence_drv->context, seq);
>>> + xa_init_flags(&userq_fence->fence_drv_xa, XA_FLAGS_ALLOC);
>>> +
>>> amdgpu_userq_fence_driver_get(fence_drv);
>>> dma_fence_get(fence);
>>> if (!xa_empty(&userq->fence_drv_xa)) {
>>> struct amdgpu_userq_fence_driver *stored_fence_drv;
>>> - unsigned long index, count = 0;
>>> - int i = 0;
>>> -
>>> - xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
>>> - count++;
>>> -
>>> - userq_fence->fence_drv_array =
>>> - kvmalloc_array(count,
>>> - sizeof(struct amdgpu_userq_fence_driver *),
>>> - GFP_KERNEL);
>>> -
>>> - if (userq_fence->fence_drv_array) {
>>> - xa_for_each(&userq->fence_drv_xa, index,
>>> stored_fence_drv) {
>>> - userq_fence->fence_drv_array[i] = stored_fence_drv;
>>> - xa_erase(&userq->fence_drv_xa, index);
>>> - i++;
>>> - }
>>> + unsigned long index_uq;
>>> + u32 index_uf;
>>> + int err;
>>> +
>>> + xa_for_each(&userq->fence_drv_xa, index_uq,
>>> stored_fence_drv) {
>>> + err = xa_alloc_irq(&userq_fence->fence_drv_xa, &index_uf,
>>> + stored_fence_drv, xa_limit_32b, GFP_KERNEL);
>>
>> That is even worse than what was discussed before. Now you have two
>> XAs and the second is incorrectly using GFP_KERNEL.
>
> I think the problem here is, the WAIT IOCTL updates the
> userq->fence_drv_xa entries between the 2 xa_for_each blocks
> exactly at kvmalloc_array memory allocation. Though, we are locking
> the first and second xa_for_each blocks and having the
> GFP_ATOMIC in place didnt help to resolve the problem.
Yeah, I agree on the problem. But I don't understand why using
GFP_ATOMIC didn't solved the issue.
>
> For example,
> kvmalloc_array() is allocating the memory for the count value(say 5)
> and before we acquire the second
> xa_for_each block lock, the count modified to (say 7) by the WAIT
> IOCTL xa_alloc() function (by acquiring the same lock),
> and we would be iterating for the new count. But the memory allocated
> would be for 5 entries.
>
> xa_lock()
> first xa_for_each block to count the entries
> xa_unlock()
When you use GFP_ATOMIC you can drop this xa_unlock().
>
> kvmalloc_array allocates for count 5
>
> xa_lock()
And that xa_lock() and so make sure that the xa isn't modified in between.
Regards,
Christian.
> second xa_for_each block to move the entries to allocated memory
> here the count increased to 7
> xa_unlock
>
> Thanks,
> Arun.
>>
>> Regards,
>> Christian.
>>
>>> + if (err)
>>> + return err;
>>> }
>>> -
>>> - userq_fence->fence_drv_array_count = i;
>>> - } else {
>>> - userq_fence->fence_drv_array = NULL;
>>> - userq_fence->fence_drv_array_count = 0;
>>> + xa_destroy(&userq->fence_drv_xa);
>>> }
>>> /* Check if hardware has already processed the job */
>>> @@ -300,8 +291,6 @@ static void amdgpu_userq_fence_free(struct
>>> rcu_head *rcu)
>>> /* Release the fence driver reference */
>>> amdgpu_userq_fence_driver_put(fence_drv);
>>> -
>>> - kvfree(userq_fence->fence_drv_array);
>>> kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
>>> }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>> index f1a90840ac1f..3283e5573d10 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>> @@ -37,9 +37,8 @@ struct amdgpu_userq_fence {
>>> */
>>> spinlock_t lock;
>>> struct list_head link;
>>> - unsigned long fence_drv_array_count;
>>> + struct xarray fence_drv_xa;
>>> struct amdgpu_userq_fence_driver *fence_drv;
>>> - struct amdgpu_userq_fence_driver **fence_drv_array;
>>> };
>>> struct amdgpu_userq_fence_driver {
>>
>
More information about the amd-gfx
mailing list