[PATCH v2] drm/amdgpu: Fix one use-after-free of VM
Paul Menzel
pmenzel at molgen.mpg.de
Thu Apr 14 05:45:34 UTC 2022
Dear Xinhui,
Thank you for rerolling the patch.
Am 14.04.22 um 07:03 schrieb xinhui pan:
> VM might already be freed when amdgpu_vm_tlb_seq_cb() is called.
> We see the calltrace below.
>
> Fix it by keeping the last flush fence around and wait for it to signal
Nit: Please end the sentence above with a period.
>
> BUG kmalloc-4k (Not tainted): Poison overwritten
>
> 0xffff9c88630414e8-0xffff9c88630414e8 @offset=5352. First byte 0x6c
> instead of 0x6b Allocated in amdgpu_driver_open_kms+0x9d/0x360 [amdgpu]
> age=44 cpu=0 pid=2343
> __slab_alloc.isra.0+0x4f/0x90
> kmem_cache_alloc_trace+0x6b8/0x7a0
> amdgpu_driver_open_kms+0x9d/0x360 [amdgpu]
> drm_file_alloc+0x222/0x3e0 [drm]
> drm_open+0x11d/0x410 [drm]
> Freed in amdgpu_driver_postclose_kms+0x3e9/0x550 [amdgpu] age=22 cpu=1
> pid=2485
> kfree+0x4a2/0x580
> amdgpu_driver_postclose_kms+0x3e9/0x550 [amdgpu]
> drm_file_free+0x24e/0x3c0 [drm]
> drm_close_helper.isra.0+0x90/0xb0 [drm]
> drm_release+0x97/0x1a0 [drm]
> __fput+0xb6/0x280
> ____fput+0xe/0x10
> task_work_run+0x64/0xb0
The v2 annotation is missing, that is, a short note describing what changed since v1.
> Suggested-by: Christian König <christian.koenig at amd.com>
> Signed-off-by: xinhui pan <xinhui.pan at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 ++++++++++++++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 +
> 2 files changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 645ce28277c2..cd5aa7edd451 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -932,9 +932,12 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>
> if (flush_tlb || params.table_freed) {
> tlb_cb->vm = vm;
> - if (!fence || !*fence ||
> - dma_fence_add_callback(*fence, &tlb_cb->cb,
> - amdgpu_vm_tlb_seq_cb))
> + if (fence && *fence &&
> + !dma_fence_add_callback(*fence, &tlb_cb->cb,
> + amdgpu_vm_tlb_seq_cb)) {
> + dma_fence_put(vm->last_tlb_flush);
> + vm->last_tlb_flush = dma_fence_get(*fence);
> + } else
> amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
The Linux kernel coding style requires braces on all branches of a
conditional statement if any one branch uses them. [1]
Kind regards,
Paul
> tlb_cb = NULL;
> }
> @@ -2094,6 +2097,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> vm->update_funcs = &amdgpu_vm_sdma_funcs;
> vm->last_update = NULL;
> vm->last_unlocked = dma_fence_get_stub();
> + vm->last_tlb_flush = dma_fence_get_stub();
>
> mutex_init(&vm->eviction_lock);
> vm->evicting = false;
> @@ -2132,6 +2136,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> vm->root.bo = NULL;
>
> error_free_delayed:
> + dma_fence_put(vm->last_tlb_flush);
> dma_fence_put(vm->last_unlocked);
> drm_sched_entity_destroy(&vm->delayed);
>
> @@ -2248,6 +2253,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> struct amdgpu_bo_va_mapping *mapping, *tmp;
> bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
> struct amdgpu_bo *root;
> + unsigned long flags;
> int i;
>
> amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
> @@ -2257,6 +2263,11 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> amdgpu_vm_set_pasid(adev, vm, 0);
> dma_fence_wait(vm->last_unlocked, false);
> dma_fence_put(vm->last_unlocked);
> + dma_fence_wait(vm->last_tlb_flush, false);
> + /* Make sure that all fence callbacks have completed */
> + spin_lock_irqsave(vm->last_tlb_flush->lock, flags);
> + spin_unlock_irqrestore(vm->last_tlb_flush->lock, flags);
> + dma_fence_put(vm->last_tlb_flush);
>
> list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
> if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 1a814fbffff8..6b06a214f05f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -286,6 +286,7 @@ struct amdgpu_vm {
>
> /* Last finished delayed update */
> atomic64_t tlb_seq;
> + struct dma_fence *last_tlb_flush;
>
> /* Last unlocked submission to the scheduler entities */
> struct dma_fence *last_unlocked;
[1]:
https://www.kernel.org/doc/html/latest/process/coding-style.html#placing-braces-and-spaces
More information about the amd-gfx
mailing list