[PATCH 1/1] drm/xe: Untangle vm_bind_ioctl cleanup order and fix double free bug
Manszewski, Christoph
christoph.manszewski at intel.com
Wed Aug 13 08:28:35 UTC 2025
Hi Matt,
On 12.08.2025 22:16, Matthew Brost wrote:
> On Tue, Aug 12, 2025 at 03:17:28PM +0200, Christoph Manszewski wrote:
>> If the argument check during an array bind fails, the bind_ops are freed
>> twice as seen in [1]. While at it, fix the ordering of resource cleanup
>> to align with reverse allocation order.
>>
>> [1]:
>> ==================================================================
>> BUG: KASAN: double-free in xe_vm_bind_ioctl+0x1b2/0x21f0 [xe]
>> Free of addr ffff88813bb9b800 by task xe_vm/14198
>>
>> CPU: 5 UID: 0 PID: 14198 Comm: xe_vm Not tainted 6.16.0-xe-eudebug-cmanszew+ #520 PREEMPT(full)
>> Hardware name: Intel Corporation Alder Lake Client Platform/AlderLake-P DDR5 RVP, BIOS ADLPFWI1.R00.2411.A02.2110081023 10/08/2021
>> Call Trace:
>> <TASK>
>> dump_stack_lvl+0x82/0xd0
>> print_report+0xcb/0x610
>> ? __virt_addr_valid+0x19a/0x300
>> ? xe_vm_bind_ioctl+0x1b2/0x21f0 [xe]
>> kasan_report_invalid_free+0xc8/0xf0
>> ? xe_vm_bind_ioctl+0x1b2/0x21f0 [xe]
>> ? xe_vm_bind_ioctl+0x1b2/0x21f0 [xe]
>> check_slab_allocation+0x102/0x130
>> kfree+0x10d/0x440
>> ? should_fail_ex+0x57/0x2f0
>> ? xe_vm_bind_ioctl+0x1b2/0x21f0 [xe]
>> xe_vm_bind_ioctl+0x1b2/0x21f0 [xe]
>> ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe]
>> ? __lock_acquire+0xab9/0x27f0
>> ? lock_acquire+0x165/0x300
>> ? drm_dev_enter+0x53/0xe0 [drm]
>> ? find_held_lock+0x2b/0x80
>> ? drm_dev_exit+0x30/0x50 [drm]
>> ? drm_ioctl_kernel+0x128/0x1c0 [drm]
>> drm_ioctl_kernel+0x128/0x1c0 [drm]
>> ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe]
>> ? find_held_lock+0x2b/0x80
>> ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm]
>> ? should_fail_ex+0x57/0x2f0
>> ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe]
>> drm_ioctl+0x352/0x620 [drm]
>> ? __pfx_drm_ioctl+0x10/0x10 [drm]
>> ? __pfx_rpm_resume+0x10/0x10
>> ? do_raw_spin_lock+0x11a/0x1b0
>> ? find_held_lock+0x2b/0x80
>> ? __pm_runtime_resume+0x61/0xc0
>> ? rcu_is_watching+0x20/0x50
>> ? trace_irq_enable.constprop.0+0xac/0xe0
>> xe_drm_ioctl+0x91/0xc0 [xe]
>> __x64_sys_ioctl+0xb2/0x100
>> ? rcu_is_watching+0x20/0x50
>> do_syscall_64+0x68/0x2e0
>> entry_SYSCALL_64_after_hwframe+0x76/0x7e
>> RIP: 0033:0x7fa9acb24ded
>> Code: 04 25 28 00 00 00 48 89 45 c8 31 c0 48 8d 45 10 c7 45 b0 10 00 00 00 48 89 45 b8 48 8d 45 d0 48 89 45 c0 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 1a 48 8b 45 c8 64 48 2b 04 25 28 00 00 00
>> RSP: 002b:00007fff189e1370 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
>> RAX: ffffffffffffffda RBX: ffffffffffffff68 RCX: 00007fa9acb24ded
>> RDX: 00007fff189e19f0 RSI: 0000000040886445 RDI: 0000000000000003
>> RBP: 00007fff189e13c0 R08: 0000000000000000 R09: 00005561d3fb47d0
>> R10: 00007fa9ace29be0 R11: 0000000000000246 R12: 00007fff189e19f0
>> R13: 0000000040886445 R14: 0000000000000003 R15: 0000000000000003
>> </TASK>
>>
>> Cc: Matthew Brost <matthew.brost at intel.com>
>> Cc: Francois Dugast <francois.dugast at intel.com>
>> Signed-off-by: Christoph Manszewski <christoph.manszewski at intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_vm.c | 18 +++++++++++-------
>> 1 file changed, 11 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
>> index d40d2d43c041..0bfc32fd8399 100644
>> --- a/drivers/gpu/drm/xe/xe_vm.c
>> +++ b/drivers/gpu/drm/xe/xe_vm.c
>> @@ -3442,6 +3442,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
>> free_bind_ops:
>> if (args->num_binds > 1)
>> kvfree(*bind_ops);
>> + bind_ops = NULL;
>
> This should be *bind_ops = NULL, right?
Oops, yes indeed!
>
> Or we could just simply delete the kvfree here and let there be a
> single free at the caller too. I'd slightly lean towards this option.
Hmm, isn't it a bit weird to expect the caller to free allocated memory
if the allocation function failed?
>
>> return err;
>> }
>>
>> @@ -3548,7 +3549,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>> struct xe_exec_queue *q = NULL;
>> u32 num_syncs, num_ufence = 0;
>> struct xe_sync_entry *syncs = NULL;
>> - struct drm_xe_vm_bind_op *bind_ops;
>> + struct drm_xe_vm_bind_op *bind_ops = NULL;
>> struct xe_vma_ops vops;
>> struct dma_fence *fence;
>> int err;
>> @@ -3566,7 +3567,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>> q = xe_exec_queue_lookup(xef, args->exec_queue_id);
>> if (XE_IOCTL_DBG(xe, !q)) {
>> err = -ENOENT;
>
> All of the following changes are not strictly required, given kvfree is
> NULL safe? Rather this is just a preferred style cleanup. Since this is
> a fixes patch we'd want to backport, I'd split this patch into 2 distinct
> patches - one which fixes the double free, one which cleans up the
> style.
Ok, I will split into two patches then.
Thanks,
Christoph
>
> Matt
>
>> - goto put_vm;
>> + goto free_bind_ops;
>> }
>>
>> if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
>> @@ -3612,7 +3613,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>> __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
>> if (!ops) {
>> err = -ENOMEM;
>> - goto release_vm_lock;
>> + goto free_bos;
>> }
>> }
>>
>> @@ -3746,17 +3747,20 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>> put_obj:
>> for (i = 0; i < args->num_binds; ++i)
>> xe_bo_put(bos[i]);
>> +
>> + kvfree(ops);
>> +free_bos:
>> + kvfree(bos);
>> release_vm_lock:
>> up_write(&vm->lock);
>> put_exec_queue:
>> if (q)
>> xe_exec_queue_put(q);
>> -put_vm:
>> - xe_vm_put(vm);
>> - kvfree(bos);
>> - kvfree(ops);
>> +free_bind_ops:
>> if (args->num_binds > 1)
>> kvfree(bind_ops);
>> +put_vm:
>> + xe_vm_put(vm);
>> return err;
>> }
>>
>> --
>> 2.47.1
>>
More information about the Intel-xe
mailing list