[Intel-gfx] [PATCH] kernfs: Move faulting copy_user operations outside of the mutex
Joonas Lahtinen
joonas.lahtinen at linux.intel.com
Thu Mar 24 15:01:37 UTC 2016
On to, 2016-03-24 at 14:38 +0000, Chris Wilson wrote:
> A fault in a user provided buffer may lead anywhere, and lockdep warns
> that we have a potential deadlock between the mm->mmap_sem and the
> kernfs file mutex:
>
> [ 82.811702] ======================================================
> [ 82.811705] [ INFO: possible circular locking dependency detected ]
> [ 82.811709] 4.5.0-rc4-gfxbench+ #1 Not tainted
> [ 82.811711] -------------------------------------------------------
> [ 82.811714] kms_setmode/5859 is trying to acquire lock:
> [ 82.811717] (&dev->struct_mutex){+.+.+.}, at: [] drm_gem_mmap+0x1a1/0x270
> [ 82.811731]
> but task is already holding lock:
> [ 82.811734] (&mm->mmap_sem){++++++}, at: [] vm_mmap_pgoff+0x44/0xa0
> [ 82.811745]
> which lock already depends on the new lock.
>
> [ 82.811749]
> the existing dependency chain (in reverse order) is:
> [ 82.811752]
> -> #3 (&mm->mmap_sem){++++++}:
> [ 82.811761] [] lock_acquire+0xc3/0x1d0
> [ 82.811766] [] __might_fault+0x75/0xa0
> [ 82.811771] [] kernfs_fop_write+0x8a/0x180
> [ 82.811787] [] __vfs_write+0x23/0xe0
> [ 82.811792] [] vfs_write+0xa4/0x190
> [ 82.811797] [] SyS_write+0x44/0xb0
> [ 82.811801] [] entry_SYSCALL_64_fastpath+0x16/0x73
> [ 82.811807]
> -> #2 (s_active#6){++++.+}:
> [ 82.811814] [] lock_acquire+0xc3/0x1d0
> [ 82.811819] [] __kernfs_remove+0x210/0x2f0
> [ 82.811823] [] kernfs_remove_by_name_ns+0x40/0xa0
> [ 82.811828] [] sysfs_remove_file_ns+0x10/0x20
> [ 82.811832] [] device_del+0x124/0x250
> [ 82.811837] [] device_unregister+0x19/0x60
> [ 82.811841] [] cpu_cache_sysfs_exit+0x51/0xb0
> [ 82.811846] [] cacheinfo_cpu_callback+0x38/0x70
> [ 82.811851] [] notifier_call_chain+0x39/0xa0
> [ 82.811856] [] __raw_notifier_call_chain+0x9/0x10
> [ 82.811860] [] cpu_notify+0x1e/0x40
> [ 82.811865] [] cpu_notify_nofail+0x9/0x20
> [ 82.811869] [] _cpu_down+0x233/0x340
> [ 82.811874] [] disable_nonboot_cpus+0xc9/0x350
> [ 82.811878] [] suspend_devices_and_enter+0x5a1/0xb50
> [ 82.811883] [] pm_suspend+0x543/0x8d0
> [ 82.811888] [] state_store+0x77/0xe0
> [ 82.811892] [] kobj_attr_store+0xf/0x20
> [ 82.811897] [] sysfs_kf_write+0x40/0x50
> [ 82.811902] [] kernfs_fop_write+0x13c/0x180
> [ 82.811906] [] __vfs_write+0x23/0xe0
> [ 82.811910] [] vfs_write+0xa4/0x190
> [ 82.811914] [] SyS_write+0x44/0xb0
> [ 82.811918] [] entry_SYSCALL_64_fastpath+0x16/0x73
> [ 82.811923]
> -> #1 (cpu_hotplug.lock){+.+.+.}:
> [ 82.811929] [] lock_acquire+0xc3/0x1d0
> [ 82.811933] [] mutex_lock_nested+0x62/0x3b0
> [ 82.811940] [] get_online_cpus+0x61/0x80
> [ 82.811944] [] stop_machine+0x1b/0xe0
> [ 82.811949] [] gen8_ggtt_insert_entries__BKL+0x2d/0x30 [i915]
> [ 82.812009] [] ggtt_bind_vma+0x46/0x70 [i915]
> [ 82.812045] [] i915_vma_bind+0x140/0x290 [i915]
> [ 82.812081] [] i915_gem_object_do_pin+0x899/0xb00 [i915]
> [ 82.812117] [] i915_gem_object_pin+0x35/0x40 [i915]
> [ 82.812154] [] intel_init_pipe_control+0xbe/0x210 [i915]
> [ 82.812192] [] intel_logical_rings_init+0xe2/0xde0 [i915]
> [ 82.812232] [] i915_gem_init+0xf3/0x130 [i915]
> [ 82.812278] [] i915_driver_load+0xf2d/0x1770 [i915]
> [ 82.812318] [] drm_dev_register+0xa4/0xb0
> [ 82.812323] [] drm_get_pci_dev+0xce/0x1e0
> [ 82.812328] [] i915_pci_probe+0x2f/0x50 [i915]
> [ 82.812360] [] pci_device_probe+0x87/0xf0
> [ 82.812366] [] driver_probe_device+0x229/0x450
> [ 82.812371] [] __driver_attach+0x83/0x90
> [ 82.812375] [] bus_for_each_dev+0x61/0xa0
> [ 82.812380] [] driver_attach+0x19/0x20
> [ 82.812384] [] bus_add_driver+0x1ef/0x290
> [ 82.812388] [] driver_register+0x5b/0xe0
> [ 82.812393] [] __pci_register_driver+0x5b/0x60
> [ 82.812398] [] drm_pci_init+0xd6/0x100
> [ 82.812402] [] 0xffffffffa027c094
> [ 82.812406] [] do_one_initcall+0xae/0x1d0
> [ 82.812412] [] do_init_module+0x5b/0x1cb
> [ 82.812417] [] load_module+0x1c20/0x2480
> [ 82.812422] [] SyS_finit_module+0x7e/0xa0
> [ 82.812428] [] entry_SYSCALL_64_fastpath+0x16/0x73
> [ 82.812433]
> -> #0 (&dev->struct_mutex){+.+.+.}:
> [ 82.812439] [] __lock_acquire+0x1fc9/0x20f0
> [ 82.812443] [] lock_acquire+0xc3/0x1d0
> [ 82.812456] [] drm_gem_mmap+0x1c7/0x270
> [ 82.812460] [] mmap_region+0x334/0x580
> [ 82.812466] [] do_mmap+0x364/0x410
> [ 82.812470] [] vm_mmap_pgoff+0x6d/0xa0
> [ 82.812474] [] SyS_mmap_pgoff+0x184/0x220
> [ 82.812479] [] SyS_mmap+0x1d/0x20
> [ 82.812484] [] entry_SYSCALL_64_fastpath+0x16/0x73
> [ 82.812489]
> other info that might help us debug this:
>
> [ 82.812493] Chain exists of:
> &dev->struct_mutex --> s_active#6 --> &mm->mmap_sem
>
> [ 82.812502] Possible unsafe locking scenario:
>
> [ 82.812506] CPU0 CPU1
> [ 82.812508] ---- ----
> [ 82.812510] lock(&mm->mmap_sem);
> [ 82.812514] lock(s_active#6);
> [ 82.812519] lock(&mm->mmap_sem);
> [ 82.812522] lock(&dev->struct_mutex);
> [ 82.812526]
> *** DEADLOCK ***
>
> [ 82.812531] 1 lock held by kms_setmode/5859:
> [ 82.812533] #0: (&mm->mmap_sem){++++++}, at: [] vm_mmap_pgoff+0x44/0xa0
> [ 82.812541]
> stack backtrace:
> [ 82.812547] CPU: 0 PID: 5859 Comm: kms_setmode Not tainted 4.5.0-rc4-gfxbench+ #1
> [ 82.812550] Hardware name: /NUC5CPYB, BIOS PYBSWCEL.86A.0040.2015.0814.1353 08/14/2015
> [ 82.812553] 0000000000000000 ffff880079407bf0 ffffffff813f8505 ffffffff825fb270
> [ 82.812560] ffffffff825c4190 ffff880079407c30 ffffffff810c84ac ffff880079407c90
> [ 82.812566] ffff8800797ed328 ffff8800797ecb00 0000000000000001 ffff8800797ed350
> [ 82.812573] Call Trace:
> [ 82.812578] [] dump_stack+0x67/0x92
> [ 82.812582] [] print_circular_bug+0x1fc/0x310
> [ 82.812586] [] __lock_acquire+0x1fc9/0x20f0
> [ 82.812590] [] lock_acquire+0xc3/0x1d0
> [ 82.812594] [] ? drm_gem_mmap+0x1a1/0x270
> [ 82.812599] [] drm_gem_mmap+0x1c7/0x270
> [ 82.812603] [] ? drm_gem_mmap+0x1a1/0x270
> [ 82.812608] [] mmap_region+0x334/0x580
> [ 82.812612] [] do_mmap+0x364/0x410
> [ 82.812616] [] vm_mmap_pgoff+0x6d/0xa0
> [ 82.812629] [] SyS_mmap_pgoff+0x184/0x220
> [ 82.812633] [] SyS_mmap+0x1d/0x20
> [ 82.812637] [] entry_SYSCALL_64_fastpath+0x16/0x73
>
> Highly unlikely though this scenario is, we can avoid the issue entirely
> by moving the copy operation from out under the mutex by always allocating
> a temporary buffer for the operation (rather than reuse the preallocated
> buf which requires the mutex for serialisation).
>
> The locked section was extended by the addition of the preallocated buf
> to speed up md user operations in
>
> commit 2b75869bba676c248d8d25ae6d2bd9221dfffdb6
> Author: NeilBrown <neilb at suse.de>
> Date: Mon Oct 13 16:41:28 2014 +1100
>
> sysfs/kernfs: allow attributes to request write buffer be pre-allocated.
>
> Reported-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94350
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
This looks pretty much like we discussed, so:
Reviewed-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Ignore-Cc: Tejun Heo <tj at kernel.org>
> Ignore-Cc: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
> Ignore-Cc: NeilBrown <neilb at suse.de>
> ---
> fs/kernfs/file.c | 51 ++++++++++++++++++++++++++++----------------------
> include/linux/kernfs.h | 1 +
> 2 files changed, 30 insertions(+), 22 deletions(-)
>
> diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
> index 7247252ee9b1..e1574008adc9 100644
> --- a/fs/kernfs/file.c
> +++ b/fs/kernfs/file.c
> @@ -190,15 +190,16 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
> char *buf;
>
> buf = of->prealloc_buf;
> - if (!buf)
> + if (buf)
> + mutex_lock(&of->prealloc_mutex);
> + else
> buf = kmalloc(len, GFP_KERNEL);
> if (!buf)
> return -ENOMEM;
>
> /*
> * @of->mutex nests outside active ref and is used both to ensure that
> - * the ops aren't called concurrently for the same open file, and
> - * to provide exclusive access to ->prealloc_buf (when that exists).
> + * the ops aren't called concurrently for the same open file.
> */
> mutex_lock(&of->mutex);
> if (!kernfs_get_active(of->kn)) {
> @@ -214,21 +215,23 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
> else
> len = -EINVAL;
>
> + kernfs_put_active(of->kn);
> + mutex_unlock(&of->mutex);
> +
> if (len < 0)
> - goto out_unlock;
> + goto out_free;
>
> if (copy_to_user(user_buf, buf, len)) {
> len = -EFAULT;
> - goto out_unlock;
> + goto out_free;
> }
>
> *ppos += len;
>
> - out_unlock:
> - kernfs_put_active(of->kn);
> - mutex_unlock(&of->mutex);
> out_free:
> - if (buf != of->prealloc_buf)
> + if (buf == of->prealloc_buf)
> + mutex_unlock(&of->prealloc_mutex);
> + else
> kfree(buf);
> return len;
> }
> @@ -284,15 +287,22 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
> }
>
> buf = of->prealloc_buf;
> - if (!buf)
> + if (buf)
> + mutex_lock(&of->prealloc_mutex);
> + else
> buf = kmalloc(len + 1, GFP_KERNEL);
> if (!buf)
> return -ENOMEM;
>
> + if (copy_from_user(buf, user_buf, len)) {
> + len = -EFAULT;
> + goto out_free;
> + }
> + buf[len] = '\0'; /* guarantee string termination */
> +
> /*
> * @of->mutex nests outside active ref and is used both to ensure that
> - * the ops aren't called concurrently for the same open file, and
> - * to provide exclusive access to ->prealloc_buf (when that exists).
> + * the ops aren't called concurrently for the same open file.
> */
> mutex_lock(&of->mutex);
> if (!kernfs_get_active(of->kn)) {
> @@ -301,26 +311,22 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
> goto out_free;
> }
>
> - if (copy_from_user(buf, user_buf, len)) {
> - len = -EFAULT;
> - goto out_unlock;
> - }
> - buf[len] = '\0'; /* guarantee string termination */
> -
> ops = kernfs_ops(of->kn);
> if (ops->write)
> len = ops->write(of, buf, len, *ppos);
> else
> len = -EINVAL;
>
> + kernfs_put_active(of->kn);
> + mutex_unlock(&of->mutex);
> +
> if (len > 0)
> *ppos += len;
>
> -out_unlock:
> - kernfs_put_active(of->kn);
> - mutex_unlock(&of->mutex);
> out_free:
> - if (buf != of->prealloc_buf)
> + if (buf == of->prealloc_buf)
> + mutex_unlock(&of->prealloc_mutex);
> + else
> kfree(buf);
> return len;
> }
> @@ -687,6 +693,7 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
> error = -ENOMEM;
> if (!of->prealloc_buf)
> goto err_free;
> + mutex_init(&of->prealloc_mutex);
> }
>
> /*
> diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
> index af51df35d749..019ef3416900 100644
> --- a/include/linux/kernfs.h
> +++ b/include/linux/kernfs.h
> @@ -177,6 +177,7 @@ struct kernfs_open_file {
>
> /* private fields, do not use outside kernfs proper */
> struct mutex mutex;
> + struct mutex prealloc_mutex;
> int event;
> struct list_head list;
> char *prealloc_buf;
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
More information about the Intel-gfx
mailing list