[Intel-gfx] [PATCH] kernfs: Move faulting copy_user operations outside of the mutex

Joonas Lahtinen joonas.lahtinen at linux.intel.com
Thu Mar 24 15:01:37 UTC 2016


On to, 2016-03-24 at 14:38 +0000, Chris Wilson wrote:
> A fault in a user provided buffer may lead anywhere, and lockdep warns
> that we have a potential deadlock between the mm->mmap_sem and the
> kernfs file mutex:
> 
> [   82.811702] ======================================================
> [   82.811705] [ INFO: possible circular locking dependency detected ]
> [   82.811709] 4.5.0-rc4-gfxbench+ #1 Not tainted
> [   82.811711] -------------------------------------------------------
> [   82.811714] kms_setmode/5859 is trying to acquire lock:
> [   82.811717]  (&dev->struct_mutex){+.+.+.}, at: [] drm_gem_mmap+0x1a1/0x270
> [   82.811731]
> but task is already holding lock:
> [   82.811734]  (&mm->mmap_sem){++++++}, at: [] vm_mmap_pgoff+0x44/0xa0
> [   82.811745]
> which lock already depends on the new lock.
> 
> [   82.811749]
> the existing dependency chain (in reverse order) is:
> [   82.811752]
> -> #3 (&mm->mmap_sem){++++++}:
> [   82.811761]        [] lock_acquire+0xc3/0x1d0
> [   82.811766]        [] __might_fault+0x75/0xa0
> [   82.811771]        [] kernfs_fop_write+0x8a/0x180
> [   82.811787]        [] __vfs_write+0x23/0xe0
> [   82.811792]        [] vfs_write+0xa4/0x190
> [   82.811797]        [] SyS_write+0x44/0xb0
> [   82.811801]        [] entry_SYSCALL_64_fastpath+0x16/0x73
> [   82.811807]
> -> #2 (s_active#6){++++.+}:
> [   82.811814]        [] lock_acquire+0xc3/0x1d0
> [   82.811819]        [] __kernfs_remove+0x210/0x2f0
> [   82.811823]        [] kernfs_remove_by_name_ns+0x40/0xa0
> [   82.811828]        [] sysfs_remove_file_ns+0x10/0x20
> [   82.811832]        [] device_del+0x124/0x250
> [   82.811837]        [] device_unregister+0x19/0x60
> [   82.811841]        [] cpu_cache_sysfs_exit+0x51/0xb0
> [   82.811846]        [] cacheinfo_cpu_callback+0x38/0x70
> [   82.811851]        [] notifier_call_chain+0x39/0xa0
> [   82.811856]        [] __raw_notifier_call_chain+0x9/0x10
> [   82.811860]        [] cpu_notify+0x1e/0x40
> [   82.811865]        [] cpu_notify_nofail+0x9/0x20
> [   82.811869]        [] _cpu_down+0x233/0x340
> [   82.811874]        [] disable_nonboot_cpus+0xc9/0x350
> [   82.811878]        [] suspend_devices_and_enter+0x5a1/0xb50
> [   82.811883]        [] pm_suspend+0x543/0x8d0
> [   82.811888]        [] state_store+0x77/0xe0
> [   82.811892]        [] kobj_attr_store+0xf/0x20
> [   82.811897]        [] sysfs_kf_write+0x40/0x50
> [   82.811902]        [] kernfs_fop_write+0x13c/0x180
> [   82.811906]        [] __vfs_write+0x23/0xe0
> [   82.811910]        [] vfs_write+0xa4/0x190
> [   82.811914]        [] SyS_write+0x44/0xb0
> [   82.811918]        [] entry_SYSCALL_64_fastpath+0x16/0x73
> [   82.811923]
> -> #1 (cpu_hotplug.lock){+.+.+.}:
> [   82.811929]        [] lock_acquire+0xc3/0x1d0
> [   82.811933]        [] mutex_lock_nested+0x62/0x3b0
> [   82.811940]        [] get_online_cpus+0x61/0x80
> [   82.811944]        [] stop_machine+0x1b/0xe0
> [   82.811949]        [] gen8_ggtt_insert_entries__BKL+0x2d/0x30 [i915]
> [   82.812009]        [] ggtt_bind_vma+0x46/0x70 [i915]
> [   82.812045]        [] i915_vma_bind+0x140/0x290 [i915]
> [   82.812081]        [] i915_gem_object_do_pin+0x899/0xb00 [i915]
> [   82.812117]        [] i915_gem_object_pin+0x35/0x40 [i915]
> [   82.812154]        [] intel_init_pipe_control+0xbe/0x210 [i915]
> [   82.812192]        [] intel_logical_rings_init+0xe2/0xde0 [i915]
> [   82.812232]        [] i915_gem_init+0xf3/0x130 [i915]
> [   82.812278]        [] i915_driver_load+0xf2d/0x1770 [i915]
> [   82.812318]        [] drm_dev_register+0xa4/0xb0
> [   82.812323]        [] drm_get_pci_dev+0xce/0x1e0
> [   82.812328]        [] i915_pci_probe+0x2f/0x50 [i915]
> [   82.812360]        [] pci_device_probe+0x87/0xf0
> [   82.812366]        [] driver_probe_device+0x229/0x450
> [   82.812371]        [] __driver_attach+0x83/0x90
> [   82.812375]        [] bus_for_each_dev+0x61/0xa0
> [   82.812380]        [] driver_attach+0x19/0x20
> [   82.812384]        [] bus_add_driver+0x1ef/0x290
> [   82.812388]        [] driver_register+0x5b/0xe0
> [   82.812393]        [] __pci_register_driver+0x5b/0x60
> [   82.812398]        [] drm_pci_init+0xd6/0x100
> [   82.812402]        [] 0xffffffffa027c094
> [   82.812406]        [] do_one_initcall+0xae/0x1d0
> [   82.812412]        [] do_init_module+0x5b/0x1cb
> [   82.812417]        [] load_module+0x1c20/0x2480
> [   82.812422]        [] SyS_finit_module+0x7e/0xa0
> [   82.812428]        [] entry_SYSCALL_64_fastpath+0x16/0x73
> [   82.812433]
> -> #0 (&dev->struct_mutex){+.+.+.}:
> [   82.812439]        [] __lock_acquire+0x1fc9/0x20f0
> [   82.812443]        [] lock_acquire+0xc3/0x1d0
> [   82.812456]        [] drm_gem_mmap+0x1c7/0x270
> [   82.812460]        [] mmap_region+0x334/0x580
> [   82.812466]        [] do_mmap+0x364/0x410
> [   82.812470]        [] vm_mmap_pgoff+0x6d/0xa0
> [   82.812474]        [] SyS_mmap_pgoff+0x184/0x220
> [   82.812479]        [] SyS_mmap+0x1d/0x20
> [   82.812484]        [] entry_SYSCALL_64_fastpath+0x16/0x73
> [   82.812489]
> other info that might help us debug this:
> 
> [   82.812493] Chain exists of:
>   &dev->struct_mutex --> s_active#6 --> &mm->mmap_sem
> 
> [   82.812502]  Possible unsafe locking scenario:
> 
> [   82.812506]        CPU0                    CPU1
> [   82.812508]        ----                    ----
> [   82.812510]   lock(&mm->mmap_sem);
> [   82.812514]                                lock(s_active#6);
> [   82.812519]                                lock(&mm->mmap_sem);
> [   82.812522]   lock(&dev->struct_mutex);
> [   82.812526]
>  *** DEADLOCK ***
> 
> [   82.812531] 1 lock held by kms_setmode/5859:
> [   82.812533]  #0:  (&mm->mmap_sem){++++++}, at: [] vm_mmap_pgoff+0x44/0xa0
> [   82.812541]
> stack backtrace:
> [   82.812547] CPU: 0 PID: 5859 Comm: kms_setmode Not tainted 4.5.0-rc4-gfxbench+ #1
> [   82.812550] Hardware name:                  /NUC5CPYB, BIOS PYBSWCEL.86A.0040.2015.0814.1353 08/14/2015
> [   82.812553]  0000000000000000 ffff880079407bf0 ffffffff813f8505 ffffffff825fb270
> [   82.812560]  ffffffff825c4190 ffff880079407c30 ffffffff810c84ac ffff880079407c90
> [   82.812566]  ffff8800797ed328 ffff8800797ecb00 0000000000000001 ffff8800797ed350
> [   82.812573] Call Trace:
> [   82.812578]  [] dump_stack+0x67/0x92
> [   82.812582]  [] print_circular_bug+0x1fc/0x310
> [   82.812586]  [] __lock_acquire+0x1fc9/0x20f0
> [   82.812590]  [] lock_acquire+0xc3/0x1d0
> [   82.812594]  [] ? drm_gem_mmap+0x1a1/0x270
> [   82.812599]  [] drm_gem_mmap+0x1c7/0x270
> [   82.812603]  [] ? drm_gem_mmap+0x1a1/0x270
> [   82.812608]  [] mmap_region+0x334/0x580
> [   82.812612]  [] do_mmap+0x364/0x410
> [   82.812616]  [] vm_mmap_pgoff+0x6d/0xa0
> [   82.812629]  [] SyS_mmap_pgoff+0x184/0x220
> [   82.812633]  [] SyS_mmap+0x1d/0x20
> [   82.812637]  [] entry_SYSCALL_64_fastpath+0x16/0x73
> 
> Highly unlikely though this scenario is, we can avoid the issue entirely
> by moving the copy operation from out under the mutex by always allocating
> a temporary buffer for the operation (rather than reuse the preallocated
> buf which requires the mutex for serialisation).
> 
> The locked section was extended by the addition of the preallocated buf
> to speed up md user operations in
> 
> commit 2b75869bba676c248d8d25ae6d2bd9221dfffdb6
> Author: NeilBrown <neilb at suse.de>
> Date:   Mon Oct 13 16:41:28 2014 +1100
> 
>     sysfs/kernfs: allow attributes to request write buffer be pre-allocated.
> 
> Reported-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94350
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>

This looks pretty much like we discussed, so:

Reviewed-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>

> Ignore-Cc: Tejun Heo <tj at kernel.org>
> Ignore-Cc: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
> Ignore-Cc: NeilBrown <neilb at suse.de>
> ---
>  fs/kernfs/file.c       | 51 ++++++++++++++++++++++++++++----------------------
>  include/linux/kernfs.h |  1 +
>  2 files changed, 30 insertions(+), 22 deletions(-)
> 
> diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
> index 7247252ee9b1..e1574008adc9 100644
> --- a/fs/kernfs/file.c
> +++ b/fs/kernfs/file.c
> @@ -190,15 +190,16 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
>  	char *buf;
>  
>  	buf = of->prealloc_buf;
> -	if (!buf)
> +	if (buf)
> +		mutex_lock(&of->prealloc_mutex);
> +	else
>  		buf = kmalloc(len, GFP_KERNEL);
>  	if (!buf)
>  		return -ENOMEM;
>  
>  	/*
>  	 * @of->mutex nests outside active ref and is used both to ensure that
> -	 * the ops aren't called concurrently for the same open file, and
> -	 * to provide exclusive access to ->prealloc_buf (when that exists).
> +	 * the ops aren't called concurrently for the same open file.
>  	 */
>  	mutex_lock(&of->mutex);
>  	if (!kernfs_get_active(of->kn)) {
> @@ -214,21 +215,23 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
>  	else
>  		len = -EINVAL;
>  
> +	kernfs_put_active(of->kn);
> +	mutex_unlock(&of->mutex);
> +
>  	if (len < 0)
> -		goto out_unlock;
> +		goto out_free;
>  
>  	if (copy_to_user(user_buf, buf, len)) {
>  		len = -EFAULT;
> -		goto out_unlock;
> +		goto out_free;
>  	}
>  
>  	*ppos += len;
>  
> - out_unlock:
> -	kernfs_put_active(of->kn);
> -	mutex_unlock(&of->mutex);
>   out_free:
> -	if (buf != of->prealloc_buf)
> +	if (buf == of->prealloc_buf)
> +		mutex_unlock(&of->prealloc_mutex);
> +	else
>  		kfree(buf);
>  	return len;
>  }
> @@ -284,15 +287,22 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
>  	}
>  
>  	buf = of->prealloc_buf;
> -	if (!buf)
> +	if (buf)
> +		mutex_lock(&of->prealloc_mutex);
> +	else
>  		buf = kmalloc(len + 1, GFP_KERNEL);
>  	if (!buf)
>  		return -ENOMEM;
>  
> +	if (copy_from_user(buf, user_buf, len)) {
> +		len = -EFAULT;
> +		goto out_free;
> +	}
> +	buf[len] = '\0';	/* guarantee string termination */
> +
>  	/*
>  	 * @of->mutex nests outside active ref and is used both to ensure that
> -	 * the ops aren't called concurrently for the same open file, and
> -	 * to provide exclusive access to ->prealloc_buf (when that exists).
> +	 * the ops aren't called concurrently for the same open file.
>  	 */
>  	mutex_lock(&of->mutex);
>  	if (!kernfs_get_active(of->kn)) {
> @@ -301,26 +311,22 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
>  		goto out_free;
>  	}
>  
> -	if (copy_from_user(buf, user_buf, len)) {
> -		len = -EFAULT;
> -		goto out_unlock;
> -	}
> -	buf[len] = '\0';	/* guarantee string termination */
> -
>  	ops = kernfs_ops(of->kn);
>  	if (ops->write)
>  		len = ops->write(of, buf, len, *ppos);
>  	else
>  		len = -EINVAL;
>  
> +	kernfs_put_active(of->kn);
> +	mutex_unlock(&of->mutex);
> +
>  	if (len > 0)
>  		*ppos += len;
>  
> -out_unlock:
> -	kernfs_put_active(of->kn);
> -	mutex_unlock(&of->mutex);
>  out_free:
> -	if (buf != of->prealloc_buf)
> +	if (buf == of->prealloc_buf)
> +		mutex_unlock(&of->prealloc_mutex);
> +	else
>  		kfree(buf);
>  	return len;
>  }
> @@ -687,6 +693,7 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
>  		error = -ENOMEM;
>  		if (!of->prealloc_buf)
>  			goto err_free;
> +		mutex_init(&of->prealloc_mutex);
>  	}
>  
>  	/*
> diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
> index af51df35d749..019ef3416900 100644
> --- a/include/linux/kernfs.h
> +++ b/include/linux/kernfs.h
> @@ -177,6 +177,7 @@ struct kernfs_open_file {
>  
>  	/* private fields, do not use outside kernfs proper */
>  	struct mutex		mutex;
> +	struct mutex		prealloc_mutex;
>  	int			event;
>  	struct list_head	list;
>  	char			*prealloc_buf;
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation


More information about the Intel-gfx mailing list