[Nouveau] [PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP
Peter Wu
peter at lekensteyn.nl
Tue Jul 12 20:18:35 UTC 2016
On Tue, Jul 12, 2016 at 09:16:22PM +0200, Lukas Wunner wrote:
> On Tue, Jul 12, 2016 at 06:49:34PM +0200, Peter Wu wrote:
> > The FBIOPUT_CON2FBMAP ioctl takes a console_lock(). When this is called
> > while nouveau was runtime suspended, a deadlock would occur due to
> > nouveau_fbcon_set_suspend also trying to obtain console_lock().
> >
> > Fix this by delaying the drm_fb_helper_set_suspend call. Based on the
> > i915 code (which was done for performance reasons though).
> >
> > Cc: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> > Signed-off-by: Peter Wu <peter at lekensteyn.nl>
> > ---
> > Tested on top of v4.7-rc5, the deadlock is gone.
>
> Hm, how did you trigger this deadlock?
>
> Thanks,
> Lukas
Here is a small Python script with hardcoded values:
#!/usr/bin/env python3
# see drivers/video/fbdev/core/fbmem.c ->
# drivers/video/console/fbcon.c for FB_EVENT_SET_CONSOLE_MAP
import array, fcntl
FBIOPUT_CON2FBMAP = 0x4610
console, framebuffer = 6, 1
with open("/dev/fb0") as f:
info = array.array('I', [console, framebuffer])
fcntl.ioctl(f, FBIOPUT_CON2FBMAP, info)
Ensure that the nouveau card is sleeping, then invoke:
python3 con2fbmap.py
If you check /proc/`pidof python3`/stack or the dmesg spew 120 seconds
later, you will see a trace like this on a kernel without this patch:
[ 60.738089] snd_hda_intel 0000:01:00.1: Disabling via vga_switcheroo
[ 60.739810] nouveau 0000:01:00.0: DRM: suspending console...
[ 60.740090] nouveau 0000:01:00.0: DRM: suspending display...
[ 60.740581] nouveau 0000:01:00.0: DRM: evicting buffers...
[ 60.740718] nouveau 0000:01:00.0: DRM: waiting for kernel channels to go idle...
[ 60.741096] nouveau 0000:01:00.0: DRM: suspending client object trees...
[ 60.748015] nouveau 0000:01:00.0: DRM: suspending kernel object tree...
[ 62.598156] nouveau 0000:01:00.0: power state changed by ACPI to D3cold
[ 66.883880] nouveau 0000:01:00.0: power state changed by ACPI to D0
[ 66.883987] nouveau 0000:01:00.0: restoring config space at offset 0x4 (was 0x100403, writing 0x100407)
[ 66.884017] nouveau 0000:01:00.0: calling nv_msi_ht_cap_quirk_leaf+0x0/0x30
[ 66.884032] nouveau 0000:01:00.0: DRM: resuming kernel object tree...
[ 66.995505] nouveau 0000:01:00.0: priv: GPC0: 419df4 00000000 (1f40820e)
[ 66.995512] nouveau 0000:01:00.0: priv: GPC1: 419df4 00000000 (1f40820e)
[ 67.014829] nouveau 0000:01:00.0: DRM: resuming client object trees...
[ 67.014905] nouveau 0000:01:00.0: DRM: resuming display...
[ 67.014962] nouveau 0000:01:00.0: DRM: resuming console...
[ 240.619840] INFO: task con2fb:482 blocked for more than 120 seconds.
[ 240.619844] Not tainted 4.7.0-rc1kasan-00011-g5c72d90 #2
[ 240.619845] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 240.619858] con2fb D ffff880769467378 25464 482 447 0x00000000
[ 240.619864] ffff880769467378 ffffffff845eb340 ffffffff83ed1708 00ff880769467330
[ 240.619868] ffff8807762a06e0 ffff8807762a0708 ffff88077629fdd8 ffff88077629fdc0
[ 240.619872] ffff880772bc6200 ffff88076f221880 ffff880769460000 ffffed00ed28c001
[ 240.619874] Call Trace:
[ 240.619881] [<ffffffff82e904c5>] schedule+0x95/0x1b0
[ 240.619885] [<ffffffff82e9b740>] schedule_timeout+0x3d0/0x8b0
[ 240.619889] [<ffffffff82e9b370>] ? usleep_range+0xe0/0xe0
[ 240.619894] [<ffffffff812266c7>] ? debug_lockdep_rcu_enabled+0x77/0x90
[ 240.619897] [<ffffffff811f0918>] ? mark_held_locks+0xc8/0x120
[ 240.619901] [<ffffffff82e9d0cc>] ? _raw_spin_unlock_irq+0x2c/0x30
[ 240.619904] [<ffffffff811f0d69>] ? trace_hardirqs_on_caller+0x3f9/0x580
[ 240.619907] [<ffffffff82e98bdf>] __down+0xff/0x1d0
[ 240.619911] [<ffffffff82e98ae0>] ? ww_mutex_unlock+0x270/0x270
[ 240.619925] [<ffffffff82225c32>] ? _dev_info+0xc2/0xf0
[ 240.619929] [<ffffffff811e56b3>] down+0x63/0x80
[ 240.619933] [<ffffffff8120796e>] console_lock+0x1e/0x70
[ 240.620012] [<ffffffffa1842d61>] nouveau_fbcon_set_suspend+0x71/0x390 [nouveau]
[ 240.620085] [<ffffffffa17f8a22>] nouveau_do_resume+0x2e2/0x380 [nouveau]
[ 240.620157] [<ffffffffa17f92de>] nouveau_pmops_runtime_resume+0xce/0x210 [nouveau]
[ 240.620163] [<ffffffff81c2be80>] ? pci_restore_standard_config+0x70/0x70
[ 240.620167] [<ffffffff81c2bfb0>] pci_pm_runtime_resume+0x130/0x220
[ 240.620171] [<ffffffff81c2be80>] ? pci_restore_standard_config+0x70/0x70
[ 240.620175] [<ffffffff82249d12>] __rpm_callback+0x62/0xe0
[ 240.620179] [<ffffffff81c2be80>] ? pci_restore_standard_config+0x70/0x70
[ 240.620182] [<ffffffff82249ef8>] rpm_callback+0x168/0x210
[ 240.620186] [<ffffffff81c2be80>] ? pci_restore_standard_config+0x70/0x70
[ 240.620189] [<ffffffff8224b6b3>] rpm_resume+0xbc3/0x1880
[ 240.620193] [<ffffffff8224aaf0>] ? pm_runtime_autosuspend_expiration+0x60/0x60
[ 240.620196] [<ffffffff8224f11a>] ? __pm_runtime_resume+0x6a/0xa0
[ 240.620200] [<ffffffff8224f128>] __pm_runtime_resume+0x78/0xa0
[ 240.620270] [<ffffffffa1840ad0>] nouveau_fbcon_open+0xd0/0x120 [nouveau]
[ 240.620274] [<ffffffff81c93577>] con2fb_acquire_newinfo+0xc7/0x2c0
[ 240.620277] [<ffffffff81c95e18>] set_con2fb_map+0x728/0xcb0
[ 240.620281] [<ffffffff81c96e4c>] fbcon_event_notify+0xaac/0x1f90
[ 240.620285] [<ffffffff81162cc9>] notifier_call_chain+0xc9/0x130
[ 240.620288] [<ffffffff81163110>] __blocking_notifier_call_chain+0x70/0xb0
[ 240.620292] [<ffffffff81163166>] blocking_notifier_call_chain+0x16/0x20
[ 240.620295] [<ffffffff81ca0c8b>] fb_notifier_call_chain+0x1b/0x20
[ 240.620298] [<ffffffff81ca939a>] do_fb_ioctl+0x93a/0xa80
[ 240.620301] [<ffffffff81513377>] ? mntput+0x57/0x70
[ 240.620305] [<ffffffff81ca8a60>] ? fb_read+0x5f0/0x5f0
[ 240.620309] [<ffffffff812266c7>] ? debug_lockdep_rcu_enabled+0x77/0x90
[ 240.620312] [<ffffffff811f2475>] ? __lock_acquire+0x1055/0x2ed0
[ 240.620316] [<ffffffff812266c7>] ? debug_lockdep_rcu_enabled+0x77/0x90
[ 240.620319] [<ffffffff811f2475>] ? __lock_acquire+0x1055/0x2ed0
[ 240.620323] [<ffffffff812266c7>] ? debug_lockdep_rcu_enabled+0x77/0x90
[ 240.620327] [<ffffffff811f1420>] ? debug_check_no_locks_freed+0x280/0x280
[ 240.620331] [<ffffffff812266c7>] ? debug_lockdep_rcu_enabled+0x77/0x90
[ 240.620335] [<ffffffff812266c7>] ? debug_lockdep_rcu_enabled+0x77/0x90
[ 240.620340] [<ffffffff8149714d>] ? cmpxchg_double_slab.isra.54+0x10d/0x130
[ 240.620344] [<ffffffff82e9d106>] ? _raw_spin_unlock_irqrestore+0x36/0x50
[ 240.620347] [<ffffffff811f0d69>] ? trace_hardirqs_on_caller+0x3f9/0x580
[ 240.620351] [<ffffffff81ca95ac>] fb_ioctl+0xcc/0x140
[ 240.620355] [<ffffffff814e9372>] do_vfs_ioctl+0x192/0x1000
[ 240.620359] [<ffffffff814e0191>] ? putname+0xc1/0xf0
[ 240.620362] [<ffffffff814e91e0>] ? ioctl_preallocate+0x1e0/0x1e0
[ 240.620365] [<ffffffff814e0191>] ? putname+0xc1/0xf0
[ 240.620369] [<ffffffff81226899>] ? rcu_read_lock_sched_held+0xe9/0x110
[ 240.620373] [<ffffffff81498ffe>] ? kmem_cache_free+0x1fe/0x280
[ 240.620376] [<ffffffff814e0191>] ? putname+0xc1/0xf0
[ 240.620380] [<ffffffff814abbbd>] ? do_sys_open+0x25d/0x340
[ 240.620384] [<ffffffff82e9d692>] ? entry_SYSCALL_64_fastpath+0x5/0xa8
[ 240.620387] [<ffffffff812266c7>] ? debug_lockdep_rcu_enabled+0x77/0x90
[ 240.620390] [<ffffffff81509839>] ? __fget_light+0x139/0x200
[ 240.620393] [<ffffffff814ea259>] SyS_ioctl+0x79/0x90
[ 240.620397] [<ffffffff82e9d6a5>] entry_SYSCALL_64_fastpath+0x18/0xa8
[ 240.620401] 3 locks held by con2fb/482:
[ 240.620409] #0: (console_lock){+.+.+.}, at: [<ffffffff81ca9376>] do_fb_ioctl+0x916/0xa80
[ 240.620416] #1: (&fb_info->lock){+.+.+.}, at: [<ffffffff81ca1d5d>] lock_fb_info+0x1d/0x70
[ 240.620423] #2: ((fb_notifier_list).rwsem){.+.+.+}, at: [<ffffffff811630fb>] __blocking_notifier_call_chain+0x5b/0xb0
Peter
> > ---
> > drivers/gpu/drm/nouveau/nouveau_drm.c | 4 +--
> > drivers/gpu/drm/nouveau/nouveau_drv.h | 1 +
> > drivers/gpu/drm/nouveau/nouveau_fbcon.c | 54 ++++++++++++++++++++++++++++-----
> > drivers/gpu/drm/nouveau/nouveau_fbcon.h | 2 +-
> > 4 files changed, 50 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
> > index 11f8dd9..f9a2c10 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_drm.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
> > @@ -552,7 +552,7 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime)
> >
> > if (dev->mode_config.num_crtc) {
> > NV_INFO(drm, "suspending console...\n");
> > - nouveau_fbcon_set_suspend(dev, 1);
> > + nouveau_fbcon_set_suspend(dev, FBINFO_STATE_SUSPENDED, true);
> > NV_INFO(drm, "suspending display...\n");
> > ret = nouveau_display_suspend(dev, runtime);
> > if (ret)
> > @@ -635,7 +635,7 @@ nouveau_do_resume(struct drm_device *dev, bool runtime)
> > NV_INFO(drm, "resuming display...\n");
> > nouveau_display_resume(dev, runtime);
> > NV_INFO(drm, "resuming console...\n");
> > - nouveau_fbcon_set_suspend(dev, 0);
> > + nouveau_fbcon_set_suspend(dev, FBINFO_STATE_RUNNING, false);
> > }
> >
> > return 0;
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
> > index 822a021..a743d19 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_drv.h
> > +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
> > @@ -147,6 +147,7 @@ struct nouveau_drm {
> > struct nouveau_channel *channel;
> > struct nvkm_gpuobj *notify;
> > struct nouveau_fbdev *fbcon;
> > + struct work_struct fbdev_suspend_work;
> > struct nvif_object nvsw;
> > struct nvif_object ntfy;
> > struct nvif_notify flip;
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> > index d1f248f..089156a 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> > @@ -492,19 +492,53 @@ static const struct drm_fb_helper_funcs nouveau_fbcon_helper_funcs = {
> > .fb_probe = nouveau_fbcon_create,
> > };
> >
> > +static void nouveau_fbcon_suspend_worker(struct work_struct *work)
> > +{
> > + nouveau_fbcon_set_suspend(container_of(work,
> > + struct nouveau_drm,
> > + fbdev_suspend_work)->dev,
> > + FBINFO_STATE_RUNNING,
> > + true);
> > +}
> > +
> > void
> > -nouveau_fbcon_set_suspend(struct drm_device *dev, int state)
> > +nouveau_fbcon_set_suspend(struct drm_device *dev, int state, bool synchronous)
> > {
> > struct nouveau_drm *drm = nouveau_drm(dev);
> > - if (drm->fbcon) {
> > - console_lock();
> > - if (state == FBINFO_STATE_RUNNING)
> > - nouveau_fbcon_accel_restore(dev);
> > - drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
> > + if (!drm->fbcon)
> > + return;
> > +
> > + if (synchronous) {
> > + /* Flush any pending work to turn the console on, and then
> > + * wait to turn it off. It must be synchronous as we are
> > + * about to suspend or unload the driver.
> > + *
> > + * Note that from within the work-handler, we cannot flush
> > + * ourselves, so only flush outstanding work upon suspend!
> > + */
> > if (state != FBINFO_STATE_RUNNING)
> > - nouveau_fbcon_accel_save_disable(dev);
> > - console_unlock();
> > + flush_work(&drm->fbdev_suspend_work);
> > + console_lock();
> > + } else {
> > + /*
> > + * The console lock can be pretty contented on resume due
> > + * to all the printk activity. Try to keep it out of the hot
> > + * path of resume if possible. This also prevents a deadlock
> > + * with FBIOPUT_CON2FBMAP.
> > + */
> > + WARN_ON(state != FBINFO_STATE_RUNNING);
> > + if (!console_trylock()) {
> > + schedule_work(&drm->fbdev_suspend_work);
> > + return;
> > + }
> > }
> > +
> > + if (state == FBINFO_STATE_RUNNING)
> > + nouveau_fbcon_accel_restore(dev);
> > + drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
> > + if (state != FBINFO_STATE_RUNNING)
> > + nouveau_fbcon_accel_save_disable(dev);
> > + console_unlock();
> > }
> >
> > int
> > @@ -526,6 +560,8 @@ nouveau_fbcon_init(struct drm_device *dev)
> > fbcon->dev = dev;
> > drm->fbcon = fbcon;
> >
> > + INIT_WORK(&drm->fbdev_suspend_work, nouveau_fbcon_suspend_worker);
> > +
> > drm_fb_helper_prepare(dev, &fbcon->helper, &nouveau_fbcon_helper_funcs);
> >
> > ret = drm_fb_helper_init(dev, &fbcon->helper,
> > @@ -571,6 +607,8 @@ nouveau_fbcon_fini(struct drm_device *dev)
> > if (!drm->fbcon)
> > return;
> >
> > + flush_work(&drm->fbdev_suspend_work);
> > +
> > nouveau_fbcon_accel_fini(dev);
> > nouveau_fbcon_destroy(dev, drm->fbcon);
> > kfree(drm->fbcon);
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.h b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
> > index ca77ad0..34b2504 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
> > +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
> > @@ -66,7 +66,7 @@ void nouveau_fbcon_gpu_lockup(struct fb_info *info);
> >
> > int nouveau_fbcon_init(struct drm_device *dev);
> > void nouveau_fbcon_fini(struct drm_device *dev);
> > -void nouveau_fbcon_set_suspend(struct drm_device *dev, int state);
> > +void nouveau_fbcon_set_suspend(struct drm_device *dev, int state, bool synchronous);
> > void nouveau_fbcon_accel_save_disable(struct drm_device *dev);
> > void nouveau_fbcon_accel_restore(struct drm_device *dev);
> >
> > --
> > 2.8.3
> >
> > _______________________________________________
> > Nouveau mailing list
> > Nouveau at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/nouveau
More information about the dri-devel
mailing list