ast: cursor flashing softlockups
David Daney
ddaney at caviumnetworks.com
Tue May 17 17:39:15 UTC 2016
I can confirm this. The cursor is blinking along nicely with a 200mS
on/off time then with this patch installed:
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 6e92917..7855446 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -402,6 +402,8 @@ static void cursor_timer_handler(unsigned long dev_addr)
struct fbcon_ops *ops = info->fbcon_par;
queue_work(system_power_efficient_wq, &info->queue);
+ if (WARN_ON(ops->cur_blink_jiffies < 10))
+ ops->cur_blink_jiffies = 200;
mod_timer(&ops->cursor_timer, jiffies + ops->cur_blink_jiffies);
}
@@ -417,6 +419,8 @@ static void fbcon_add_cursor_timer(struct fb_info *info)
init_timer(&ops->cursor_timer);
ops->cursor_timer.function = cursor_timer_handler;
+ if (WARN_ON(ops->cur_blink_jiffies < 10))
+ ops->cur_blink_jiffies = 200;
ops->cursor_timer.expires = jiffies + ops->cur_blink_jiffies;
ops->cursor_timer.data = (unsigned long ) info;
add_timer(&ops->cursor_timer);
@@ -1096,6 +1100,7 @@ static void fbcon_init(struct vc_data *vc, int init)
ops = info->fbcon_par;
ops->cur_blink_jiffies = msecs_to_jiffies(vc->vc_cur_blink_ms);
+ WARN_ON(ops->cur_blink_jiffies < 10);
p->con_rotate = initial_rotation;
set_blitting_type(vc, info);
@@ -1310,6 +1315,7 @@ static void fbcon_cursor(struct vc_data *vc, int mode)
int c = scr_readw((u16 *) vc->vc_pos);
ops->cur_blink_jiffies = msecs_to_jiffies(vc->vc_cur_blink_ms);
+ WARN_ON(ops->cur_blink_jiffies < 10);
if (fbcon_is_inactive(vc, info) || vc->vc_deccm != 1)
return;
-------------------------------------------------------------
I get:
[ 29.386066] ------------[ cut here ]------------
[ 29.386080] WARNING: CPU: 0 PID: 1688 at
drivers/video/console/fbcon.c:1103 fbcon_init+0x47c/0x4b8
[ 29.386145] Modules linked in: vfat(E) fat(E) aes_ce_blk(E)
ablk_helper(E) cryptd(E) aes_ce_cipher(E) ghash_ce(E) sha2_ce(E)
sha1_ce(E) sg(E) ip_tables(E) xfs(E) libcrc32c(E) nicvf(E) ast(E)
i2c_algo_bit(E) drm_kms_helper(E) syscopyarea(E) sysfillrect(E)
sysimgblt(E) fb_sys_fops(E) ttm(E) drm(E) i2c_core(E) nicpf(E)
thunder_bgx(E) mdio_thunder(E) mdio_cavium(E) dm_mirror(E)
dm_region_hash(E) dm_log(E) dm_mod(E)
[ 29.386147]
[ 29.386153] CPU: 0 PID: 1688 Comm: systemd-logind Tainted: G
E 4.6.0-rc3-arm64next+ #278
[ 29.386155] Hardware name: www.cavium.com ThunderX CRB-2S/ThunderX
CRB-2S, BIOS 0.3 Apr 25 2016
[ 29.386159] task: fffffe0fd1109d80 ti: fffffe0fd119c000 task.ti:
fffffe0fd119c000
[ 29.386163] PC is at fbcon_init+0x47c/0x4b8
[ 29.386167] LR is at fbcon_init+0x144/0x4b8
[ 29.386170] pc : [<fffffc0008468930>] lr : [<fffffc00084685f8>]
pstate: 80000145
[ 29.386172] sp : fffffe0fd119f900
[ 29.386179] x29: fffffe0fd119f900 x28: fffffe0fdc059c00
[ 29.386186] x27: fffffe001c7c8800 x26: fffffe0fd6027910
[ 29.386192] x25: fffffe0fd6027800 x24: fffffc0009b56f98
[ 29.386198] x23: 0000000000000001 x22: fffffc0008d1a000
[ 29.386205] x21: fffffe0ff4062000 x20: fffffc0009b57018
[ 29.386211] x19: fffffc0009b56000 x18: 000002aae7bd51f0
[ 29.386222] x17: 000003ffb542f9f0 x16: fffffc0008242b80
[ 29.386228] x15: 000002aae7bdb328 x14: 0000000000000006
[ 29.386234] x13: 000002aae7bc72c8 x12: 0000000000000000
[ 29.386240] x11: 0000000000000000 x10: 0000000000000000
[ 29.386247] x9 : 0000000000000000 x8 : fffffe001c7c8b18
[ 29.386253] x7 : 0000000000000000 x6 : 000000000000007f
[ 29.386259] x5 : fffffc0008c91640 x4 : 0000000000000000
[ 29.386266] x3 : 0000000000000800 x2 : fffffe001c7c8ce8
[ 29.386272] x1 : fffffe0fd4df4e00 x0 : 0000000000000000
[ 29.386274]
[ 29.386277] ---[ end trace 6c32ddc01008c9ba ]---
[ 29.386280] Call trace:
[ 29.386284] Exception stack(0xfffffe0fd119f730 to 0xfffffe0fd119f850)
[ 29.386288] f720: fffffc0009b56000
fffffc0009b57018
[ 29.386292] f740: fffffe0fd119f900 fffffc0008468930 0000000080000145
000000000000003d
[ 29.386296] f760: fffffc0008c36ce8 fffffc00081c2a30 fffffe0fd119f7f0
fffffc00080f47d8
[ 29.386299] f780: fffffe0fd119f800 fffffc00080f47d8 000000000000010c
fffffe0fd1109d80
[ 29.386303] f7a0: 0000000000000000 000000000025810c fffffe0fd110a5a0
0000000000000000
[ 29.386307] f7c0: fffffc000880d2e8 0000000000000000 fffffe0fd119f850
fffffc00080f47d8
[ 29.386311] f7e0: 0000000000000000 fffffe0fd4df4e00 fffffe001c7c8ce8
0000000000000800
[ 29.386314] f800: 0000000000000000 fffffc0008c91640 000000000000007f
0000000000000000
[ 29.386318] f820: fffffe001c7c8b18 0000000000000000 0000000000000000
0000000000000000
[ 29.386321] f840: 0000000000000000 000002aae7bc72c8
[ 29.386327] [<fffffc0008468930>] fbcon_init+0x47c/0x4b8
[ 29.386332] [<fffffc00084d6e3c>] visual_init+0xbc/0x114
[ 29.386336] [<fffffc00084d9938>] vc_allocate+0x108/0x1e0
[ 29.386340] [<fffffc00084d9a44>] con_install+0x34/0x100
[ 29.386347] [<fffffc00084c2f44>] tty_init_dev+0x74/0x1a8
[ 29.386351] [<fffffc00084c3560>] tty_open+0x4e8/0x5a8
[ 29.386357] [<fffffc0008248a78>] chrdev_open+0xa8/0x1ac
[ 29.386361] [<fffffc0008241288>] do_dentry_open+0x1e8/0x300
[ 29.386364] [<fffffc0008242694>] vfs_open+0x6c/0x7c
[ 29.386370] [<fffffc0008252864>] do_last+0x12c/0xc24
[ 29.386374] [<fffffc00082533d8>] path_openat+0x7c/0x2b8
[ 29.386377] [<fffffc00082546d4>] do_filp_open+0x78/0xd4
[ 29.386381] [<fffffc0008242a74>] do_sys_open+0x150/0x214
[ 29.386384] [<fffffc0008242bbc>] SyS_openat+0x3c/0x48
[ 29.386389] [<fffffc000808538c>] __sys_trace_return+0x0/0x4
.
.
.
[ OK ] Started Command Scheduler.
Starting Command Scheduler...
[ 33.395141] ------------[ cut here ]------------
[ 33.399773] WARNING: CPU: 0 PID: 0 at
drivers/video/console/fbcon.c:405 cursor_timer_handler+0x5c/0x6c
[ 33.409072] Modules linked in: ip6t_REJECT(E) nf_reject_ipv6(E)
ipt_REJECT(E) nf_reject_ipv4(E) xt_conntrack(E) ebtable_nat(E)
ebtable_broute(E) bridge(E) stp(E) llc(E) ebtable_filter(E) ebtables(E)
ip6table_nat(E) nf_conntrack_ipv6(E) nf_defrag_ipv6(E) nf_nat_ipv6(E)
ip6table_mangle(E) ip6table_security(E) ip6table_raw(E)
ip6table_filter(E) ip6_tables(E) iptable_nat(E) nf_conntrack_ipv4(E)
nf_defrag_ipv4(E) nf_nat_ipv4(E) nf_nat(E) nf_conntrack(E)
iptable_mangle(E) iptable_security(E) iptable_raw(E) iptable_filter(E)
vfat(E) fat(E) aes_ce_blk(E) ablk_helper(E) cryptd(E) aes_ce_cipher(E)
ghash_ce(E) sha2_ce(E) sha1_ce(E) sg(E) ip_tables(E) xfs(E) libcrc32c(E)
nicvf(E) ast(E) i2c_algo_bit(E) drm_kms_helper(E) syscopyarea(E)
sysfillrect(E) sysimgblt(E) fb_sys_fops(E) ttm(E) drm(E) i2c_core(E)
nicpf(E) thunder_bgx(E) mdio_thunder(E) mdio_cavium(E) dm_mirror(E)
dm_region_hash(E) dm_log(E) dm_mod(E)
[ 33.488864]
[ 33.490358] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W E
4.6.0-rc3-arm64next+ #278
[ 33.498877] Hardware name: www.cavium.com ThunderX CRB-2S/ThunderX
CRB-2S, BIOS 0.3 Apr 25 2016
[ 33.507571] task: fffffc0008c79c80 ti: fffffc0008c50000 task.ti:
fffffc0008c50000
[ 33.515053] PC is at cursor_timer_handler+0x5c/0x6c
[ 33.519930] LR is at cursor_timer_handler+0x30/0x6c
[ 33.524804] pc : [<fffffc0008467490>] lr : [<fffffc0008467464>]
pstate: 80000145
[ 33.532198] sp : ffffff0ff9dc3da0
[ 33.535513] x29: ffffff0ff9dc3da0 x28: fffffc0008c70be8
[ 33.540836] x27: fffffc0008c70000 x26: fffffc0008d68fb3
[ 33.546158] x25: fffffc0008c70000 x24: fffffc0008d6a000
[ 33.551483] x23: fffffe0fd6027800 x22: fffffc0008467434
[ 33.556809] x21: 0000000000000101 x20: fffffe0fd6027800
[ 33.562138] x19: fffffe0fdc059c00 x18: 000002aadcf70050
[ 33.567464] x17: 000003ff95cfa228 x16: fffffc000829628c
[ 33.572786] x15: 00000000000000dd x14: ffffffc2c3bfa000
[ 33.578114] x13: 003d093352dc0000 x12: 00000000003d0900
[ 33.583440] x11: 0000000000000000 x10: 00003d09334ec400
[ 33.588765] x9 : fffffc0008c50000 x8 : fffffc0008851320
[ 33.594090] x7 : 00000000ad583ee9 x6 : 0000000000000001
[ 33.599412] x5 : fffffc0008c79c80 x4 : fffffc0009012c28
[ 33.604743] x3 : 0000000000000004 x2 : 0000000000000002
[ 33.610074] x1 : 0000000000000000 x0 : 0000000000000001
[ 33.615395]
[ 33.616886] ---[ end trace 6c32ddc01008c9c3 ]---
[ 33.621506] Call trace:
[ 33.623951] Exception stack(0xffffff0ff9dc3bd0 to 0xffffff0ff9dc3cf0)
[ 33.630390] 3bc0: fffffe0fdc059c00
fffffe0fd6027800
[ 33.638221] 3be0: ffffff0ff9dc3da0 fffffc0008467490 0000000080000145
000000000000003d
[ 33.646050] 3c00: fffffe0fd46b5700 ffffff0ff9dd8f00 fffffc0008d69de0
0000000000000000
[ 33.653882] 3c20: fffffc0008c36ce8 fffffc0008c24b28 fffffc0008c70be8
fffffc0008da5b48
[ 33.661708] 3c40: ffffff0ff9dc3cb0 fffffc0008814614 ffffff0ff9dd8f00
fffffc0008d68ea4
[ 33.669535] 3c60: fffffc0008d69de0 0000000000000000 fffffc0008c36ce8
fffffc0008c24b28
[ 33.677361] 3c80: 0000000000000001 0000000000000000 0000000000000002
0000000000000004
[ 33.685191] 3ca0: fffffc0009012c28 fffffc0008c79c80 0000000000000001
00000000ad583ee9
[ 33.693017] 3cc0: fffffc0008851320 fffffc0008c50000 00003d09334ec400
0000000000000000
[ 33.700845] 3ce0: 00000000003d0900 003d093352dc0000
[ 33.705726] [<fffffc0008467490>] cursor_timer_handler+0x5c/0x6c
[ 33.711648] [<fffffc0008117bd8>] call_timer_fn+0xc0/0x3e4
[ 33.717047] [<fffffc0008118114>] run_timer_softirq+0x1ec/0x39c
[ 33.722879] [<fffffc0008081794>] __do_softirq+0x104/0x598
[ 33.728278] [<fffffc00080a2ca4>] irq_exit+0x9c/0xc0
[ 33.733155] [<fffffc0008102384>] __handle_domain_irq+0x98/0xfc
[ 33.739009] [<fffffc0008081594>] gic_handle_irq+0x94/0x190
[ 33.744492] Exception stack(0xffffff0ff9dc3fc0 to 0xffffff0ff9dc40e0)
[ 33.750930] 3fc0: fffffc0008c53da0 fffffc0008d6a000 fffffc0008c53ec0
fffffc00080860c0
[ 33.758756] 3fe0: 0000000060000145 fffffc0008a33c98 ffffff0ff9dc0020
0000000000000000
[ 33.766582] 4000: fffffc0008c53ec0 fffffc0008c53da0 0000000000000000
0000000000000000
[ 33.774408] 4020: fffffe0fee5412d0 0000000000000000 0000000000000000
0000000000000000
[ 33.782234] 4040: 0000000000000000 0000000000000000 0000000000000000
0000000000000000
[ 33.790060] 4060: 0000000000000000 0000000000000000 0000000000000000
0000000000000000
[ 33.797887] 4080: 0000000000000000 0000000000000000 0000000000000000
0000000000000000
[ 33.805715] 40a0: 0000000000000000 0000000000000000 0000000000000000
0000000000000000
[ 33.813547] 40c0: 0000000000000000 0000000000000000 0000000000000000
0000000000000000
[ 33.821374] [<fffffc0008084de4>] el1_irq+0xa4/0x114
[ 33.826251] [<fffffc00080860c0>] arch_cpu_idle+0x40/0x1fc
[ 33.831651] [<fffffc00080e9970>] default_idle_call+0x20/0x34
[ 33.837310] [<fffffc00080e9ae4>] cpu_startup_entry+0x160/0x314
[ 33.843142] [<fffffc000880d968>] rest_init+0x150/0x160
[ 33.848283] [<fffffc0008b70b90>] start_kernel+0x3b8/0x3cc
[ 33.853681] [<fffffc0008b701c4>] __primary_switched+0x30/0x6c
On 05/17/2016 07:29 AM, Peter Hurley wrote:
> [ +to Scot Doyle ]
>
> Scot, please take a look at this soft lockup.
>
> Regards,
> Peter Hurley
>
>
> Hi Ming,
>
> On 05/17/2016 02:12 AM, Ming Lei wrote:
>> Hi,
>>
>> On Tue, May 17, 2016 at 4:07 AM, Dann Frazier
>> <dann.frazier at canonical.com> wrote:
>>> Hi,
>>> I'm observing a soft lockup issue w/ the ASPEED controller on an
>>> arm64 server platform. This was originally seen on Ubuntu's 4.4
>>> kernel, but it is reproducible w/ vanilla 4.6-rc7 as well.
>>>
>>> [ 32.792656] NMI watchdog: BUG: soft lockup - CPU#38 stuck for 22s!
>>> [swapper/38:0]
>>>
>>> I observe this just once each time I boot into debian-installer (I'm
>>> using a serial console, but the ast module gets loaded during
>>> startup).
>>
>> I have figured out that it is caused by 'mod_timer(timer, jiffies)' and
>> 'ops->cur_blink_jiffies' is observed as zero in cursor_timer_handler()
>> when the issue happened.
>
> Thanks for tracking this down.
>
> This softlockup looks to be caused by:
>
> commit 27a4c827c34ac4256a190cc9d24607f953c1c459
> Author: Scot Doyle <lkml14 at scotdoyle.com>
> Date: Thu Mar 26 13:56:38 2015 +0000
>
> fbcon: use the cursor blink interval provided by vt
>
> vt now provides a cursor blink interval via vc_data. Use this
> interval instead of the currently hardcoded 200 msecs. Store it in
> fbcon_ops to avoid locking the console in cursor_timer_handler().
>
> Signed-off-by: Scot Doyle <lkml14 at scotdoyle.com>
> Acked-by: Pavel Machek <pavel at ucw.cz>
> Signed-off-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
>
> and
>
> commit bd63364caa8df38bad2b25b11b2a1b849475cce5
> Author: Scot Doyle <lkml14 at scotdoyle.com>
> Date: Thu Mar 26 13:54:39 2015 +0000
>
> vt: add cursor blink interval escape sequence
>
> Add an escape sequence to specify the current console's cursor blink
> interval. The interval is specified as a number of milliseconds until
> the next cursor display state toggle, from 50 to 65535. /proc/loadavg
> did not show a difference with a one msec interval, but the lower
> bound is set to 50 msecs since slower hardware wasn't tested.
>
> Store the interval in the vc_data structure for later access by fbcon,
> initializing the value to fbcon's current hardcoded value of 200 msecs.
>
> Signed-off-by: Scot Doyle <lkml14 at scotdoyle.com>
> Acked-by: Pavel Machek <pavel at ucw.cz>
> Signed-off-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
>
>
>
>> Looks it is a real fbcon/vt issue, see following:
>>
>> fbcon_init()
>> <-.con_init
>> <-visual_init()
>>
>> reset_terminal()
>> <-vc_init()
>>
>> vc->vc_cur_blink_ms is just set in reset_terminal() from vc_init() path,
>> and ops->cur_blink_jiffies is figured out from vc->vc_cur_blink_ms
>> in fbcon_init().
>>
>> And visual_init() is always run before vc_init(), so ops->cur_blink_jiffies
>> is initialized as zero and cause the soft lockup issue finally.
>>
>> Thanks,
>> Ming
>>
>>>
>>> perf shows that the CPU caught by the NMI is typically in code
>>> updating the cursor timer:
>>>
>>> - 16.92% swapper [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore
>>> - _raw_spin_unlock_irqrestore
>>> + 16.87% mod_timer
>>> + 0.05% cursor_timer_handler
>>> - 12.15% swapper [kernel.kallsyms] [k] queue_work_on
>>> - queue_work_on
>>> + 12.00% cursor_timer_handler
>>> + 0.15% call_timer_fn
>>> + 10.98% swapper [kernel.kallsyms] [k] run_timer_softirq
>>> - 2.23% swapper [kernel.kallsyms] [k] mod_timer
>>> - mod_timer
>>> + 1.97% cursor_timer_handler
>>> + 0.26% call_timer_fn
>>>
>>> During the same period, I can see that another CPU is actively
>>> executing the timer function:
>>>
>>> - 42.18% kworker/u96:2 [kernel.kallsyms] [k] ww_mutex_unlock
>>> - ww_mutex_unlock
>>> - 40.70% ast_dirty_update
>>> ast_imageblit
>>> soft_cursor
>>> bit_cursor
>>> fb_flashcursor
>>> process_one_work
>>> worker_thread
>>> kthread
>>> ret_from_fork
>>> + 1.48% ast_imageblit
>>> - 40.15% kworker/u96:2 [kernel.kallsyms] [k] __memcpy_toio
>>> - __memcpy_toio
>>> + 31.54% ast_dirty_update
>>> + 8.61% ast_imageblit
>>>
>>> Using the graph function tracer on fb_flashcursor(), I see that
>>> ast_dirty_update usually takes around 60 us, in which it makes 16
>>> calls to __memcpy_toio(). However, there is always one instance on
>>> every boot of the installer where ast_dirty_update() takes ~98 *ms* to
>>> complete, during which it makes 743 calls to __memcpy_toio(). While
>>> that doesn't directly account for the full 22s, I do wonder if that
>>> maybe a smoking gun.
>>>
>>> fyi, this is being tracked at: https://bugs.launchpad.net/bugs/1574814
>>>
>>> -dann
>
More information about the dri-devel
mailing list