[systemd-devel] /proc and /sys get unmounted during boot from NFS, which results in boot error

Olaf Leidinger oleid at mescharet.de
Wed Feb 4 06:29:52 PST 2015


Hey!

> Maybe something from your initrd is acting weird?

As far as I can tell, nothing from the initrd is running. It's
basically ArchLinux's mkinitcpio run on gentoo - thus, pretty standard stuff.

> It might be interesting to strace PID 1 while you are doing this. And
> if that doesn't help strace the other processes you run, to see if any
> of them unmounts anything.


Good idea! Actually, I found out some more stuff.

1. It's not calling "ls /proc", it seems as if the auto-complete
   function of the running shell triggers the problem.

2. It's not restricted to auto-completing /proc, /home triggers it as
   well.

3. But it's not bash's fault, tcsh triggers it, too.

4. Auto-completing one level deeper, e.g. /usr/bin, doesn't trigger
   the unmount.


( note: I've two shells running: one started by the emergency shell and
  one started by the small dropbear server I manually started 
  for debugging purpose. Everything but dropbear is watched by strace.
  The problem, of course, also occurs without dropbear.)

$ strace -f -F -p 1 -p 179 -p191 -p177
Process 1 attached
Process 179 attached
Process 191 attached
Process 177 attached
[pid   177] wait4(-1,  <unfinished ...>
[pid   191] rt_sigsuspend([] <unfinished ...>
[pid   179] read(16,  <unfinished ...>
[pid     1] fstat(14, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0
[pid     1] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7faf0bd52000
[pid     1] read(14, "strace\n", 1024)  = 7
[pid     1] close(14)                   = 0
[pid     1] munmap(0x7faf0bd52000, 4096) = 0
[pid     1] writev(3, [{"<31>", 4}, {"systemd", 7}, {"[1]: ", 5}, {"Child 202 (strace) died (code=ex"..., 55}, {"\n", 1}], 5) = 72
[pid     1] open("/proc/202/cgroup", O_RDONLY|O_CLOEXEC) = 14
[pid     1] fstat(14, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
[pid     1] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7faf0bd52000
[pid     1] read(14, "8:memory:/\n7:freezer:/\n6:devices"..., 1024) = 101
[pid     1] close(14)                   = 0
[pid     1] munmap(0x7faf0bd52000, 4096) = 0
[pid     1] waitid(P_PID, 202, {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=202, si_status=0, si_utime=0, si_stime=0}, WEXITED, NULL) = 0
[pid     1] waitid(P_ALL, 0, {}, WNOHANG|WEXITED|WNOWAIT, NULL) = 0
[pid     1] epoll_wait(4,  <unfinished ...>
[pid   179] <... read resumed> "\n", 1) = 1

[...]

( this was the only output from systemd, so I guess it's not guilty).


#### auto-completing /proc

[pid   179] rt_sigprocmask(SIG_BLOCK, [INT], [], 8) = 0
[pid   179] write(2, "l", 1)            = 1
[pid   179] rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
[pid   179] rt_sigprocmask(SIG_BLOCK, NULL, [], 8) = 0
[pid   179] read(0, "s", 1)             = 1
[pid   179] rt_sigprocmask(SIG_BLOCK, [INT], [], 8) = 0
[pid   179] write(2, "s", 1)            = 1
[pid   179] rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
[pid   179] rt_sigprocmask(SIG_BLOCK, NULL, [], 8) = 0
[pid   179] read(0, " ", 1)             = 1
[pid   179] rt_sigprocmask(SIG_BLOCK, [INT], [], 8) = 0
[pid   179] write(2, " ", 1)            = 1
[pid   179] rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
[pid   179] rt_sigprocmask(SIG_BLOCK, NULL, [], 8) = 0
[pid   179] read(0, "/", 1)             = 1
[pid   179] rt_sigprocmask(SIG_BLOCK, [INT], [], 8) = 0
[pid   179] write(2, "/", 1)            = 1
[pid   179] rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
[pid   179] rt_sigprocmask(SIG_BLOCK, NULL, [], 8) = 0
[pid   179] read(0, "p", 1)             = 1
[pid   179] rt_sigprocmask(SIG_BLOCK, [INT], [], 8) = 0
[pid   179] write(2, "p", 1)            = 1
[pid   179] rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
[pid   179] rt_sigprocmask(SIG_BLOCK, NULL, [], 8) = 0
[pid   179] read(0, "r", 1)             = 1
[pid   179] rt_sigprocmask(SIG_BLOCK, [INT], [], 8) = 0
[pid   179] write(2, "r", 1)            = 1
[pid   179] rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
[pid   179] rt_sigprocmask(SIG_BLOCK, NULL, [], 8) = 0
[pid   179] read(0, "o", 1)             = 1
[pid   179] rt_sigprocmask(SIG_BLOCK, [INT], [], 8) = 0
[pid   179] write(2, "o", 1)            = 1
[pid   179] rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
[pid   179] rt_sigprocmask(SIG_BLOCK, NULL, [], 8) = 0
[pid   179] read(0, "\t", 1)            = 1

### tab is pushed here
[pid   179] openat(AT_FDCWD, "/", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 3
[pid   179] getdents(3, /* 22 entries */, 32768) = 560
[pid   179] getdents(3, /* 0 entries */, 32768) = 0
[pid   179] close(3)                    = 0
[pid   179] stat("/proc", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
[pid   179] rt_sigprocmask(SIG_BLOCK, [INT], [], 8) = 0
[pid   179] write(2, "c/", 2)  



# after a reboot: autocomplete, that doesn't trigger
[pid   179] read(0, "\t", 1)            = 1
[pid   179] stat("/usr", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
[pid   179] openat(AT_FDCWD, "/usr/", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 3
[pid   179] getdents(3, /* 16 entries */, 32768) = 464
[pid   179] getdents(3, /* 0 entries */, 32768) = 0
[pid   179] close(3)                    = 0
[pid   179] stat("/usr/bin", {st_mode=S_IFDIR|0755, st_size=110592, ...}) = 0
[pid   179] rt_sigprocmask(SIG_BLOCK, [INT], [], 8) = 0
[pid   179] write(2, "n/", 2) 


############# tcsh ###########
# autocomplete, that *does* trigger the problem on *tcsh*

[pid   179] openat(AT_FDCWD, "/", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 3
[pid   179] lseek(3, 0, SEEK_SET)       = 0
[pid   179] rt_sigprocmask(SIG_BLOCK, NULL, [], 8) = 0
[pid   179] getdents(3, /* 22 entries */, 32768) = 560
[pid   179] getdents(3, /* 0 entries */, 32768) = 0
[pid   179] close(3)                    = 0

# autocomplete for tcsh, that doesn't trigger the problem

[pid   179] openat(AT_FDCWD, "/usr/", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 3
[pid   179] lseek(3, 0, SEEK_SET)       = 0
[pid   179] rt_sigprocmask(SIG_BLOCK, NULL, [], 8) = 0
[pid   179] getdents(3, /* 16 entries */, 32768) = 464
[pid   179] getdents(3, /* 0 entries */, 32768) = 0
[pid   179] close(3)                    = 0


So somehow these system calls must trigger the unmount. Probably some process which 
gets started by systemd performs similar system calls and thus spoils my boot.

I guess the next thing would be to try a different kernel, but that's going to be
off-topic.

Thanks for your suggestions, Lennart!


Best,

Olaf


More information about the systemd-devel mailing list