[systemd-devel] [PATCH v2] systemd-bootchart: Prevent closing random file descriptors
Alexander Sverdlin
alexander.sverdlin at gmail.com
Sun Mar 29 11:41:56 PDT 2015
If the kernel has no CONFIG_SCHED_DEBUG option set, systemd-bootchart produces
empty .svg file. The reason for this is very fragile file descriptor logic in
log_sample() and main() (/* do some cleanup, close fd's */ block). There are
many places where file descriptors are closed on failure (missing SCHED_DEBUG
provokes it), but there are several problems with it:
- following iterations in the loop see that the descriptor is non zero and do
not open the corresponding file again;
- "some cleanup" code closes already closed files and the descriptors are reused
already, in particular for resulting .svg file;
- static "vmstat" and "schedstat" variables in log_sample() made the situation
even worse.
These are the strace fragments:
[...]
close(7) = -1 EBADF (Bad file descriptor)
close(-1) = -1 EBADF (Bad file descriptor)
pread(7, 0xbea60a2c, 4095, 0) = -1 EBADF (Bad file descriptor)
close(7) = -1 EBADF (Bad file descriptor)
close(-1) = -1 EBADF (Bad file descriptor)
pread(7, 0xbea60a2c, 4095, 0) = -1 EBADF (Bad file descriptor)
close(7) = -1 EBADF (Bad file descriptor)
close(-1) = -1 EBADF (Bad file descriptor)
getdents64(4, /* 0 entries */, 32768) = 0
clock_gettime(CLOCK_MONOTONIC, {24, 783843501}) = 0
nanosleep({0, 5221792}, NULL) = 0
clock_gettime(CLOCK_MONOTONIC, {24, 789726835}) = 0
lseek(4, 0, SEEK_SET) = 0
pread(5, "nr_free_pages 52309\nnr_alloc_bat"..., 4095, 0) = 685
pread(6, "version 15\ntimestamp 4294939775\n"..., 4095, 0) = 86
getdents64(4, /* 99 entries */, 32768) = 2680
pread(7, 0xbea60a2c, 4095, 0) = -1 EBADF (Bad file descriptor)
close(7) = -1 EBADF (Bad file descriptor)
close(-1) = -1 EBADF (Bad file descriptor)
pread(8, 0xbea60a2c, 4095, 0) = -1 EBADF (Bad file descriptor)
close(8) = -1 EBADF (Bad file descriptor)
close(-1) = -1 EBADF (Bad file descriptor)
pread(9, 0xbea60a2c, 4095, 0) = -1 EBADF (Bad file descriptor)
close(9) = -1 EBADF (Bad file descriptor)
[...]
where it obviously tries to close same and reused decriptors many times, also
passing return code "-1" instead of descriptor...
[...]
close(7) = -1 EBADF (Bad file descriptor)
close(-1) = -1 EBADF (Bad file descriptor)
pipe2([7, 8], O_CLOEXEC) = 0
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0xb6fd0068) = 192
close(8) = 0
fcntl64(7, F_SETFD, 0) = 0
fstat64(7, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb6fd2000
read(7, "[ 0.074507] calling vfp_init"..., 4096) = 4096
[...]
read(7, "s)\n[ 6.228910] UBIFS: reserve"..., 4096) = 4096
read(7, "trary Executable File Formats Fi"..., 4096) = 1616
read(7, "", 4096) = 0
close(7) = 0
wait4(192, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 192
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=192, si_uid=0, si_status=0, si_utime=9, si_stime=9} ---
munmap(0xb6fd2000, 4096) = 0
fstat64(7, 0xbea62850) = -1 EBADF (Bad file descriptor)
mmap2(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb6fce000
write(7, "<?xml version=\"1.0\" standalone=\""..., 38) = -1 EBADF (Bad file descriptor)
write(7, "<!DOCTYPE svg PUBLIC \"-//W3C//DT"..., 47) = -1 EBADF (Bad file descriptor)
write(7, "\"http://www.w3.org/Graphics/SVG/"..., 52) = -1 EBADF (Bad file descriptor)
[...]
here .svg file had descriptor 7, but was closed by log_sample(), then even reused
to read /proc entry and finally systemd-bootchart even tries to write .svg file, but
descriptor is long time lost already.
Fix it by ensuring that the variables are always initialized to -1 and set again
to -1 after close(). Change the logic for opening files, because "0" is also a
valid file descriptor.
The fix is important even with CONFIG_SCHED_DEBUG option enabled, because very
first failure to open /proc/<pid>/* if process exited will result in some other
victim descriptor being closed later and will therefore disturb the whole
collected statistics.
---
Changes in v2:
- "closed" state of file descriptors changed to "-1" instead of "0", as proposed by
Daniel Mack
- Thunderbird repaired and doesn't brake patches any more
src/bootchart/bootchart.c | 4 ++--
src/bootchart/store.c | 27 +++++++++++++++++++--------
2 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/src/bootchart/bootchart.c b/src/bootchart/bootchart.c
index 71dffc9..f50479d 100644
--- a/src/bootchart/bootchart.c
+++ b/src/bootchart/bootchart.c
@@ -448,9 +448,9 @@ int main(int argc, char *argv[]) {
ps = ps_first;
while (ps->next_ps) {
ps = ps->next_ps;
- if (ps->schedstat)
+ if (ps->schedstat >= 0)
close(ps->schedstat);
- if (ps->sched)
+ if (ps->sched >= 0)
close(ps->sched);
if (ps->smaps)
fclose(ps->smaps);
diff --git a/src/bootchart/store.c b/src/bootchart/store.c
index dfa681f..3900936 100644
--- a/src/bootchart/store.c
+++ b/src/bootchart/store.c
@@ -108,8 +108,8 @@ static int pid_cmdline_strscpy(char *buffer, size_t buf_len, int pid) {
}
void log_sample(int sample, struct list_sample_data **ptr) {
- static int vmstat;
- static int schedstat;
+ static int vmstat = -1;
+ static int schedstat = -1;
char buf[4096];
char key[256];
char val[256];
@@ -140,7 +140,7 @@ void log_sample(int sample, struct list_sample_data **ptr) {
rewinddir(proc);
}
- if (!vmstat) {
+ if (vmstat < 0) {
/* block stuff */
vmstat = openat(procfd, "vmstat", O_RDONLY);
if (vmstat == -1) {
@@ -152,6 +152,7 @@ void log_sample(int sample, struct list_sample_data **ptr) {
n = pread(vmstat, buf, sizeof(buf) - 1, 0);
if (n <= 0) {
close(vmstat);
+ vmstat = -1;
return;
}
buf[n] = '\0';
@@ -172,7 +173,7 @@ vmstat_next:
break;
}
- if (!schedstat) {
+ if (schedstat < 0) {
/* overall CPU utilization */
schedstat = openat(procfd, "schedstat", O_RDONLY);
if (schedstat == -1) {
@@ -184,6 +185,7 @@ vmstat_next:
n = pread(schedstat, buf, sizeof(buf) - 1, 0);
if (n <= 0) {
close(schedstat);
+ schedstat = -1;
return;
}
buf[n] = '\0';
@@ -260,6 +262,8 @@ schedstat_next:
}
ps = ps->next_ps;
ps->pid = pid;
+ ps->sched = -1;
+ ps->schedstat = -1;
ps->sample = new0(struct ps_sched_struct, 1);
if (!ps->sample) {
@@ -276,7 +280,7 @@ schedstat_next:
ps->sample->waittime = atoll(wt);
/* get name, start time */
- if (!ps->sched) {
+ if (ps->sched < 0) {
sprintf(filename, "%d/sched", pid);
ps->sched = openat(procfd, filename, O_RDONLY);
if (ps->sched == -1)
@@ -286,6 +290,7 @@ schedstat_next:
s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
if (s <= 0) {
close(ps->sched);
+ ps->sched = -1;
continue;
}
buf[s] = '\0';
@@ -377,7 +382,7 @@ schedstat_next:
* iteration */
/* rt, wt */
- if (!ps->schedstat) {
+ if (ps->schedstat < 0) {
sprintf(filename, "%d/schedstat", pid);
ps->schedstat = openat(procfd, filename, O_RDONLY);
if (ps->schedstat == -1)
@@ -387,8 +392,11 @@ schedstat_next:
if (s <= 0) {
/* clean up our file descriptors - assume that the process exited */
close(ps->schedstat);
- if (ps->sched)
+ ps->schedstat = -1;
+ if (ps->sched) {
close(ps->sched);
+ ps->sched = -1;
+ }
//if (ps->smaps)
// fclose(ps->smaps);
continue;
@@ -488,8 +496,11 @@ catch_rename:
if (s <= 0) {
/* clean up file descriptors */
close(ps->sched);
- if (ps->schedstat)
+ ps->sched = -1;
+ if (ps->schedstat) {
close(ps->schedstat);
+ ps->schedstat = -1;
+ }
//if (ps->smaps)
// fclose(ps->smaps);
continue;
More information about the systemd-devel
mailing list