[systemd-devel] [PATCH] [RFC] Optionally save coredump to file

Oleksii Shevchuk alxchk at gmail.com
Thu Feb 7 13:07:01 PST 2013


Abstract.
Systemd/Journal provides a convenient interface for storing all system events
in one place. While this works in general, some cases are broken. The main
problem is that once a message is caught by the journal, it stays there until
global rotation. So using the journal to save cores has the following side
effect: after a series of crashes, your logs are erased by rotation.

Proposition.
Let's save core dumps to an internal subdirectory of the journal.
The data stored in core files is useful only until it has been analyzed, and
can be deleted after that.

Pros are:
     1. You don't pollute journal
     2. Coredumps can be deleted after analyzing
     3. Creating a coredump can trigger a .path unit for automatic analysis and cleanup
     4. You can even do cleanup via tmpfiles configuration

Cons are:
     1. Additional subdir in journal

Realization.
I tried to hack the existing coredump.c to get the following behavior.

1. A crash of systemd-journald itself is dumped to /var/lib/systemd/coredump or to /dev/null
2. Any other comm is dumped to /var/log/journal/%MACHINE-ID%/coredump/
     as unit.COREDUMP-ID128 with message id fc2e22bc6ee647b6b90729ab34a250b2
     and a COREDUMP_FILE field holding the relative path to the coredump file.
   If the coredump file couldn't be created, the comm is dumped to the journal
     with message id fc2e22bc6ee647b6b90729ab34a250b1
     and a COREDUMP field (with the data) limited to 24 MiB
---
 src/journal/coredump.c | 153 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 104 insertions(+), 49 deletions(-)

diff --git a/src/journal/coredump.c b/src/journal/coredump.c
index 021b4c6..89a10d2 100644
--- a/src/journal/coredump.c
+++ b/src/journal/coredump.c
@@ -35,6 +35,7 @@
 #include "macro.h"
 #include "mkdir.h"
 #include "special.h"
+#include "sd-id128.h"
 #include "cgroup-util.h"
 
 #define COREDUMP_MAX (24*1024*1024)
@@ -49,17 +50,66 @@ enum {
         _ARG_MAX
 };
 
-static int divert_coredump(void) {
-        _cleanup_fclose_ FILE *f = NULL;
+static char * make_core_file(const char * unit) {
+        int r;
+        int coreself = streq(unit, SPECIAL_JOURNALD_SERVICE);
 
-        log_info("Detected coredump of the journal daemon itself, diverting coredump to /var/lib/systemd/coredump/.");
+        _cleanup_fclose_ FILE * corefile = NULL;
+        _cleanup_free_   char * corepath = NULL;
 
-        mkdir_p_label("/var/lib/systemd/coredump", 0755);
+        char * corelink = NULL;
 
-        f = fopen("/var/lib/systemd/coredump/core.systemd-journald", "we");
-        if (!f) {
+        if (coreself) {
+                log_set_target(LOG_TARGET_KMSG);
+                log_open();
+                /* Make sure we don't make use of the journal,
+                 * if it's the journal which is crashing */
+                log_info("Detected coredump of the journal daemon itself, diverting coredump to /var/lib/systemd/coredump/.");
+                mkdir_p_label("/var/lib/systemd/coredump", 0755);
+                corelink = strdup("/var/lib/systemd/coredump/core.systemd-journald");
+                corepath = strdup(corelink);
+        } else {
+                _cleanup_free_ char * c;
+                char buffer[33];
+                sd_id128_t coreid;
+                sd_id128_t machineid;
+
+                r = sd_id128_get_machine(&machineid);
+                if (r)
+                        goto exit;
+
+                c = sd_id128_to_string(machineid, buffer);
+                c = strjoin("/var/log/journal/", c, "/coredump", NULL);
+                if (! c)
+                        goto exit;
+
+                r = access(c, X_OK | W_OK);
+                if (r)
+                        goto exit;
+
+                r = sd_id128_randomize(&coreid);
+                if (r)
+                        goto exit;
+
+                corelink = strjoin(unit, ".", sd_id128_to_string(coreid, buffer), NULL);
+                if (! corelink)
+                        goto exit;
+
+                corepath = strjoin(c, "/", corelink, NULL);
+                if (! corepath)
+                        goto exit;
+        }
+
+        corefile = fopen(corepath, "w");
+        if (! corefile) {
                 log_error("Failed to create coredump file: %m");
-                return -errno;
+
+                if (! coreself) {
+                        free(corelink);
+                        corelink = NULL;
+                }
+
+                goto exit;
         }
 
         for (;;) {
@@ -68,29 +118,29 @@ static int divert_coredump(void) {
 
                 l = fread(buffer, 1, sizeof(buffer), stdin);
                 if (l <= 0) {
-                        if (ferror(f)) {
+                        if (ferror(corefile)) {
                                 log_error("Failed to read coredump: %m");
-                                return -errno;
+                                goto exit;
                         }
 
                         break;
                 }
 
-                q = fwrite(buffer, 1, l, f);
+                q = fwrite(buffer, 1, l, corefile);
                 if (q != l) {
                         log_error("Failed to write coredump: %m");
-                        return -errno;
+                        goto exit;
                 }
         }
 
-        fflush(f);
+        fflush(corefile);
 
-        if (ferror(f)) {
+        if (ferror(corefile)) {
                 log_error("Failed to write coredump: %m");
-                return -errno;
         }
 
-        return 0;
+ exit:
+        return corelink;
 }
 
 int main(int argc, char* argv[]) {
@@ -102,8 +152,11 @@ int main(int argc, char* argv[]) {
         gid_t gid;
         struct iovec iovec[14];
         _cleanup_free_ char *core_pid = NULL, *core_uid = NULL, *core_gid = NULL, *core_signal = NULL,
-                *core_timestamp = NULL, *core_comm = NULL, *core_exe = NULL, *core_unit = NULL,
-                *core_session = NULL, *core_message = NULL, *core_cmdline = NULL, *t = NULL;
+                *core_timestamp = NULL, *core_comm = NULL, *core_exe = NULL, *core_unit = NULL, *unit = NULL,
+                *core_session = NULL, *core_message = NULL, *core_cmdline = NULL,
+                *corelink = NULL, *core_corelink = NULL;
+
+        char * t;
 
         prctl(PR_SET_DUMPABLE, 0);
 
@@ -125,21 +178,10 @@ int main(int argc, char* argv[]) {
                 goto finish;
         }
 
-        if (cg_pid_get_unit(pid, &t) >= 0) {
-
-                if (streq(t, SPECIAL_JOURNALD_SERVICE)) {
-                        /* Make sure we don't make use of the journal,
-                         * if it's the journal which is crashing */
-                        log_set_target(LOG_TARGET_KMSG);
-                        log_open();
-
-                        r = divert_coredump();
-                        goto finish;
-                }
-
-                core_unit = strappend("COREDUMP_UNIT=", t);
-        } else if (cg_pid_get_user_unit(pid, &t) >= 0)
-                core_unit = strappend("COREDUMP_USER_UNIT=", t);
+        if (cg_pid_get_unit(pid, &unit) >= 0) {
+                core_unit = strappend("COREDUMP_UNIT=", unit);
+        } else if (cg_pid_get_user_unit(pid, &unit) >= 0)
+                core_unit = strappend("COREDUMP_USER_UNIT=", unit);
 
         if (core_unit)
                 IOVEC_SET_STRING(iovec[j++], core_unit);
@@ -191,7 +233,6 @@ int main(int argc, char* argv[]) {
         }
 
 #endif
-
         if (get_process_exe(pid, &t) >= 0) {
                 core_exe = strappend("COREDUMP_EXE=", t);
                 free(t);
@@ -212,7 +253,6 @@ int main(int argc, char* argv[]) {
         if (core_timestamp)
                 IOVEC_SET_STRING(iovec[j++], core_timestamp);
 
-        IOVEC_SET_STRING(iovec[j++], "MESSAGE_ID=fc2e22bc6ee647b6b90729ab34a250b1");
         IOVEC_SET_STRING(iovec[j++], "PRIORITY=2");
 
         core_message = strjoin("MESSAGE=Process ", argv[ARG_PID], " (", argv[ARG_COMM], ") dumped core.", NULL);
@@ -232,24 +272,39 @@ int main(int argc, char* argv[]) {
                 goto finish;
         }
 
-        p = malloc(9 + COREDUMP_MAX);
-        if (!p) {
-                r = log_oom();
-                goto finish;
-        }
+        corelink = make_core_file(unit ? unit : core_comm);
 
-        memcpy(p, "COREDUMP=", 9);
+        if (! corelink) {
+                IOVEC_SET_STRING(iovec[j++], "MESSAGE_ID=fc2e22bc6ee647b6b90729ab34a250b1");
 
-        n = loop_read(STDIN_FILENO, p + 9, COREDUMP_MAX, false);
-        if (n < 0) {
-                log_error("Failed to read core dump data: %s", strerror(-n));
-                r = (int) n;
-                goto finish;
-        }
+                p = malloc(9 + COREDUMP_MAX);
+                if (!p) {
+                        r = log_oom();
+                        goto finish;
+                }
+
+                memcpy(p, "COREDUMP=", 9);
 
-        iovec[j].iov_base = p;
-        iovec[j].iov_len = 9 + n;
-        j++;
+                n = loop_read(STDIN_FILENO, p + 9, COREDUMP_MAX, false);
+                if (n < 0) {
+                        log_error("Failed to read core dump data: %s", strerror(-n));
+                        r = (int) n;
+                        goto finish;
+                }
+
+                iovec[j].iov_base = p;
+                iovec[j].iov_len = 9 + n;
+                j++;
+        } else {
+                if (corelink[0] == '/')
+                        goto finish;
+
+                core_corelink = strjoin("COREDUMP_FILE=coredump/", corelink, NULL);
+                if (core_corelink)
+                        IOVEC_SET_STRING(iovec[j++], core_corelink);
+
+                IOVEC_SET_STRING(iovec[j++], "MESSAGE_ID=fc2e22bc6ee647b6b90729ab34a250b2");
+        }
 
         r = sd_journal_sendv(iovec, j);
         if (r < 0)
-- 
1.8.1.2



More information about the systemd-devel mailing list