[systemd-commits] 3 commits - Makefile.am TODO man/systemd.device.xml src/hostname src/libsystemd-bus src/locale src/login src/machine src/network src/socket-proxy src/systemd src/timedate units/systemd-hostnamed.service.in units/systemd-localed.service.in units/systemd-logind.service.in units/systemd-machined.service.in units/systemd-networkd.service.in units/systemd-timedated.service.in

Lennart Poettering lennart at kemper.freedesktop.org
Wed Dec 11 09:40:30 PST 2013


 Makefile.am                           |    8 +
 TODO                                  |   11 +-
 man/systemd.device.xml                |   45 ++++++---
 src/hostname/hostnamed.c              |    2 
 src/libsystemd-bus/libsystemd-bus.sym |    1 
 src/libsystemd-bus/sd-event.c         |  163 +++++++++++++++++++++++++++++++---
 src/libsystemd-bus/test-event.c       |    2 
 src/locale/localed.c                  |    2 
 src/login/logind.c                    |    2 
 src/machine/machined.c                |    2 
 src/network/networkd-manager.c        |    2 
 src/socket-proxy/socket-proxyd.c      |    2 
 src/systemd/sd-event.h                |    1 
 src/timedate/timedated.c              |    2 
 units/systemd-hostnamed.service.in    |    1 
 units/systemd-localed.service.in      |    1 
 units/systemd-logind.service.in       |    1 
 units/systemd-machined.service.in     |    1 
 units/systemd-networkd.service.in     |    1 
 units/systemd-timedated.service.in    |    1 
 20 files changed, 219 insertions(+), 32 deletions(-)

New commits:
commit 419173e60a05424008fcd312f6df4b59b2ce8e62
Author: Lennart Poettering <lennart at poettering.net>
Date:   Wed Dec 11 18:38:51 2013 +0100

    man: explain in more detail how SYSTEMD_READY= influences SYSTEMD_WANTS= in udev rules
    
    https://bugzilla.redhat.com/show_bug.cgi?id=1026860

diff --git a/man/systemd.device.xml b/man/systemd.device.xml
index 96ebe89..002b647 100644
--- a/man/systemd.device.xml
+++ b/man/systemd.device.xml
@@ -70,12 +70,15 @@
                 since no device-specific options may be
                 configured.</para>
 
-                <para>systemd will automatically create dynamic device
-                units for all kernel devices that are marked with the
-                "systemd" udev tag (by default all block and network
-                devices, and a few others). This may be used to define
-                dependencies between devices and other
-                units.</para>
+                <para>systemd will dynamically create device units for
+                all kernel devices that are marked with the "systemd"
+                udev tag (by default all block and network devices,
+                and a few others). This may be used to define
+                dependencies between devices and other units. To tag a
+                udev device use <literal>TAG+="systemd"</literal> in
+                the udev rules file, see
+                <citerefentry><refentrytitle>udev</refentrytitle><manvolnum>7</manvolnum></citerefentry>
+                for details.</para>
 
                 <para>Device units are named after the
                 <filename>/sys</filename> and
@@ -93,7 +96,7 @@
 
                 <para>The settings of device units may either be
                 configured via unit files, or directly from the udev
-                database (which is recommended). The following udev
+                database (which is recommended). The following udev device
                 properties are understood by systemd:</para>
 
                 <variablelist class='udev-directives'>
@@ -101,16 +104,26 @@
                                 <term><varname>SYSTEMD_WANTS=</varname></term>
                                 <listitem><para>Adds dependencies of
                                 type <varname>Wants</varname> from
-                                this unit to all listed units. This
+                                the device unit to all listed units. This
                                 may be used to activate arbitrary
-                                units, when a specific device becomes
+                                units when a specific device becomes
                                 available. Note that this and the
                                 other tags are not taken into account
                                 unless the device is tagged with the
                                 <literal>systemd</literal> string in
                                 the udev database, because otherwise
                                 the device is not exposed as systemd
-                                unit.</para></listitem>
+                                unit (see above). Note that systemd
+                                will only act on
+                                <varname>Wants</varname> dependencies
+                                when a device first becomes active, it
+                                will not act on them if they are added
+                                to devices that are already
+                                active. Use
+                                <varname>SYSTEMD_READY=</varname> (see
+                                below) to influence on which udev
+                                event to trigger the device
+                                dependencies.</para></listitem>
                         </varlistentry>
 
                         <varlistentry>
@@ -135,10 +148,14 @@
                                 device disappears from the udev
                                 tree. This option is useful to support
                                 devices that initially show up in an
-                                uninitialized state in the tree, and for
-                                which a changed event is generated the
-                                moment they are fully set
-                                up.</para></listitem>
+                                uninitialized state in the tree, and
+                                for which a <literal>changed</literal>
+                                event is generated the moment they are
+                                fully set up. Note that
+                                <varname>SYSTEMD_WANTS=</varname> (see
+                                above) is not acted on as long as
+                                <varname>SYSTEMD_READY=0</varname> is
+                                set for a device.</para></listitem>
                         </varlistentry>
 
                         <varlistentry>

commit cde93897cdefdd7c7f66c400a61e42ceee5f6a46
Author: Lennart Poettering <lennart at poettering.net>
Date:   Wed Dec 11 18:14:52 2013 +0100

    event: hook up sd-event with the service watchdog logic
    
    Adds a new call sd_event_set_watchdog() that can be used to hook up the
    event loop with the watchdog supervision logic of systemd. If enabled
    and $WATCHDOG_USEC is set the event loop will ping the invoking systemd
    daemon right after coming back from epoll_wait() but not more often than
    $WATCHDOG_USEC/4. The epoll_wait() will sleep no longer than
    $WATCHDOG_USEC/4*3, to make sure the service manager is called in time.
    
    This means that setting WatchdogSec= in a .service file and calling
    sd_event_set_watchdog() in your daemon is enough to hook it up with the
    watchdog logic.

diff --git a/Makefile.am b/Makefile.am
index 19da6ea..9e4b136 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -652,10 +652,12 @@ test_rtnl_SOURCES = \
 test_rtnl_LDADD = \
 	libsystemd-rtnl.la \
 	libsystemd-bus-internal.la \
+	libsystemd-daemon-internal.la \
 	libsystemd-id128-internal.la \
 	libsystemd-shared.la
 
-tests += test-rtnl
+tests += \
+	test-rtnl
 
 # ------------------------------------------------------------------------------
 noinst_LTLIBRARIES += \
@@ -3966,10 +3968,12 @@ test_network_LDADD = \
 	libudev-internal.la \
 	libsystemd-bus-internal.la \
 	libsystemd-id128-internal.la \
+	libsystemd-daemon-internal.la \
 	libsystemd-rtnl.la \
 	libsystemd-shared.la
 
-tests += test-network
+tests += \
+	test-network
 
 EXTRA_DIST += \
 	src/network/networkd-gperf.gperf \
diff --git a/TODO b/TODO
index 8dc8f63..d909ab9 100644
--- a/TODO
+++ b/TODO
@@ -137,7 +137,6 @@ Features:
     but do not return anything up to the event loop caller. Instead
     add parameter to sd_event_request_quit() to take retval. This way
     errors rippling upwards are the option, not the default
-  - native support for watchdog stuff
 
 * in the final killing spree, detect processes from the root directory, and
   complain loudly if they have argv[0][0] == '@' set.
@@ -311,6 +310,7 @@ Features:
     boot, and causes the journal to be moved back to /run on shutdown,
     so that we don't keep /var busy. This needs to happen synchronously,
     hence doing this via signals is not going to work.
+  - port to sd-event, enable watchdog from event loop
 
 * document:
   - document that deps in [Unit] sections ignore Alias= fileds in
diff --git a/src/hostname/hostnamed.c b/src/hostname/hostnamed.c
index f7ae50d..ece2b41 100644
--- a/src/hostname/hostnamed.c
+++ b/src/hostname/hostnamed.c
@@ -627,6 +627,8 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
+        sd_event_set_watchdog(event, true);
+
         r = connect_bus(&context, event, &bus);
         if (r < 0)
                 goto finish;
diff --git a/src/libsystemd-bus/libsystemd-bus.sym b/src/libsystemd-bus/libsystemd-bus.sym
index 7bc1ef9..4a849b3 100644
--- a/src/libsystemd-bus/libsystemd-bus.sym
+++ b/src/libsystemd-bus/libsystemd-bus.sym
@@ -238,6 +238,7 @@ global:
         sd_event_request_quit;
         sd_event_get_now_realtime;
         sd_event_get_now_monotonic;
+        sd_event_set_watchdog;
 
         sd_event_source_ref;
         sd_event_source_unref;
diff --git a/src/libsystemd-bus/sd-event.c b/src/libsystemd-bus/sd-event.c
index eb03923..9fceb7b 100644
--- a/src/libsystemd-bus/sd-event.c
+++ b/src/libsystemd-bus/sd-event.c
@@ -24,6 +24,7 @@
 #include <sys/wait.h>
 
 #include "sd-id128.h"
+#include "sd-daemon.h"
 #include "macro.h"
 #include "prioq.h"
 #include "hashmap.h"
@@ -43,7 +44,8 @@ typedef enum EventSourceType {
         SOURCE_SIGNAL,
         SOURCE_CHILD,
         SOURCE_DEFER,
-        SOURCE_QUIT
+        SOURCE_QUIT,
+        SOURCE_WATCHDOG
 } EventSourceType;
 
 struct sd_event_source {
@@ -105,6 +107,7 @@ struct sd_event {
         int signal_fd;
         int realtime_fd;
         int monotonic_fd;
+        int watchdog_fd;
 
         Prioq *pending;
         Prioq *prepare;
@@ -139,9 +142,12 @@ struct sd_event {
 
         bool quit_requested:1;
         bool need_process_child:1;
+        bool watchdog:1;
 
         pid_t tid;
         sd_event **default_event_ptr;
+
+        usec_t watchdog_last, watchdog_period;
 };
 
 static int pending_prioq_compare(const void *a, const void *b) {
@@ -323,6 +329,9 @@ static void event_free(sd_event *e) {
         if (e->monotonic_fd >= 0)
                 close_nointr_nofail(e->monotonic_fd);
 
+        if (e->watchdog_fd >= 0)
+                close_nointr_nofail(e->watchdog_fd);
+
         prioq_free(e->pending);
         prioq_free(e->prepare);
         prioq_free(e->monotonic_earliest);
@@ -348,7 +357,7 @@ _public_ int sd_event_new(sd_event** ret) {
                 return -ENOMEM;
 
         e->n_ref = 1;
-        e->signal_fd = e->realtime_fd = e->monotonic_fd = e->epoll_fd = -1;
+        e->signal_fd = e->realtime_fd = e->monotonic_fd = e->watchdog_fd = e->epoll_fd = -1;
         e->realtime_next = e->monotonic_next = (usec_t) -1;
         e->original_pid = getpid();
 
@@ -1422,8 +1431,8 @@ static int event_arm_timer(
         usec_t t;
         int r;
 
-        assert_se(e);
-        assert_se(next);
+        assert(e);
+        assert(next);
 
         a = prioq_peek(earliest);
         if (!a || a->enabled == SD_EVENT_OFF) {
@@ -1462,7 +1471,7 @@ static int event_arm_timer(
 
         r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
         if (r < 0)
-                return r;
+                return -errno;
 
         *next = t;
         return 0;
@@ -1484,7 +1493,6 @@ static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
 
         assert(e);
         assert(fd >= 0);
-        assert(next);
 
         assert_return(events == EPOLLIN, -EIO);
 
@@ -1499,7 +1507,8 @@ static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
         if (ss != sizeof(x))
                 return -EIO;
 
-        *next = (usec_t) -1;
+        if (next)
+                *next = (usec_t) -1;
 
         return 0;
 }
@@ -1782,6 +1791,43 @@ static sd_event_source* event_next_pending(sd_event *e) {
         return p;
 }
 
+static int arm_watchdog(sd_event *e) {
+        struct itimerspec its = {};
+        usec_t t;
+        int r;
+
+        assert(e);
+        assert(e->watchdog_fd >= 0);
+
+        t = sleep_between(e,
+                          e->watchdog_last + (e->watchdog_period / 2),
+                          e->watchdog_last + (e->watchdog_period * 3 / 4));
+
+        timespec_store(&its.it_value, t);
+
+        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
+        if (r < 0)
+                return -errno;
+
+        return 0;
+}
+
+static int process_watchdog(sd_event *e) {
+        assert(e);
+
+        if (!e->watchdog)
+                return 0;
+
+        /* Don't notify watchdog too often */
+        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
+                return 0;
+
+        sd_notify(false, "WATCHDOG=1");
+        e->watchdog_last = e->timestamp.monotonic;
+
+        return arm_watchdog(e);
+}
+
 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
         struct epoll_event ev_queue[EPOLL_QUEUE_MAX];
         sd_event_source *p;
@@ -1831,6 +1877,8 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
                         r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                         r = process_signal(e, ev_queue[i].events);
+                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
+                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                 else
                         r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
 
@@ -1838,6 +1886,10 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
                         goto finish;
         }
 
+        r = process_watchdog(e);
+        if (r < 0)
+                goto finish;
+
         r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);
         if (r < 0)
                 goto finish;
@@ -1970,3 +2022,63 @@ _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
 
         return -ENXIO;
 }
+
+_public_ int sd_event_set_watchdog(sd_event *e, int b) {
+        int r;
+
+        assert_return(e, -EINVAL);
+
+        if (e->watchdog == !!b)
+                return e->watchdog;
+
+        if (b) {
+                struct epoll_event ev = {};
+                const char *env;
+
+                env = getenv("WATCHDOG_USEC");
+                if (!env)
+                        return false;
+
+                r = safe_atou64(env, &e->watchdog_period);
+                if (r < 0)
+                        return r;
+                if (e->watchdog_period <= 0)
+                        return -EIO;
+
+                /* Issue first ping immediately */
+                sd_notify(false, "WATCHDOG=1");
+                e->watchdog_last = now(CLOCK_MONOTONIC);
+
+                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
+                if (e->watchdog_fd < 0)
+                        return -errno;
+
+                r = arm_watchdog(e);
+                if (r < 0)
+                        goto fail;
+
+                ev.events = EPOLLIN;
+                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
+
+                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
+                if (r < 0) {
+                        r = -errno;
+                        goto fail;
+                }
+
+        } else {
+                if (e->watchdog_fd >= 0) {
+                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
+                        close_nointr_nofail(e->watchdog_fd);
+                        e->watchdog_fd = -1;
+                }
+        }
+
+        e->watchdog = !!b;
+        return e->watchdog;
+
+fail:
+        close_nointr_nofail(e->watchdog_fd);
+        e->watchdog_fd = -1;
+        return r;
+}
diff --git a/src/libsystemd-bus/test-event.c b/src/libsystemd-bus/test-event.c
index 5360d87..2b91eb0 100644
--- a/src/libsystemd-bus/test-event.c
+++ b/src/libsystemd-bus/test-event.c
@@ -165,6 +165,8 @@ int main(int argc, char *argv[]) {
 
         assert_se(sd_event_default(&e) >= 0);
 
+        assert_se(sd_event_set_watchdog(e, true) >= 0);
+
         got_a = false, got_b = false, got_c = false, got_d = 0;
 
         /* Add a oneshot handler, trigger it, re-enable it, and trigger
diff --git a/src/locale/localed.c b/src/locale/localed.c
index abb610d..7c41822 100644
--- a/src/locale/localed.c
+++ b/src/locale/localed.c
@@ -1137,6 +1137,8 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
+        sd_event_set_watchdog(event, true);
+
         r = connect_bus(&context, event, &bus);
         if (r < 0)
                 goto finish;
diff --git a/src/login/logind.c b/src/login/logind.c
index 87d46ee..5ce79b2 100644
--- a/src/login/logind.c
+++ b/src/login/logind.c
@@ -96,6 +96,8 @@ Manager *manager_new(void) {
                 return NULL;
         }
 
+        sd_event_set_watchdog(m->event, true);
+
         return m;
 }
 
diff --git a/src/machine/machined.c b/src/machine/machined.c
index 35b33c3..d639603 100644
--- a/src/machine/machined.c
+++ b/src/machine/machined.c
@@ -59,6 +59,8 @@ Manager *manager_new(void) {
                 return NULL;
         }
 
+        sd_event_set_watchdog(m->event, true);
+
         return m;
 }
 
diff --git a/src/network/networkd-manager.c b/src/network/networkd-manager.c
index c8ec239..d59b913 100644
--- a/src/network/networkd-manager.c
+++ b/src/network/networkd-manager.c
@@ -35,6 +35,8 @@ int manager_new(Manager **ret) {
         if (r < 0)
                 return r;
 
+        sd_event_set_watchdog(m->event, true);
+
         r = sd_rtnl_open(RTMGRP_LINK | RTMGRP_IPV4_IFADDR, &m->rtnl);
         if (r < 0)
                 return r;
diff --git a/src/socket-proxy/socket-proxyd.c b/src/socket-proxy/socket-proxyd.c
index 432558d..c6f56be 100644
--- a/src/socket-proxy/socket-proxyd.c
+++ b/src/socket-proxy/socket-proxyd.c
@@ -632,6 +632,8 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
+        sd_event_set_watchdog(event, true);
+
         n = sd_listen_fds(1);
         if (n < 0) {
                 log_error("Failed to receive sockets from parent.");
diff --git a/src/systemd/sd-event.h b/src/systemd/sd-event.h
index 63b223c..3551077 100644
--- a/src/systemd/sd-event.h
+++ b/src/systemd/sd-event.h
@@ -93,6 +93,7 @@ int sd_event_get_quit(sd_event *e);
 int sd_event_request_quit(sd_event *e);
 int sd_event_get_now_realtime(sd_event *e, uint64_t *usec);
 int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec);
+int sd_event_set_watchdog(sd_event *e, int b);
 
 sd_event_source* sd_event_source_ref(sd_event_source *s);
 sd_event_source* sd_event_source_unref(sd_event_source *s);
diff --git a/src/timedate/timedated.c b/src/timedate/timedated.c
index 6d4388c..af2b078 100644
--- a/src/timedate/timedated.c
+++ b/src/timedate/timedated.c
@@ -836,6 +836,8 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
+        sd_event_set_watchdog(event, true);
+
         r = connect_bus(&context, event, &bus);
         if (r < 0)
                 goto finish;
diff --git a/units/systemd-hostnamed.service.in b/units/systemd-hostnamed.service.in
index 874f6c2..3f5ef75 100644
--- a/units/systemd-hostnamed.service.in
+++ b/units/systemd-hostnamed.service.in
@@ -14,3 +14,4 @@ Documentation=http://www.freedesktop.org/wiki/Software/systemd/hostnamed
 ExecStart=@rootlibexecdir@/systemd-hostnamed
 BusName=org.freedesktop.hostname1
 CapabilityBoundingSet=CAP_SYS_ADMIN CAP_DAC_OVERRIDE CAP_SYS_PTRACE
+WatchdogSec=1min
diff --git a/units/systemd-localed.service.in b/units/systemd-localed.service.in
index 6818a4c..1951123 100644
--- a/units/systemd-localed.service.in
+++ b/units/systemd-localed.service.in
@@ -14,3 +14,4 @@ Documentation=http://www.freedesktop.org/wiki/Software/systemd/localed
 ExecStart=@rootlibexecdir@/systemd-localed
 BusName=org.freedesktop.locale1
 CapabilityBoundingSet=
+WatchdogSec=1min
diff --git a/units/systemd-logind.service.in b/units/systemd-logind.service.in
index 31b5cd0..9019668 100644
--- a/units/systemd-logind.service.in
+++ b/units/systemd-logind.service.in
@@ -19,6 +19,7 @@ Restart=always
 RestartSec=0
 BusName=org.freedesktop.login1
 CapabilityBoundingSet=CAP_SYS_ADMIN CAP_AUDIT_CONTROL CAP_CHOWN CAP_KILL CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_FOWNER CAP_SYS_TTY_CONFIG
+WatchdogSec=1min
 
 # Increase the default a bit in order to allow many simultaneous
 # logins since we keep one fd open per session.
diff --git a/units/systemd-machined.service.in b/units/systemd-machined.service.in
index 26bfe03..a23dca9 100644
--- a/units/systemd-machined.service.in
+++ b/units/systemd-machined.service.in
@@ -18,3 +18,4 @@ Restart=always
 RestartSec=0
 BusName=org.freedesktop.machine1
 CapabilityBoundingSet=CAP_KILL
+WatchdogSec=1min
diff --git a/units/systemd-networkd.service.in b/units/systemd-networkd.service.in
index 066d852..95205cd 100644
--- a/units/systemd-networkd.service.in
+++ b/units/systemd-networkd.service.in
@@ -17,3 +17,4 @@ Type=notify
 Restart=always
 RestartSec=0
 ExecStart=@rootlibexecdir@/systemd-networkd
+WatchdogSec=1min
diff --git a/units/systemd-timedated.service.in b/units/systemd-timedated.service.in
index dd3eb1b..f7fb657 100644
--- a/units/systemd-timedated.service.in
+++ b/units/systemd-timedated.service.in
@@ -14,3 +14,4 @@ Documentation=http://www.freedesktop.org/wiki/Software/systemd/timedated
 ExecStart=@rootlibexecdir@/systemd-timedated
 BusName=org.freedesktop.timedate1
 CapabilityBoundingSet=CAP_SYS_TIME
+WatchdogSec=1min

commit 08cd15525450ff2c2ac814a58930f6d82284a1ba
Author: Lennart Poettering <lennart at poettering.net>
Date:   Wed Dec 11 03:50:35 2013 +0000

    event: when handling SIGCHLD of a child process only reap after dispatching event source
    
    That way the even source callback is run with the zombie process still
    around so that it can access /proc/$PID/ and similar, and so that it can
    be sure that the PID has not been reused yet.

diff --git a/TODO b/TODO
index 2fb9cd3..8dc8f63 100644
--- a/TODO
+++ b/TODO
@@ -45,9 +45,11 @@ Features:
   - add field to transient units that indicate whether systemd or somebody else saves/restores its settings, for integration with libvirt
   - ensure scope units may be started only a single time
 
-* switch to SipHash for hashmaps/sets?
+* code cleanup
+  - get rid of readdir_r/dirent_storage stuff, it's unnecessary on Linux
+  - we probably should replace the left-over uses of strv_append() and replace them by strv_push() or strv_extend()
 
-* general: get rid of readdir_r/dirent_storage stuff, it's unnecessary on Linux
+* switch to SipHash for hashmaps/sets?
 
 * when we detect low battery and no AC on boot, show pretty splash and refuse boot
 
@@ -71,8 +73,6 @@ Features:
 
 * Add a new Distribute=$NUMBER key to socket units that makes use of SO_REUSEPORT to distribute network traffic on $NUMBER instances
 
-* we probably should replace the left-over uses of strv_append() and replace them by strv_push() or strv_extend()
-
 * move config_parse_path_strv() out of conf-parser.c
 
 * After coming back from hibernation reset hibernation swap partition using the /dev/snapshot ioctl APIs
@@ -137,7 +137,6 @@ Features:
     but do not return anything up to the event loop caller. Instead
     add parameter to sd_event_request_quit() to take retval. This way
     errors rippling upwards are the option, not the default
-  - child pid handling: first invoke waitid(WNOHANG) and call event handler, only afterwards reap the process
   - native support for watchdog stuff
 
 * in the final killing spree, detect processes from the root directory, and
diff --git a/src/libsystemd-bus/sd-event.c b/src/libsystemd-bus/sd-event.c
index 282e914..eb03923 100644
--- a/src/libsystemd-bus/sd-event.c
+++ b/src/libsystemd-bus/sd-event.c
@@ -1555,6 +1555,10 @@ static int process_child(sd_event *e) {
            don't care about. Since this is O(n) this means that if you
            have a lot of processes you probably want to handle SIGCHLD
            yourself.
+
+           We do not reap the children here (by using WNOWAIT), this
+           is only done after the event source is dispatched so that
+           the callback still sees the process as a zombie.
         */
 
         HASHMAP_FOREACH(s, e->child_sources, i) {
@@ -1567,11 +1571,27 @@ static int process_child(sd_event *e) {
                         continue;
 
                 zero(s->child.siginfo);
-                r = waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|s->child.options);
+                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
+                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                 if (r < 0)
                         return -errno;
 
                 if (s->child.siginfo.si_pid != 0) {
+                        bool zombie =
+                                s->child.siginfo.si_code == CLD_EXITED ||
+                                s->child.siginfo.si_code == CLD_KILLED ||
+                                s->child.siginfo.si_code == CLD_DUMPED;
+
+                        if (!zombie && (s->child.options & WEXITED)) {
+                                /* If the child isn't dead then let's
+                                 * immediately remove the state change
+                                 * from the queue, since there's no
+                                 * benefit in leaving it queued */
+
+                                assert(s->child.options & (WSTOPPED|WCONTINUED));
+                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
+                        }
+
                         r = source_set_pending(s, true);
                         if (r < 0)
                                 return r;
@@ -1625,7 +1645,6 @@ static int process_signal(sd_event *e, uint32_t events) {
                         return r;
         }
 
-
         return 0;
 }
 
@@ -1667,9 +1686,21 @@ static int source_dispatch(sd_event_source *s) {
                 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                 break;
 
-        case SOURCE_CHILD:
+        case SOURCE_CHILD: {
+                bool zombie;
+
+                zombie = s->child.siginfo.si_code == CLD_EXITED ||
+                         s->child.siginfo.si_code == CLD_KILLED ||
+                         s->child.siginfo.si_code == CLD_DUMPED;
+
                 r = s->child.callback(s, &s->child.siginfo, s->userdata);
+
+                /* Now, reap the PID for good. */
+                if (zombie)
+                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
+
                 break;
+        }
 
         case SOURCE_DEFER:
                 r = s->defer.callback(s, s->userdata);



More information about the systemd-commits mailing list