[systemd-devel] [PATCH v2] log: be more verbose if dbus job fails
Zbigniew Jędrzejewski-Szmek
zbyszek at in.waw.pl
Thu Apr 9 07:10:14 PDT 2015
On Thu, Apr 09, 2015 at 03:20:02PM +0200, Michal Sekletar wrote:
> Users might have hard time figuring out why exactly their systemctl request
> failed. If dbus job fails try to figure out more details about failure by
> examining Result property of the service.
>
> https://bugzilla.redhat.com/show_bug.cgi?id=1016680
> ---
> src/libsystemd/sd-bus/bus-util.c | 41 +++++++++++++++++++---
> src/shared/log.c | 76 ++++++++++++++++++++++++++++++++++++++++
> src/shared/log.h | 2 ++
> 3 files changed, 114 insertions(+), 5 deletions(-)
>
> diff --git a/src/libsystemd/sd-bus/bus-util.c b/src/libsystemd/sd-bus/bus-util.c
> index 6498769..bb4c993 100644
> --- a/src/libsystemd/sd-bus/bus-util.c
> +++ b/src/libsystemd/sd-bus/bus-util.c
> @@ -30,6 +30,7 @@
> #include "path-util.h"
> #include "missing.h"
> #include "set.h"
> +#include "unit-name.h"
>
> #include "sd-bus.h"
> #include "bus-error.h"
> @@ -1716,6 +1717,35 @@ static int bus_process_wait(sd_bus *bus) {
> }
> }
>
> +static int bus_job_get_service_result(BusWaitForJobs *d, char **result) {
> + int r = 0;
> + _cleanup_free_ char *dbus_path = NULL, *p = NULL;
> +
> + assert(d);
> + assert(d->result);
> + assert(d->name);
> + assert(result);
> +
> + dbus_path = unit_dbus_path_from_name(d->name);
> + if (!dbus_path)
> + return -ENOMEM;
> +
> + r = sd_bus_get_property_string(d->bus,
> + "org.freedesktop.systemd1",
> + dbus_path,
> + "org.freedesktop.systemd1.Service",
> + "Result",
> + NULL,
> + &p);
> + if (r < 0)
> + return r;
> +
> + *result = p;
> + p = NULL;
> +
> + return 0;
> +}
> +
> static int check_wait_response(BusWaitForJobs *d, bool quiet) {
> int r = 0;
>
> @@ -1736,13 +1766,14 @@ static int check_wait_response(BusWaitForJobs *d, bool quiet) {
> log_error("Operation on or unit type of %s not supported on this system.", strna(d->name));
> else if (!streq(d->result, "done") && !streq(d->result, "skipped")) {
> if (d->name) {
> - bool quotes;
> + int q;
> + _cleanup_free_ char *result = NULL;
>
> - quotes = chars_intersect(d->name, SHELL_NEED_QUOTES);
> + q = bus_job_get_service_result(d, &result);
> + if (q < 0)
> + log_debug_errno(q, "Failed to get Result property of service %s: %m", d->name);
>
> - log_error("Job for %s failed. See \"systemctl status %s%s%s\" and \"journalctl -xe\" for details.",
> - d->name,
> - quotes ? "'" : "", d->name, quotes ? "'" : "");
> + log_job_error_with_service_result(d->name, result);
> } else
> log_error("Job failed. See \"journalctl -xe\" for details.");
> }
> diff --git a/src/shared/log.c b/src/shared/log.c
> index 646a1d6..3219756 100644
> --- a/src/shared/log.c
> +++ b/src/shared/log.c
> @@ -1061,3 +1061,79 @@ void log_received_signal(int level, const struct signalfd_siginfo *si) {
> void log_set_upgrade_syslog_to_journal(bool b) {
> upgrade_syslog_to_journal = b;
> }
> +
> +typedef enum ServiceResult {
> + SERVICE_SUCCESS,
> + SERVICE_FAILURE_RESOURCES,
> + SERVICE_FAILURE_TIMEOUT,
> + SERVICE_FAILURE_EXIT_CODE,
> + SERVICE_FAILURE_SIGNAL,
> + SERVICE_FAILURE_CORE_DUMP,
> + SERVICE_FAILURE_WATCHDOG,
> + SERVICE_FAILURE_START_LIMIT,
> + _SERVICE_RESULT_MAX,
> + _SERVICE_RESULT_INVALID = -1
> +} ServiceResult;
dbus property returns the result as a string. So we need to map this string
to an explanation. Going through the intermediate ServiceResult int does
not really gain anything... I think things would be simpler if you added a
table mapping result to explanation:
static const struct {
const char *result, *explanation;
} explanations[] = {
{"timeout", "start timeout was exceeded"},
...
{}
};
and then simply looped over this table. This way it won't be necessary to
have three different lists.
This could live simply in systemctl.c. Those are only parts of
sentences, nothing generally useful.
> +static const char* const service_result_table[_SERVICE_RESULT_MAX] = {
> + [SERVICE_SUCCESS] = "success",
> + [SERVICE_FAILURE_RESOURCES] = "resources",
> + [SERVICE_FAILURE_TIMEOUT] = "timeout",
> + [SERVICE_FAILURE_EXIT_CODE] = "exit-code",
> + [SERVICE_FAILURE_SIGNAL] = "signal",
> + [SERVICE_FAILURE_CORE_DUMP] = "core-dump",
> + [SERVICE_FAILURE_WATCHDOG] = "watchdog",
> + [SERVICE_FAILURE_START_LIMIT] = "start-limit"
> +};
> +
> +DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(service_result, ServiceResult);
> +
> +static const char* const service_result_error_message_table[_SERVICE_RESULT_MAX] = {
> + [SERVICE_FAILURE_RESOURCES] = "configured resource limit was exceeded",
> + [SERVICE_FAILURE_TIMEOUT] = ,
> + [SERVICE_FAILURE_EXIT_CODE] = "ExecStart process exited with error code",
> + [SERVICE_FAILURE_SIGNAL] = "fatal signal was delivered to ExecStart process",
> + [SERVICE_FAILURE_CORE_DUMP] = "fatal signal was delivered to ExecStart process. Core dumped",
"fatal signal was delivered to ExecStart process and it dumped core"?
> + [SERVICE_FAILURE_WATCHDOG] = "service failed to sent watchdog ping",
"...failed to send..."
> + [SERVICE_FAILURE_START_LIMIT] = "start of the service was attempted too often too quickly",
> +};
> +
> +void log_job_error_with_service_result(const char* service, const char *result) {
> + bool quotes;
> + _cleanup_free_ char *stream_buffer = NULL;
> + _cleanup_fclose_ FILE *stream = NULL;
> + char *service_maybe_quoted = NULL;
> + size_t size;
> + ServiceResult r;
> +
> + assert(service);
> + assert(result);
> +
> + quotes = chars_intersect(service, SHELL_NEED_QUOTES);
> +
> + service_maybe_quoted = alloca(strlen(service) + 3);
> + sprintf(service_maybe_quoted, "%s%s%s", quotes ? "'" : "", service, quotes ? "'" : "");
> +
> + stream = open_memstream(&stream_buffer, &size);
> + if (!stream)
> + return;
> +
> + r = service_result_from_string(result);
> +
> + assert(r > _SERVICE_RESULT_INVALID && r < _SERVICE_RESULT_MAX);
> +
> + fprintf(stream, "Job for %s failed because %s. See \"systemctl status %s\" and \"journalctl -xe\" for details.\n",
> + service,
> + service_result_error_message_table[r],
> + service_maybe_quoted);
> +
> + /* For some results maybe additional explanation is required */
> + if (streq_ptr(result, "start-limit"))
> + fprintf(stream, "To force a start please invoke \"systemctl reset-failed %s\" followed by \"systemctl start %s\" again.",
> + service_maybe_quoted,
> + service_maybe_quoted);
> +
> + fflush(stream);
> +
> + log_error("%s", stream_buffer);
I don't understand why memstream is needed. Why not just do log_error(),
and optionally follow that by log_info() with the additional hint?
> +}
> diff --git a/src/shared/log.h b/src/shared/log.h
> index d6061c0..1627a1c 100644
> --- a/src/shared/log.h
> +++ b/src/shared/log.h
> @@ -209,3 +209,5 @@ LogTarget log_target_from_string(const char *s) _pure_;
> void log_received_signal(int level, const struct signalfd_siginfo *si);
>
> void log_set_upgrade_syslog_to_journal(bool b);
> +
> +void log_job_error_with_service_result(const char* service, const char *result);
> --
Zbyszek
More information about the systemd-devel
mailing list