[systemd-commits] Makefile.am TODO src/shared

Lennart Poettering lennart at kemper.freedesktop.org
Thu Jul 12 16:09:48 PDT 2012


 Makefile.am            |    3 +-
 TODO                   |    2 -
 src/shared/logs-show.c |   22 ++++-----------
 src/shared/utf8.c      |   71 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/shared/utf8.h      |    2 +
 5 files changed, 81 insertions(+), 19 deletions(-)

New commits:
commit ba961854ddec8a8efcffab44540c33cc7dffebfa
Author: Zbigniew Jędrzejewski-Szmek <zbyszek at in.waw.pl>
Date:   Fri Jul 13 01:07:41 2012 +0200

    journalctl: show any printable Unicode character
    
    This makes sure we are OK in outputting all valid, non-control UTF-8
    characters, instead of just printable 7bit ASCII.

diff --git a/Makefile.am b/Makefile.am
index 14f9455..507ea3a 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -726,7 +726,8 @@ libsystemd_logs_la_CFLAGS = \
 
 libsystemd_logs_la_LIBADD = \
 	libsystemd-journal-internal.la \
-	libsystemd-id128-internal.la
+	libsystemd-id128-internal.la \
+	libsystemd-shared.la
 
 # ------------------------------------------------------------------------------
 noinst_LTLIBRARIES += \
diff --git a/TODO b/TODO
index e15d4b9..25266b2 100644
--- a/TODO
+++ b/TODO
@@ -121,8 +121,6 @@ Features:
 
 * drop accountsservice's StandardOutput=syslog and Type=dbus fields
 
-* make sure show-logs checks for utf8 validity, not ascii validity
-
 * when breaking cycles drop sysv services first, then services from /run, then from /etc, then from /usr
 
 * readahead: when bumping /sys readahead variable save mtime and compare later to detect changes
diff --git a/src/shared/logs-show.c b/src/shared/logs-show.c
index 540b5a2..e111922 100644
--- a/src/shared/logs-show.c
+++ b/src/shared/logs-show.c
@@ -28,19 +28,10 @@
 #include "logs-show.h"
 #include "log.h"
 #include "util.h"
+#include "utf8.h"
 
 #define PRINT_THRESHOLD 128
 
-static bool contains_unprintable(const void *p, size_t l) {
-        const char *j;
-
-        for (j = p; j < (const char *) p + l; j++)
-                if (*j < ' ' || *j >= 127)
-                        return true;
-
-        return false;
-}
-
 static int parse_field(const void *data, size_t length, const char *field, char **target, size_t *target_size) {
         size_t fl, nl;
         void *buf;
@@ -80,7 +71,7 @@ static bool shall_print(bool show_all, char *p, size_t l) {
         if (l > PRINT_THRESHOLD)
                 return false;
 
-        if (contains_unprintable(p, l))
+        if (!utf8_is_printable_n(p, l))
                 return false;
 
         return true;
@@ -226,7 +217,7 @@ static int output_short(sd_journal *j, unsigned line, unsigned n_columns, bool s
 
         if (show_all)
                 printf(": %.*s\n", (int) message_len, message);
-        else if (contains_unprintable(message, message_len)) {
+        else if (!utf8_is_printable_n(message, message_len)) {
                 char bytes[FORMAT_BYTES_MAX];
                 printf(": [%s blob data]\n", format_bytes(bytes, sizeof(bytes), message_len));
         } else if (message_len + n < n_columns)
@@ -298,7 +289,7 @@ static int output_verbose(sd_journal *j, unsigned line, unsigned n_columns, bool
 
         SD_JOURNAL_FOREACH_DATA(j, data, length) {
                 if (!show_all && (length > PRINT_THRESHOLD ||
-                                  contains_unprintable(data, length))) {
+                                  !utf8_is_printable_n(data, length))) {
                         const char *c;
                         char bytes[FORMAT_BYTES_MAX];
 
@@ -367,7 +358,7 @@ static int output_export(sd_journal *j, unsigned line, unsigned n_columns, bool
                     memcmp(data, "_BOOT_ID=", 9) == 0)
                         continue;
 
-                if (contains_unprintable(data, length)) {
+                if (!utf8_is_printable_n(data, length)) {
                         const char *c;
                         uint64_t le64;
 
@@ -394,8 +385,7 @@ static int output_export(sd_journal *j, unsigned line, unsigned n_columns, bool
 }
 
 static void json_escape(const char* p, size_t l) {
-
-        if (contains_unprintable(p, l)) {
+        if (!utf8_is_printable_n(p, l)) {
                 bool not_first = false;
 
                 fputs("[ ", stdout);
diff --git a/src/shared/utf8.c b/src/shared/utf8.c
index 13f0521..a6f5b3f 100644
--- a/src/shared/utf8.c
+++ b/src/shared/utf8.c
@@ -78,6 +78,77 @@ static inline void merge_continuation_char(uint32_t *u_ch, uint8_t ch) {
         *u_ch |= ch & 0x3f;
 }
 
+/* Tells whether ch is a control character that must not be printed:
+ * anything in C0 (below ' ') or DEL plus C1 (0x7F..0x9F). '\t' belongs
+ * to C0 but is harmless and commonly used, so it is not reported. */
+static bool is_unicode_control(uint32_t ch) {
+        if (ch == '\t')
+                return false;
+        if (ch < ' ')
+                return true;
+
+        return ch >= 0x7F && ch <= 0x9F;
+}
+
+/*
+ * Checks whether the first 'length' bytes of 'str' are well-formed UTF-8
+ * made up only of printable characters, i.e. no control characters as
+ * defined by is_unicode_control() ('\t' is tolerated). The buffer does
+ * not need to be NUL-terminated and is never read beyond 'length' bytes.
+ * Returns 'str' if everything is printable, or NULL on a malformed,
+ * truncated or overlong sequence, an invalid code point or a control
+ * character. An empty buffer (length == 0) is considered printable.
+ */
+char* utf8_is_printable_n(const char* str, size_t length) {
+        const uint8_t *p;
+
+        assert(str);
+
+        for (p = (const uint8_t*) str; length; p++, length--) {
+                uint32_t val, min;
+                unsigned remaining; /* continuation bytes still expected */
+
+                if (*p < 128) {
+                        val = *p;
+                        min = 0;
+                        remaining = 0;
+                } else if ((*p & 0xe0) == 0xc0) { /* 110xxxxx two-char seq. */
+                        /* All five payload bits count: mask is 0x1f, not 0x1e */
+                        val = (uint32_t) (*p & 0x1f);
+                        min = 128;
+                        remaining = 1;
+                } else if ((*p & 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/
+                        val = (uint32_t) (*p & 0x0f);
+                        min = (1 << 11);
+                        remaining = 2;
+                } else if ((*p & 0xf8) == 0xf0) { /* 11110xxx four-char seq */
+                        val = (uint32_t) (*p & 0x07);
+                        min = (1 << 16);
+                        remaining = 3;
+                } else
+                        return NULL;
+
+                /* Check the bound before every read so we never overrun */
+                for (; remaining > 0; remaining--) {
+                        p++;
+                        length--;
+                        if (!length || !is_continuation_char(*p))
+                                return NULL;
+                        merge_continuation_char(&val, *p);
+                }
+
+                /* Reject overlong forms, surrogates, values above U+10FFFF
+                 * (all invalid UTF-8 per RFC 3629) and control characters. */
+                if (val < min || val > 0x10FFFF ||
+                    (val & 0xFFFFF800) == 0xD800 ||
+                    is_unicode_control(val))
+                        return NULL;
+        }
+
+        return (char*) str;
+}
+
 static char* utf8_validate(const char *str, char *output) {
         uint32_t val = 0;
         uint32_t min = 0;
diff --git a/src/shared/utf8.h b/src/shared/utf8.h
index af2420f..fec76b4 100644
--- a/src/shared/utf8.h
+++ b/src/shared/utf8.h
@@ -27,6 +27,8 @@
 char *utf8_is_valid(const char *s) _pure_;
 char *ascii_is_valid(const char *s) _pure_;
 
+char *utf8_is_printable_n(const char* str, size_t length) _pure_;
+
 char *utf8_filter(const char *s);
 char *ascii_filter(const char *s);
 



More information about the systemd-commits mailing list