[systemd-commits] 3 commits - .gitignore Makefile.am src/libudev src/shared src/test TODO

Dave Reisner dreisner at kemper.freedesktop.org
Thu Sep 19 08:59:29 PDT 2013


 .gitignore                   |    1 
 Makefile.am                  |   14 +
 TODO                         |    1 
 src/libudev/libudev-util.c   |    5 
 src/shared/device-nodes.c    |   74 +++++++++
 src/shared/device-nodes.h    |   23 +++
 src/shared/utf8.c            |  325 +++++++++----------------------------------
 src/shared/utf8.h            |    5 
 src/shared/util.c            |    4 
 src/test/test-device-nodes.c |   55 +++++++
 src/test/test-utf8.c         |   63 +++++---
 11 files changed, 283 insertions(+), 287 deletions(-)

New commits:
commit e7363c59d69b71a4327429719e24cab9020e2796
Author: Dave Reisner <dreisner at archlinux.org>
Date:   Wed Sep 18 12:32:23 2013 -0400

    test-utf8: add more tests for public functions

diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c
index b5a833e..7bd0db1 100644
--- a/src/test/test-utf8.c
+++ b/src/test/test-utf8.c
@@ -34,9 +34,43 @@ static void test_utf8_is_valid(void) {
         assert_se(!utf8_is_valid("\341\204"));
 }
 
+static void test_ascii_is_valid(void) {
+        assert_se(ascii_is_valid("alsdjf\t\vbarr\nba z"));
+        assert_se(!ascii_is_valid("\342\204\242"));
+        assert_se(!ascii_is_valid("\341\204"));
+}
+
+static void test_ascii_filter(void) {
+        char *f;
+
+        f = ascii_filter("alsdjf\t\vbarr\nba z");
+        assert_se(streq(f, "alsdjf\t\vbarr\nba z"));
+        free(f);
+
+        f = ascii_filter("\342\204\242");
+        assert_se(streq(f, ""));
+        free(f);
+
+        f = ascii_filter("foo\341\204bar");
+        assert_se(streq(f, "foobar"));
+        free(f);
+}
+
+static void test_utf8_encoded_valid_unichar(void) {
+        assert_se(utf8_encoded_valid_unichar("\342\204\242") == 3);
+        assert_se(utf8_encoded_valid_unichar("\302\256") == 2);
+        assert_se(utf8_encoded_valid_unichar("a") == 1);
+        assert_se(utf8_encoded_valid_unichar("\341\204") < 0);
+        assert_se(utf8_encoded_valid_unichar("\341\204\341\204") < 0);
+
+}
+
 int main(int argc, char *argv[]) {
         test_utf8_is_valid();
         test_utf8_is_printable();
+        test_ascii_is_valid();
+        test_ascii_filter();
+        test_utf8_encoded_valid_unichar();
 
         return 0;
 }

commit 8f6ce71fe79d897b67157d92869db87ee2042af6
Author: Dave Reisner <dreisner at archlinux.org>
Date:   Wed Sep 18 12:12:04 2013 -0400

    device-nodes: move device node specific code to own file
    
    In the process, rename udev_encode_string, which is poorly named for
    what it does. It deals specifically with encoding names that udev
    creates and has its own rules: UTF-8 is valid, but some ASCII is not
    (e.g. path separators), and everything else is simply escaped. The new
    name is encode_devnode_name.

diff --git a/.gitignore b/.gitignore
index deeee53..8115d4d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -101,6 +101,7 @@
 /test-cgroup-util
 /test-daemon
 /test-date
+/test-device-nodes
 /test-efivars
 /test-engine
 /test-env-replace
diff --git a/Makefile.am b/Makefile.am
index 8d70ad3..89a5c86 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -642,6 +642,8 @@ libsystemd_shared_la_SOURCES = \
 	src/shared/list.h \
 	src/shared/macro.h \
 	src/shared/def.h \
+	src/shared/device-nodes.c \
+	src/shared/device-nodes.h \
 	src/shared/sparse-endian.h \
 	src/shared/util.c \
 	src/shared/util.h \
@@ -1137,7 +1139,8 @@ tests += \
 	test-time \
 	test-hashmap \
 	test-list \
-	test-tables
+	test-tables \
+	test-device-nodes
 
 EXTRA_DIST += \
 	test/sched_idle_bad.service \
@@ -1149,6 +1152,15 @@ EXTRA_DIST += \
 EXTRA_DIST += \
 	src/test/test-helper.h
 
+test_device_nodes_SOURCES = \
+	src/test/test-device-nodes.c
+
+test_device_nodes_CFLAGS = \
+	$(AM_CFLAGS)
+
+test_device_nodes_LDADD = \
+	libsystemd-shared.la
+
 test_engine_SOURCES = \
 	src/test/test-engine.c
 
diff --git a/src/libudev/libudev-util.c b/src/libudev/libudev-util.c
index d54430c..b5b9db6 100644
--- a/src/libudev/libudev-util.c
+++ b/src/libudev/libudev-util.c
@@ -32,6 +32,7 @@
 #include <sys/stat.h>
 #include <sys/param.h>
 
+#include "device-nodes.h"
 #include "libudev.h"
 #include "libudev-private.h"
 #include "utf8.h"
@@ -344,7 +345,7 @@ int util_replace_chars(char *str, const char *white)
         while (str[i] != '\0') {
                 int len;
 
-                if (is_utf8_encoding_whitelisted(str[i], white)) {
+                if (whitelisted_char_for_devnode(str[i], white)) {
                         i++;
                         continue;
                 }
@@ -392,7 +393,7 @@ int util_replace_chars(char *str, const char *white)
  **/
 _public_ int udev_util_encode_string(const char *str, char *str_enc, size_t len)
 {
-        return udev_encode_string(str, str_enc, len);
+        return encode_devnode_name(str, str_enc, len);
 }
 
 /*
diff --git a/src/shared/device-nodes.c b/src/shared/device-nodes.c
new file mode 100644
index 0000000..986553e
--- /dev/null
+++ b/src/shared/device-nodes.c
@@ -0,0 +1,74 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2012 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "device-nodes.h"
+#include "utf8.h"
+
+int whitelisted_char_for_devnode(char c, const char *white) {
+        if ((c >= '0' && c <= '9') ||
+            (c >= 'A' && c <= 'Z') ||
+            (c >= 'a' && c <= 'z') ||
+            strchr("#+-.:=@_", c) != NULL ||
+            (white != NULL && strchr(white, c) != NULL))
+                return 1;
+        return 0;
+}
+
+int encode_devnode_name(const char *str, char *str_enc, size_t len) {
+        size_t i, j;
+
+        if (str == NULL || str_enc == NULL)
+                return -1;
+
+        for (i = 0, j = 0; str[i] != '\0'; i++) {
+                int seqlen;
+
+                seqlen = utf8_encoded_valid_unichar(&str[i]);
+                if (seqlen > 1) {
+                        if (len-j < (size_t)seqlen)
+                                goto err;
+                        memcpy(&str_enc[j], &str[i], seqlen);
+                        j += seqlen;
+                        i += (seqlen-1);
+                } else if (str[i] == '\\' || !whitelisted_char_for_devnode(str[i], NULL)) {
+                        if (len-j < 4)
+                                goto err;
+                        sprintf(&str_enc[j], "\\x%02x", (unsigned char) str[i]);
+                        j += 4;
+                } else {
+                        if (len-j < 1)
+                                goto err;
+                        str_enc[j] = str[i];
+                        j++;
+                }
+        }
+        if (len-j < 1)
+                goto err;
+        str_enc[j] = '\0';
+        return 0;
+err:
+        return -1;
+}
diff --git a/src/shared/device-nodes.h b/src/shared/device-nodes.h
new file mode 100644
index 0000000..a98195a
--- /dev/null
+++ b/src/shared/device-nodes.h
@@ -0,0 +1,23 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2012 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+int encode_devnode_name(const char *str, char *str_enc, size_t len);
+int whitelisted_char_for_devnode(char c, const char *additional);
diff --git a/src/shared/utf8.c b/src/shared/utf8.c
index 732f0f0..c3d97cc 100644
--- a/src/shared/utf8.c
+++ b/src/shared/utf8.c
@@ -285,49 +285,3 @@ int utf8_encoded_valid_unichar(const char *str) {
 
         return len;
 }
-
-int is_utf8_encoding_whitelisted(char c, const char *white) {
-        if ((c >= '0' && c <= '9') ||
-            (c >= 'A' && c <= 'Z') ||
-            (c >= 'a' && c <= 'z') ||
-            strchr("#+-.:=@_", c) != NULL ||
-            (white != NULL && strchr(white, c) != NULL))
-                return 1;
-        return 0;
-}
-
-int udev_encode_string(const char *str, char *str_enc, size_t len) {
-        size_t i, j;
-
-        if (str == NULL || str_enc == NULL)
-                return -1;
-
-        for (i = 0, j = 0; str[i] != '\0'; i++) {
-                int seqlen;
-
-                seqlen = utf8_encoded_valid_unichar(&str[i]);
-                if (seqlen > 1) {
-                        if (len-j < (size_t)seqlen)
-                                goto err;
-                        memcpy(&str_enc[j], &str[i], seqlen);
-                        j += seqlen;
-                        i += (seqlen-1);
-                } else if (str[i] == '\\' || !is_utf8_encoding_whitelisted(str[i], NULL)) {
-                        if (len-j < 4)
-                                goto err;
-                        sprintf(&str_enc[j], "\\x%02x", (unsigned char) str[i]);
-                        j += 4;
-                } else {
-                        if (len-j < 1)
-                                goto err;
-                        str_enc[j] = str[i];
-                        j++;
-                }
-        }
-        if (len-j < 1)
-                goto err;
-        str_enc[j] = '\0';
-        return 0;
-err:
-        return -1;
-}
diff --git a/src/shared/utf8.h b/src/shared/utf8.h
index 22e1346..96a03ea 100644
--- a/src/shared/utf8.h
+++ b/src/shared/utf8.h
@@ -35,5 +35,3 @@ char *ascii_filter(const char *s);
 char *utf16_to_utf8(const void *s, size_t length);
 
 int utf8_encoded_valid_unichar(const char *str);
-int is_utf8_encoding_whitelisted(char c, const char *white);
-int udev_encode_string(const char *str, char *str_enc, size_t len);
diff --git a/src/shared/util.c b/src/shared/util.c
index 2b76a5c..2009553 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -73,7 +73,7 @@
 #include "hashmap.h"
 #include "env-util.h"
 #include "fileio.h"
-#include "utf8.h"
+#include "device-nodes.h"
 
 int saved_argc = 0;
 char **saved_argv = NULL;
@@ -3509,7 +3509,7 @@ static char *tag_to_udev_node(const char *tagvalue, const char *by) {
         if (t == NULL)
                 return NULL;
 
-        if (udev_encode_string(u, t, enc_len) < 0)
+        if (encode_devnode_name(u, t, enc_len) < 0)
                 return NULL;
 
         if (asprintf(&dn, "/dev/disk/by-%s/%s", by, t) < 0)
diff --git a/src/test/test-device-nodes.c b/src/test/test-device-nodes.c
new file mode 100644
index 0000000..2f3dedb
--- /dev/null
+++ b/src/test/test-device-nodes.c
@@ -0,0 +1,55 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2013 Dave Reisner
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/types.h>
+
+#include "device-nodes.h"
+#include "util.h"
+
+/* helpers for test_encode_devnode_name */
+static char *do_encode_string(const char *in) {
+        size_t out_len = strlen(in) * 4;
+        char *out = malloc(out_len);
+
+        assert_se(out);
+        assert_se(encode_devnode_name(in, out, out_len) >= 0);
+        puts(out);
+
+        return out;
+}
+
+static bool expect_encoded_as(const char *in, const char *expected) {
+        _cleanup_free_ char *encoded = do_encode_string(in);
+        return streq(encoded, expected);
+}
+
+static void test_encode_devnode_name(void) {
+        assert_se(expect_encoded_as("systemd sucks", "systemd\\x20sucks"));
+        assert_se(expect_encoded_as("pinkiepie", "pinkiepie"));
+        assert_se(expect_encoded_as("valíd\\ųtf8", "valíd\\x5cųtf8"));
+        assert_se(expect_encoded_as("s/ash/ng", "s\\x2fash\\x2fng"));
+}
+
+int main(int argc, char *argv[]) {
+        test_encode_devnode_name();
+
+        return 0;
+}
diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c
index 26cc37b..b5a833e 100644
--- a/src/test/test-utf8.c
+++ b/src/test/test-utf8.c
@@ -19,34 +19,9 @@
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
 ***/
 
-
 #include "utf8.h"
 #include "util.h"
 
-/* helpers for test_udev_encode_string */
-static char *do_encode_string(const char *in) {
-        size_t out_len = strlen(in) * 4;
-        char *out = malloc(out_len);
-
-        assert_se(out);
-        assert_se(udev_encode_string(in, out, out_len) >= 0);
-        puts(out);
-
-        return out;
-}
-
-static bool expect_encoded_as(const char *in, const char *expected) {
-        _cleanup_free_ char *encoded = do_encode_string(in);
-        return streq(encoded, expected);
-}
-
-static void test_udev_encode_string(void) {
-        assert_se(expect_encoded_as("systemd sucks", "systemd\\x20sucks"));
-        assert_se(expect_encoded_as("pinkiepie", "pinkiepie"));
-        assert_se(expect_encoded_as("valíd\\ųtf8", "valíd\\x5cųtf8"));
-        assert_se(expect_encoded_as("s/ash/ng", "s\\x2fash\\x2fng"));
-}
-
 static void test_utf8_is_printable(void) {
         assert_se(utf8_is_printable("ascii is valid\tunicode", 22));
         assert_se(utf8_is_printable("\342\204\242", 3));
@@ -55,14 +30,13 @@ static void test_utf8_is_printable(void) {
 
 static void test_utf8_is_valid(void) {
         assert_se(utf8_is_valid("ascii is valid unicode"));
-        assert_se(utf8_is_valid("\341\204\242"));
+        assert_se(utf8_is_valid("\342\204\242"));
         assert_se(!utf8_is_valid("\341\204"));
 }
 
 int main(int argc, char *argv[]) {
         test_utf8_is_valid();
         test_utf8_is_printable();
-        test_udev_encode_string();
 
         return 0;
 }

commit 7991ac34ab08421415b907e42775c5539a4a5bbb
Author: Dave Reisner <dreisner at archlinux.org>
Date:   Wed Sep 18 11:52:14 2013 -0400

    shared/utf8: merge implementations, remove cruft
    
    This unifies the utf8 handling code, which was previously duplicated in
    udev and systemd.

diff --git a/TODO b/TODO
index c0f51de..01bc993 100644
--- a/TODO
+++ b/TODO
@@ -602,7 +602,6 @@ Features:
 * udev:
   - remove src/udev/udev-builtin-firmware.c (CONFIG_FW_LOADER_USER_HELPER=n)
   - move to LGPL
-  - unify utf8 validator code with shared/
   - kill scsi_id
   - add trigger --subsystem-match=usb/usb_device device
 
diff --git a/src/shared/utf8.c b/src/shared/utf8.c
index 1a68394..732f0f0 100644
--- a/src/shared/utf8.c
+++ b/src/shared/utf8.c
@@ -51,8 +51,6 @@
 #include "utf8.h"
 #include "util.h"
 
-#define FILTER_CHAR '_'
-
 static inline bool is_unicode_valid(uint32_t ch) {
 
         if (ch >= 0x110000) /* End of unicode space */
@@ -67,17 +65,6 @@ static inline bool is_unicode_valid(uint32_t ch) {
         return true;
 }
 
-static inline bool is_continuation_char(uint8_t ch) {
-        if ((ch & 0xc0) != 0x80) /* 10xxxxxx */
-                return false;
-        return true;
-}
-
-static inline void merge_continuation_char(uint32_t *u_ch, uint8_t ch) {
-        *u_ch <<= 6;
-        *u_ch |= ch & 0x3f;
-}
-
 static bool is_unicode_control(uint32_t ch) {
 
         /*
@@ -90,163 +77,97 @@ static bool is_unicode_control(uint32_t ch) {
                 (0x7F <= ch && ch <= 0x9F);
 }
 
-bool utf8_is_printable(const char* str, size_t length) {
-        uint32_t val = 0;
-        uint32_t min = 0;
-        const uint8_t *p;
+/* count of characters used to encode one unicode char */
+static int utf8_encoded_expected_len(const char *str) {
+        unsigned char c = (unsigned char)str[0];
 
-        assert(str);
+        if (c < 0x80)
+                return 1;
+        if ((c & 0xe0) == 0xc0)
+                return 2;
+        if ((c & 0xf0) == 0xe0)
+                return 3;
+        if ((c & 0xf8) == 0xf0)
+                return 4;
+        if ((c & 0xfc) == 0xf8)
+                return 5;
+        if ((c & 0xfe) == 0xfc)
+                return 6;
+        return 0;
+}
 
-        for (p = (const uint8_t*) str; length; p++, length--) {
-                if (*p < 128) {
-                        val = *p;
-                } else {
-                        if ((*p & 0xe0) == 0xc0) { /* 110xxxxx two-char seq. */
-                                min = 128;
-                                val = (uint32_t) (*p & 0x1e);
-                                goto ONE_REMAINING;
-                        } else if ((*p & 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/
-                                min = (1 << 11);
-                                val = (uint32_t) (*p & 0x0f);
-                                goto TWO_REMAINING;
-                        } else if ((*p & 0xf8) == 0xf0) { /* 11110xxx four-char seq */
-                                min = (1 << 16);
-                                val = (uint32_t) (*p & 0x07);
-                        } else
-                                return false;
-
-                        p++;
-                        length--;
-                        if (!length || !is_continuation_char(*p))
-                                return false;
-                        merge_continuation_char(&val, *p);
-
-                TWO_REMAINING:
-                        p++;
-                        length--;
-                        if (!is_continuation_char(*p))
-                                return false;
-                        merge_continuation_char(&val, *p);
-
-                ONE_REMAINING:
-                        p++;
-                        length--;
-                        if (!is_continuation_char(*p))
-                                return false;
-                        merge_continuation_char(&val, *p);
-
-                        if (val < min)
-                                return false;
-                }
+/* decode one unicode char */
+static int utf8_encoded_to_unichar(const char *str) {
+        int unichar;
+        int len;
+        int i;
 
-                if (is_unicode_control(val))
-                        return false;
+        len = utf8_encoded_expected_len(str);
+        switch (len) {
+        case 1:
+                return (int)str[0];
+        case 2:
+                unichar = str[0] & 0x1f;
+                break;
+        case 3:
+                unichar = (int)str[0] & 0x0f;
+                break;
+        case 4:
+                unichar = (int)str[0] & 0x07;
+                break;
+        case 5:
+                unichar = (int)str[0] & 0x03;
+                break;
+        case 6:
+                unichar = (int)str[0] & 0x01;
+                break;
+        default:
+                return -1;
         }
 
-        return true;
+        for (i = 1; i < len; i++) {
+                if (((int)str[i] & 0xc0) != 0x80)
+                        return -1;
+                unichar <<= 6;
+                unichar |= (int)str[i] & 0x3f;
+        }
+
+        return unichar;
 }
 
-static char* utf8_validate(const char *str, char *output) {
-        uint32_t val = 0;
-        uint32_t min = 0;
-        const uint8_t *p, *last;
-        int size;
-        uint8_t *o;
+bool utf8_is_printable(const char* str, size_t length) {
+        const uint8_t *p;
 
         assert(str);
 
-        o = (uint8_t*) output;
-        for (p = (const uint8_t*) str; *p; p++) {
-                if (*p < 128) {
-                        if (o)
-                                *o = *p;
-                } else {
-                        last = p;
-
-                        if ((*p & 0xe0) == 0xc0) { /* 110xxxxx two-char seq. */
-                                size = 2;
-                                min = 128;
-                                val = (uint32_t) (*p & 0x1e);
-                                goto ONE_REMAINING;
-                        } else if ((*p & 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/
-                                size = 3;
-                                min = (1 << 11);
-                                val = (uint32_t) (*p & 0x0f);
-                                goto TWO_REMAINING;
-                        } else if ((*p & 0xf8) == 0xf0) { /* 11110xxx four-char seq */
-                                size = 4;
-                                min = (1 << 16);
-                                val = (uint32_t) (*p & 0x07);
-                        } else
-                                goto error;
-
-                        p++;
-                        if (!is_continuation_char(*p))
-                                goto error;
-                        merge_continuation_char(&val, *p);
-
-                TWO_REMAINING:
-                        p++;
-                        if (!is_continuation_char(*p))
-                                goto error;
-                        merge_continuation_char(&val, *p);
-
-                ONE_REMAINING:
-                        p++;
-                        if (!is_continuation_char(*p))
-                                goto error;
-                        merge_continuation_char(&val, *p);
-
-                        if (val < min)
-                                goto error;
-
-                        if (!is_unicode_valid(val))
-                                goto error;
-
-                        if (o) {
-                                memcpy(o, last, (size_t) size);
-                                o += size;
-                        }
-
-                        continue;
-
-                error:
-                        if (o) {
-                                *o = FILTER_CHAR;
-                                p = last; /* We retry at the next character */
-                        } else
-                                goto failure;
-                }
+        for (p = (const uint8_t*) str; length; p++) {
+                int encoded_len = utf8_encoded_valid_unichar((const char *)p);
+                int32_t val = utf8_encoded_to_unichar((const char*)p);
 
-                if (o)
-                        o++;
-        }
+                if (encoded_len < 0 || val < 0 || is_unicode_control(val))
+                        return false;
 
-        if (o) {
-                *o = '\0';
-                return output;
+                length -= encoded_len;
         }
 
-        return (char*) str;
-
-failure:
-        return NULL;
-}
-
-char* utf8_is_valid (const char *str) {
-        return utf8_validate(str, NULL);
+        return true;
 }
 
-char* utf8_filter (const char *str) {
-        char *new_str;
+const char *utf8_is_valid(const char *str) {
+        const uint8_t *p;
 
         assert(str);
 
-        new_str = malloc(strlen(str) + 1);
-        if (!new_str)
-                return NULL;
+        for (p = (const uint8_t*) str; *p; ) {
+                int len = utf8_encoded_valid_unichar((const char *)p);
+
+                if (len < 0)
+                        return NULL;
+
+                p += len;
+        }
 
-        return utf8_validate(str, new_str);
+        return str;
 }
 
 char *ascii_is_valid(const char *str) {
@@ -318,64 +239,6 @@ char *utf16_to_utf8(const void *s, size_t length) {
         return r;
 }
 
-/* count of characters used to encode one unicode char */
-static int utf8_encoded_expected_len(const char *str) {
-        unsigned char c = (unsigned char)str[0];
-
-        if (c < 0x80)
-                return 1;
-        if ((c & 0xe0) == 0xc0)
-                return 2;
-        if ((c & 0xf0) == 0xe0)
-                return 3;
-        if ((c & 0xf8) == 0xf0)
-                return 4;
-        if ((c & 0xfc) == 0xf8)
-                return 5;
-        if ((c & 0xfe) == 0xfc)
-                return 6;
-        return 0;
-}
-
-/* decode one unicode char */
-static int utf8_encoded_to_unichar(const char *str) {
-        int unichar;
-        int len;
-        int i;
-
-        len = utf8_encoded_expected_len(str);
-        switch (len) {
-        case 1:
-                return (int)str[0];
-        case 2:
-                unichar = str[0] & 0x1f;
-                break;
-        case 3:
-                unichar = (int)str[0] & 0x0f;
-                break;
-        case 4:
-                unichar = (int)str[0] & 0x07;
-                break;
-        case 5:
-                unichar = (int)str[0] & 0x03;
-                break;
-        case 6:
-                unichar = (int)str[0] & 0x01;
-                break;
-        default:
-                return -1;
-        }
-
-        for (i = 1; i < len; i++) {
-                if (((int)str[i] & 0xc0) != 0x80)
-                        return -1;
-                unichar <<= 6;
-                unichar |= (int)str[i] & 0x3f;
-        }
-
-        return unichar;
-}
-
 /* expected size used to encode one unicode char */
 static int utf8_unichar_to_encoded_len(int unichar) {
         if (unichar < 0x80)
diff --git a/src/shared/utf8.h b/src/shared/utf8.h
index 7a5608c..22e1346 100644
--- a/src/shared/utf8.h
+++ b/src/shared/utf8.h
@@ -25,12 +25,11 @@
 
 #include "macro.h"
 
-char *utf8_is_valid(const char *s) _pure_;
+const char *utf8_is_valid(const char *s) _pure_;
 char *ascii_is_valid(const char *s) _pure_;
 
 bool utf8_is_printable(const char* str, size_t length) _pure_;
 
-char *utf8_filter(const char *s);
 char *ascii_filter(const char *s);
 
 char *utf16_to_utf8(const void *s, size_t length);
diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c
index d2b9771..26cc37b 100644
--- a/src/test/test-utf8.c
+++ b/src/test/test-utf8.c
@@ -47,6 +47,12 @@ static void test_udev_encode_string(void) {
         assert_se(expect_encoded_as("s/ash/ng", "s\\x2fash\\x2fng"));
 }
 
+static void test_utf8_is_printable(void) {
+        assert_se(utf8_is_printable("ascii is valid\tunicode", 22));
+        assert_se(utf8_is_printable("\342\204\242", 3));
+        assert_se(!utf8_is_printable("\341\204", 2));
+}
+
 static void test_utf8_is_valid(void) {
         assert_se(utf8_is_valid("ascii is valid unicode"));
         assert_se(utf8_is_valid("\341\204\242"));
@@ -55,5 +61,8 @@ static void test_utf8_is_valid(void) {
 
 int main(int argc, char *argv[]) {
         test_utf8_is_valid();
+        test_utf8_is_printable();
         test_udev_encode_string();
+
+        return 0;
 }



More information about the systemd-commits mailing list