[systemd-commits] 2 commits - .gitignore Makefile.am src/libudev src/shared src/test

Tue Sep 17 13:31:38 PDT 2013

.gitignore                 |    2 
 Makefile.am                |   10 ++
 src/libudev/libudev-util.c |  171 ---------------------------------------------
 src/shared/utf8.c          |  151 +++++++++++++++++++++++++++++++++++++++
 src/shared/utf8.h          |    4 +
 src/shared/util.c          |   20 ++---
 src/test/test-utf8.c       |   59 +++++++++++++++
 src/test/test-util.c       |   36 +++++++++
 8 files changed, 274 insertions(+), 179 deletions(-)

New commits:
commit 22f5f6281fc25f43a85938e9dad2480b2595c471
Author: Dave Reisner <dreisner at archlinux.org>
Date:   Tue Sep 17 15:47:08 2013 -0400

    Use udev_encode_string in fstab_node_to_udev_node
    
    Resolves a longstanding bug which caused this function to wrongly
    handle (escape) valid utf8 characters.

diff --git a/src/shared/util.c b/src/shared/util.c
index f6f3b18..2b76a5c 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -73,6 +73,7 @@
 #include "hashmap.h"
 #include "env-util.h"
 #include "fileio.h"
+#include "utf8.h"
 
 int saved_argc = 0;
 char **saved_argv = NULL;
@@ -3495,26 +3496,23 @@ int signal_from_string_try_harder(const char *s) {
 }
 
 static char *tag_to_udev_node(const char *tagvalue, const char *by) {
-        char *dn, *t, *u;
-        int r;
-
-        /* FIXME: to follow udev's logic 100% we need to leave valid
-         * UTF8 chars unescaped */
+        _cleanup_free_ char *t = NULL, *u = NULL;
+        char *dn;
+        size_t enc_len;
 
         u = unquote(tagvalue, "\"\'");
         if (u == NULL)
                 return NULL;
 
-        t = xescape(u, "/ ");
-        free(u);
-
+        enc_len = strlen(u) * 4;
+        t = new(char, enc_len);
         if (t == NULL)
                 return NULL;
 
-        r = asprintf(&dn, "/dev/disk/by-%s/%s", by, t);
-        free(t);
+        if (udev_encode_string(u, t, enc_len) < 0)
+                return NULL;
 
-        if (r < 0)
+        if (asprintf(&dn, "/dev/disk/by-%s/%s", by, t) < 0)
                 return NULL;
 
         return dn;
diff --git a/src/test/test-util.c b/src/test/test-util.c
index dd7768d..ad13d53 100644
--- a/src/test/test-util.c
+++ b/src/test/test-util.c
@@ -547,6 +547,41 @@ static void test_split_pair(void) {
         assert_se(streq(b, "="));
 }
 
+static void test_fstab_node_to_udev_node(void) {
+        char *n;
+
+        n = fstab_node_to_udev_node("LABEL=applÃ©/jack");
+        puts(n);
+        assert_se(streq(n, "/dev/disk/by-label/applÃ©\\x2fjack"));
+        free(n);
+
+        n = fstab_node_to_udev_node("PARTLABEL=pinkiÃ© pie");
+        puts(n);
+        assert_se(streq(n, "/dev/disk/by-partlabel/pinkiÃ©\\x20pie"));
+        free(n);
+
+        n = fstab_node_to_udev_node("UUID=037b9d94-148e-4ee4-8d38-67bfe15bb535");
+        puts(n);
+        assert_se(streq(n, "/dev/disk/by-uuid/037b9d94-148e-4ee4-8d38-67bfe15bb535"));
+        free(n);
+
+        n = fstab_node_to_udev_node("PARTUUID=037b9d94-148e-4ee4-8d38-67bfe15bb535");
+        puts(n);
+        assert_se(streq(n, "/dev/disk/by-partuuid/037b9d94-148e-4ee4-8d38-67bfe15bb535"));
+        free(n);
+
+
+        n = fstab_node_to_udev_node("PONIES=awesome");
+        puts(n);
+        assert_se(streq(n, "PONIES=awesome"));
+        free(n);
+
+        n = fstab_node_to_udev_node("/dev/xda1");
+        puts(n);
+        assert_se(streq(n, "/dev/xda1"));
+        free(n);
+}
+
 int main(int argc, char *argv[]) {
         test_streq_ptr();
         test_first_word();
@@ -582,6 +617,7 @@ int main(int argc, char *argv[]) {
         test_strrep();
         test_parse_user_at_host();
         test_split_pair();
+        test_fstab_node_to_udev_node();
 
         return 0;
 }

commit 02a36bc9a1769c744f141f127898dbe076dbfd96
Author: Dave Reisner <dreisner at archlinux.org>
Date:   Tue Sep 17 15:39:09 2013 -0400

    move utf8 functions from libudev-private.h to utf8.h
    
    There's now some more obvious overlap amongst the two utf8 validation
    functions, but no more than there already was previously.
    
    This also adds some menial tests for anyone who wants to do more
    merging of these two in the future.

diff --git a/.gitignore b/.gitignore
index 61bc2a3..deeee53 100644
--- a/.gitignore
+++ b/.gitignore
@@ -124,6 +124,7 @@
 /test-list
 /test-log
 /test-login
+/test-login-shared
 /test-loopback
 /test-mmap-cache
 /test-ns
@@ -143,6 +144,7 @@
 /test-udev
 /test-unit-file
 /test-unit-name
+/test-utf8
 /test-util
 /test-watchdog
 /timedatectl
diff --git a/Makefile.am b/Makefile.am
index af12fa5..49c1a1d 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1123,6 +1123,7 @@ tests += \
 	test-strxcpyx \
 	test-unit-name \
 	test-unit-file \
+	test-utf8 \
 	test-util \
 	test-date \
 	test-sleep \
@@ -1225,6 +1226,15 @@ test_unit_file_CFLAGS = \
 test_unit_file_LDADD = \
 	libsystemd-core.la
 
+test_utf8_SOURCES = \
+	src/test/test-utf8.c
+
+test_utf8_CFLAGS = \
+	$(AM_CFLAGS)
+
+test_utf8_LDADD = \
+	libsystemd-shared.la
+
 test_util_SOURCES = \
 	src/test/test-util.c
 
diff --git a/src/libudev/libudev-util.c b/src/libudev/libudev-util.c
index 714dc50..d54430c 100644
--- a/src/libudev/libudev-util.c
+++ b/src/libudev/libudev-util.c
@@ -34,6 +34,7 @@
 
 #include "libudev.h"
 #include "libudev-private.h"
+#include "utf8.h"
 
 /**
  * SECTION:libudev-util
@@ -306,129 +307,6 @@ void util_remove_trailing_chars(char *path, char c)
                 path[--len] = '\0';
 }
 
-/* count of characters used to encode one unicode char */
-static int utf8_encoded_expected_len(const char *str)
-{
-        unsigned char c = (unsigned char)str[0];
-
-        if (c < 0x80)
-                return 1;
-        if ((c & 0xe0) == 0xc0)
-                return 2;
-        if ((c & 0xf0) == 0xe0)
-                return 3;
-        if ((c & 0xf8) == 0xf0)
-                return 4;
-        if ((c & 0xfc) == 0xf8)
-                return 5;
-        if ((c & 0xfe) == 0xfc)
-                return 6;
-        return 0;
-}
-
-/* decode one unicode char */
-static int utf8_encoded_to_unichar(const char *str)
-{
-        int unichar;
-        int len;
-        int i;
-
-        len = utf8_encoded_expected_len(str);
-        switch (len) {
-        case 1:
-                return (int)str[0];
-        case 2:
-                unichar = str[0] & 0x1f;
-                break;
-        case 3:
-                unichar = (int)str[0] & 0x0f;
-                break;
-        case 4:
-                unichar = (int)str[0] & 0x07;
-                break;
-        case 5:
-                unichar = (int)str[0] & 0x03;
-                break;
-        case 6:
-                unichar = (int)str[0] & 0x01;
-                break;
-        default:
-                return -1;
-        }
-
-        for (i = 1; i < len; i++) {
-                if (((int)str[i] & 0xc0) != 0x80)
-                        return -1;
-                unichar <<= 6;
-                unichar |= (int)str[i] & 0x3f;
-        }
-
-        return unichar;
-}
-
-/* expected size used to encode one unicode char */
-static int utf8_unichar_to_encoded_len(int unichar)
-{
-        if (unichar < 0x80)
-                return 1;
-        if (unichar < 0x800)
-                return 2;
-        if (unichar < 0x10000)
-                return 3;
-        if (unichar < 0x200000)
-                return 4;
-        if (unichar < 0x4000000)
-                return 5;
-        return 6;
-}
-
-/* check if unicode char has a valid numeric range */
-static int utf8_unichar_valid_range(int unichar)
-{
-        if (unichar > 0x10ffff)
-                return 0;
-        if ((unichar & 0xfffff800) == 0xd800)
-                return 0;
-        if ((unichar > 0xfdcf) && (unichar < 0xfdf0))
-                return 0;
-        if ((unichar & 0xffff) == 0xffff)
-                return 0;
-        return 1;
-}
-
-/* validate one encoded unicode char and return its length */
-static int utf8_encoded_valid_unichar(const char *str)
-{
-        int len;
-        int unichar;
-        int i;
-
-        len = utf8_encoded_expected_len(str);
-        if (len == 0)
-                return -1;
-
-        /* ascii is valid */
-        if (len == 1)
-                return 1;
-
-        /* check if expected encoded chars are available */
-        for (i = 0; i < len; i++)
-                if ((str[i] & 0x80) != 0x80)
-                        return -1;
-
-        unichar = utf8_encoded_to_unichar(str);
-
-        /* check if encoded length matches encoded value */
-        if (utf8_unichar_to_encoded_len(unichar) != len)
-                return -1;
-
-        /* check if value has valid range */
-        if (!utf8_unichar_valid_range(unichar))
-                return -1;
-
-        return len;
-}
-
 int util_replace_whitespace(const char *str, char *to, size_t len)
 {
         size_t i, j;
@@ -457,17 +335,6 @@ int util_replace_whitespace(const char *str, char *to, size_t len)
         return 0;
 }
 
-static int is_whitelisted(char c, const char *white)
-{
-        if ((c >= '0' && c <= '9') ||
-            (c >= 'A' && c <= 'Z') ||
-            (c >= 'a' && c <= 'z') ||
-            strchr("#+-.:=@_", c) != NULL ||
-            (white != NULL && strchr(white, c) != NULL))
-                return 1;
-        return 0;
-}
-
 /* allow chars in whitelist, plain ascii, hex-escaping and valid utf8 */
 int util_replace_chars(char *str, const char *white)
 {
@@ -477,7 +344,7 @@ int util_replace_chars(char *str, const char *white)
         while (str[i] != '\0') {
                 int len;
 
-                if (is_whitelisted(str[i], white)) {
+                if (is_utf8_encoding_whitelisted(str[i], white)) {
                         i++;
                         continue;
                 }
@@ -525,39 +392,7 @@ int util_replace_chars(char *str, const char *white)
  **/
 _public_ int udev_util_encode_string(const char *str, char *str_enc, size_t len)
 {
-        size_t i, j;
-
-        if (str == NULL || str_enc == NULL)
-                return -1;
-
-        for (i = 0, j = 0; str[i] != '\0'; i++) {
-                int seqlen;
-
-                seqlen = utf8_encoded_valid_unichar(&str[i]);
-                if (seqlen > 1) {
-                        if (len-j < (size_t)seqlen)
-                                goto err;
-                        memcpy(&str_enc[j], &str[i], seqlen);
-                        j += seqlen;
-                        i += (seqlen-1);
-                } else if (str[i] == '\\' || !is_whitelisted(str[i], NULL)) {
-                        if (len-j < 4)
-                                goto err;
-                        sprintf(&str_enc[j], "\\x%02x", (unsigned char) str[i]);
-                        j += 4;
-                } else {
-                        if (len-j < 1)
-                                goto err;
-                        str_enc[j] = str[i];
-                        j++;
-                }
-        }
-        if (len-j < 1)
-                goto err;
-        str_enc[j] = '\0';
-        return 0;
-err:
-        return -1;
+        return udev_encode_string(str, str_enc, len);
 }
 
 /*
diff --git a/src/shared/utf8.c b/src/shared/utf8.c
index 655cc77..1a68394 100644
--- a/src/shared/utf8.c
+++ b/src/shared/utf8.c
@@ -317,3 +317,154 @@ char *utf16_to_utf8(const void *s, size_t length) {
 
         return r;
 }
+
+/* count of characters used to encode one unicode char */
+static int utf8_encoded_expected_len(const char *str) {
+        unsigned char c = (unsigned char)str[0];
+
+        if (c < 0x80)
+                return 1;
+        if ((c & 0xe0) == 0xc0)
+                return 2;
+        if ((c & 0xf0) == 0xe0)
+                return 3;
+        if ((c & 0xf8) == 0xf0)
+                return 4;
+        if ((c & 0xfc) == 0xf8)
+                return 5;
+        if ((c & 0xfe) == 0xfc)
+                return 6;
+        return 0;
+}
+
+/* decode one unicode char */
+static int utf8_encoded_to_unichar(const char *str) {
+        int unichar;
+        int len;
+        int i;
+
+        len = utf8_encoded_expected_len(str);
+        switch (len) {
+        case 1:
+                return (int)str[0];
+        case 2:
+                unichar = str[0] & 0x1f;
+                break;
+        case 3:
+                unichar = (int)str[0] & 0x0f;
+                break;
+        case 4:
+                unichar = (int)str[0] & 0x07;
+                break;
+        case 5:
+                unichar = (int)str[0] & 0x03;
+                break;
+        case 6:
+                unichar = (int)str[0] & 0x01;
+                break;
+        default:
+                return -1;
+        }
+
+        for (i = 1; i < len; i++) {
+                if (((int)str[i] & 0xc0) != 0x80)
+                        return -1;
+                unichar <<= 6;
+                unichar |= (int)str[i] & 0x3f;
+        }
+
+        return unichar;
+}
+
+/* expected size used to encode one unicode char */
+static int utf8_unichar_to_encoded_len(int unichar) {
+        if (unichar < 0x80)
+                return 1;
+        if (unichar < 0x800)
+                return 2;
+        if (unichar < 0x10000)
+                return 3;
+        if (unichar < 0x200000)
+                return 4;
+        if (unichar < 0x4000000)
+                return 5;
+        return 6;
+}
+
+/* validate one encoded unicode char and return its length */
+int utf8_encoded_valid_unichar(const char *str) {
+        int len;
+        int unichar;
+        int i;
+
+        len = utf8_encoded_expected_len(str);
+        if (len == 0)
+                return -1;
+
+        /* ascii is valid */
+        if (len == 1)
+                return 1;
+
+        /* check if expected encoded chars are available */
+        for (i = 0; i < len; i++)
+                if ((str[i] & 0x80) != 0x80)
+                        return -1;
+
+        unichar = utf8_encoded_to_unichar(str);
+
+        /* check if encoded length matches encoded value */
+        if (utf8_unichar_to_encoded_len(unichar) != len)
+                return -1;
+
+        /* check if value has valid range */
+        if (!is_unicode_valid(unichar))
+                return -1;
+
+        return len;
+}
+
+int is_utf8_encoding_whitelisted(char c, const char *white) {
+        if ((c >= '0' && c <= '9') ||
+            (c >= 'A' && c <= 'Z') ||
+            (c >= 'a' && c <= 'z') ||
+            strchr("#+-.:=@_", c) != NULL ||
+            (white != NULL && strchr(white, c) != NULL))
+                return 1;
+        return 0;
+}
+
+int udev_encode_string(const char *str, char *str_enc, size_t len) {
+        size_t i, j;
+
+        if (str == NULL || str_enc == NULL)
+                return -1;
+
+        for (i = 0, j = 0; str[i] != '\0'; i++) {
+                int seqlen;
+
+                seqlen = utf8_encoded_valid_unichar(&str[i]);
+                if (seqlen > 1) {
+                        if (len-j < (size_t)seqlen)
+                                goto err;
+                        memcpy(&str_enc[j], &str[i], seqlen);
+                        j += seqlen;
+                        i += (seqlen-1);
+                } else if (str[i] == '\\' || !is_utf8_encoding_whitelisted(str[i], NULL)) {
+                        if (len-j < 4)
+                                goto err;
+                        sprintf(&str_enc[j], "\\x%02x", (unsigned char) str[i]);
+                        j += 4;
+                } else {
+                        if (len-j < 1)
+                                goto err;
+                        str_enc[j] = str[i];
+                        j++;
+                }
+        }
+        if (len-j < 1)
+                goto err;
+        str_enc[j] = '\0';
+        return 0;
+err:
+        return -1;
+}
diff --git a/src/shared/utf8.h b/src/shared/utf8.h
index f805ea6..7a5608c 100644
--- a/src/shared/utf8.h
+++ b/src/shared/utf8.h
@@ -34,3 +34,7 @@ char *utf8_filter(const char *s);
 char *ascii_filter(const char *s);
 
 char *utf16_to_utf8(const void *s, size_t length);
+
+int utf8_encoded_valid_unichar(const char *str);
+int is_utf8_encoding_whitelisted(char c, const char *white);
+int udev_encode_string(const char *str, char *str_enc, size_t len);
diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c
new file mode 100644
index 0000000..d2b9771
--- /dev/null
+++ b/src/test/test-utf8.c
@@ -0,0 +1,59 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2013 Dave Reisner
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+
+#include "utf8.h"
+#include "util.h"
+
+/* helpers for test_udev_encode_string */
+static char *do_encode_string(const char *in) {
+        size_t out_len = strlen(in) * 4;
+        char *out = malloc(out_len);
+
+        assert_se(out);
+        assert_se(udev_encode_string(in, out, out_len) >= 0);
+        puts(out);
+
+        return out;
+}
+
+static bool expect_encoded_as(const char *in, const char *expected) {
+        _cleanup_free_ char *encoded = do_encode_string(in);
+        return streq(encoded, expected);
+}
+
+static void test_udev_encode_string(void) {
+        assert_se(expect_encoded_as("systemd sucks", "systemd\\x20sucks"));
+        assert_se(expect_encoded_as("pinkiepie", "pinkiepie"));
+        assert_se(expect_encoded_as("valÃd\\Å³tf8", "valÃd\\x5cÅ³tf8"));
+        assert_se(expect_encoded_as("s/ash/ng", "s\\x2fash\\x2fng"));
+}
+
+static void test_utf8_is_valid(void) {
+        assert_se(utf8_is_valid("ascii is valid unicode"));
+        assert_se(utf8_is_valid("\341\204\242"));
+        assert_se(!utf8_is_valid("\341\204"));
+}
+
+int main(int argc, char *argv[]) {
+        test_utf8_is_valid();
+        test_udev_encode_string();
+}