[systemd-devel] [PATCH v2 1/2] utf8: intruduce utf8_escape_non_printable
WaLyong Cho
walyong.cho at samsung.com
Sun Nov 2 22:00:00 PST 2014
---
src/shared/utf8.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++
src/shared/utf8.h | 1 +
src/test/test-utf8.c | 30 ++++++++++++++++++
3 files changed, 118 insertions(+)
diff --git a/src/shared/utf8.c b/src/shared/utf8.c
index 9353559..5245604 100644
--- a/src/shared/utf8.c
+++ b/src/shared/utf8.c
@@ -210,6 +210,93 @@ char *utf8_escape_invalid(const char *str) {
return p;
}
+char *utf8_escape_non_printable(const char *str) {
+ char *p, *s;
+
+ assert(str);
+
+ p = s = malloc(strlen(str) * 4 + 1);
+ if (!p)
+ return NULL;
+
+ while (*str) {
+ int len;
+
+ len = utf8_encoded_valid_unichar(str);
+ if (len > 0) {
+ if (utf8_is_printable(str, len)) {
+ s = mempcpy(s, str, len);
+ str += len;
+ } else {
+ switch (*str) {
+
+ case '\a':
+ *(s++) = '\\';
+ *(s++) = 'a';
+ break;
+ case '\b':
+ *(s++) = '\\';
+ *(s++) = 'b';
+ break;
+ case '\f':
+ *(s++) = '\\';
+ *(s++) = 'f';
+ break;
+ case '\n':
+ *(s++) = '\\';
+ *(s++) = 'n';
+ break;
+ case '\r':
+ *(s++) = '\\';
+ *(s++) = 'r';
+ break;
+ case '\t':
+ *(s++) = '\\';
+ *(s++) = 't';
+ break;
+ case '\v':
+ *(s++) = '\\';
+ *(s++) = 'v';
+ break;
+ case '\\':
+ *(s++) = '\\';
+ *(s++) = '\\';
+ break;
+ case '"':
+ *(s++) = '\\';
+ *(s++) = '"';
+ break;
+ case '\'':
+ *(s++) = '\\';
+ *(s++) = '\'';
+ break;
+
+ default:
+ /* For special chars we prefer octal over
+ * hexadecimal encoding, simply because glib's
+ * g_strescape() does the same */
+ if ((*str < ' ') || (*str >= 127)) {
+ *(s++) = '\\';
+ *(s++) = octchar((unsigned char) *str >> 6);
+ *(s++) = octchar((unsigned char) *str >> 3);
+ *(s++) = octchar((unsigned char) *str);
+ } else
+ *(s++) = *str;
+ break;
+ }
+ str += 1;
+ }
+ } else {
+ s = mempcpy(s, UTF8_REPLACEMENT_CHARACTER, strlen(UTF8_REPLACEMENT_CHARACTER));
+ str += 1;
+ }
+ }
+
+ *s = '\0';
+
+ return p;
+}
+
char *ascii_is_valid(const char *str) {
const char *p;
diff --git a/src/shared/utf8.h b/src/shared/utf8.h
index c087995..1fe1a35 100644
--- a/src/shared/utf8.h
+++ b/src/shared/utf8.h
@@ -30,6 +30,7 @@
const char *utf8_is_valid(const char *s) _pure_;
char *ascii_is_valid(const char *s) _pure_;
char *utf8_escape_invalid(const char *s);
+char *utf8_escape_non_printable(const char *str);
bool utf8_is_printable_newline(const char* str, size_t length, bool newline) _pure_;
_pure_ static inline bool utf8_is_printable(const char* str, size_t length) {
diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c
index b7d988f..fb27fe5 100644
--- a/src/test/test-utf8.c
+++ b/src/test/test-utf8.c
@@ -66,12 +66,42 @@ static void test_utf8_escaping(void) {
assert_se(utf8_is_valid(p3));
}
+static void test_utf8_escaping_printable(void) {
+ _cleanup_free_ char *p1, *p2, *p3, *p4, *p5, *p6;
+ char c[10];
+
+ p1 = utf8_escape_non_printable("goo goo goo");
+ puts(p1);
+ assert_se(utf8_is_valid(p1));
+
+ p2 = utf8_escape_non_printable("\341\204\341\204");
+ puts(p2);
+ assert_se(utf8_is_valid(p2));
+
+ p3 = utf8_escape_non_printable("\341\204");
+ puts(p3);
+ assert_se(utf8_is_valid(p3));
+
+ p4 = utf8_escape_non_printable("ąę");
+ puts(p4);
+ assert_se(utf8_is_valid(p4));
+
+ p5 = utf8_escape_non_printable("가너도루");
+ puts(p5);
+ assert_se(utf8_is_valid(p5));
+
+ p6 = utf8_escape_non_printable("\001 \019\a");
+ puts(p6);
+ assert_se(utf8_is_valid(p6));
+}
+
int main(int argc, char *argv[]) {
test_utf8_is_valid();
test_utf8_is_printable();
test_ascii_is_valid();
test_utf8_encoded_valid_unichar();
test_utf8_escaping();
+ test_utf8_escaping_printable();
return 0;
}
--
1.9.3
More information about the systemd-devel
mailing list