[systemd-devel] [PATCH] util, utf8: recognize wide characters in wellipsize_mem()
Shawn Landden
shawn at churchofgit.com
Wed Aug 28 15:55:22 PDT 2013
---
src/shared/utf8.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
src/shared/utf8.h | 4 +++-
src/shared/util.c | 19 ++++++++++++++---
src/shared/util.h | 1 +
4 files changed, 84 insertions(+), 4 deletions(-)
diff --git a/src/shared/utf8.c b/src/shared/utf8.c
index 8a37c3a..607f0c1 100644
--- a/src/shared/utf8.c
+++ b/src/shared/utf8.c
@@ -372,4 +372,68 @@ utf8_get_char (const char *p)
UTF8_GET (result, p, i, mask, len);
return result;
+}
+
+struct Interval
+{
+ unichar start, end; /* first and last code point of the range, both inclusive */
+};
+
+static int
+interval_compare (const void *key, const void *elt) /* bsearch() comparator: <0, 0, >0 as key lies below, inside, above elt */
+{
+ unichar c = (unichar) (long) (key); /* key is the code point itself, not a pointer to it */
+ const struct Interval *interval = elt; /* table entries are const; keep the qualifier */
+
+ if (c < interval->start)
+ return -1;
+ if (c > interval->end)
+ return +1;
+
+ return 0; /* start <= c <= end */
+}
+
+/*
+ * NOTE:
+ *
+ * The table for unichar_iswide() is generated from the Unicode
+ * Character Database's file extracted/DerivedEastAsianWidth.txt
+ * using the gen-iswide-table.py script
+ * in this way:
+ *
+ * ./gen-iswide-table.py < path/to/ucd/extracted/DerivedEastAsianWidth.txt | fmt
+ *
+ * Last update for Unicode 6.0.
+ */
+
+/**
+ * unichar_iswide:
+ * @c: a Unicode character
+ *
+ * Determines if a character is typically rendered in a double-width
+ * cell, by binary-searching a table of wide code point ranges.
+ *
+ * Return value: true if the character is wide, false otherwise
+ **/
+bool
+unichar_iswide (unichar c)
+{
+ /* Ranges are sorted and non-overlapping, as bsearch() requires. See NOTE earlier for how to update this table. */
+ static const struct Interval wide[] = {
+ {0x1100, 0x115F}, {0x2329, 0x232A}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3},
+ {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3000, 0x303E}, {0x3041, 0x3096},
+ {0x3099, 0x30FF}, {0x3105, 0x312D}, {0x3131, 0x318E}, {0x3190, 0x31BA},
+ {0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x32FE},
+ {0x3300, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C},
+ {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52},
+ {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6},
+ {0x1B000, 0x1B001}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23A}, {0x1F240,
+ 0x1F248}, {0x1F250, 0x1F251}, {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}
+ };
+
+ if (bsearch ((const void *) (long) c, wide, (sizeof (wide) / sizeof ((wide)[0])), sizeof wide[0],
+ interval_compare)) /* the key carries c by value; interval_compare() casts it back */
+ return true;
+
+ return false;
}
\ No newline at end of file
diff --git a/src/shared/utf8.h b/src/shared/utf8.h
index 020bc27..f1be180 100644
--- a/src/shared/utf8.h
+++ b/src/shared/utf8.h
@@ -131,4 +131,6 @@ static const char utf8_skip_data[256] = {
* Before using this macro, use g_utf8_validate() to validate strings
* that may contain invalid UTF-8.
*/
-#define utf8_next_char(p) (char *)((p) + utf8_skip_data[*(const char *)(p)])
\ No newline at end of file
+#define utf8_next_char(p) (char *)((p) + utf8_skip_data[*(const unsigned char *)(p)]) /* unsigned: a byte >= 0x80 must not become a negative index */
+
+bool unichar_iswide (unichar c);
\ No newline at end of file
diff --git a/src/shared/util.c b/src/shared/util.c
index 58a1787..1c73b3e 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -3357,22 +3357,35 @@ char *wellipsize_mem(const char *s, size_t old_length, size_t new_length, unsign
if (x > new_length - 3)
x = new_length - 3;
- for (i = (char *)s;k < x;i = utf8_next_char(i))
+ for (i = (char *)s;k < x;i = utf8_next_char(i)) {
+ c = utf8_get_char(i);
k++;
+ if (unichar_iswide(c))
+ k++;
+ }
+
+ if (k > x) /* last character was wide and went over quota */
+ x++;
j = i - s;
memcpy(e, s, j);
- e[j] = '.'; /* TODO: use … tri-dot? */
- e[j+1] = '.'; /* 0xe2 0x80 0xa6 */
+ e[j] = '.'; /* TODO: use … tri-dot? */
+ e[j+1] = '.'; /* 0xE2 0x80 0xA6 */
e[j+2] = '.';
k = 0;
for (i = (char *)s + old_length;
k < new_length - x - 3;) {
i = utf8_prev_char(i);
+ c = utf8_get_char(i);
k++;
+ if (unichar_iswide(c))
+ k++;
}
+ if (k > new_length - x - 3) /* last (reverse) character was wide and went over quota */
+ i = utf8_next_char(i);
+
strcpy(e + j + 3, i);
return e;
diff --git a/src/shared/util.h b/src/shared/util.h
index 9b17db9..97d8697 100644
--- a/src/shared/util.h
+++ b/src/shared/util.h
@@ -405,6 +405,7 @@ int running_in_chroot(void);
char *ellipsize(const char *s, size_t length, unsigned percent);
char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent);
char *wellipsize(const char *s, size_t length, unsigned percent);
+/* In wellipsize_mem() below, old_length is in bytes and new_length is in columns */
char *wellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent);
int touch(const char *path);
--
1.8.4.rc3
More information about the systemd-devel
mailing list