[Telepathy-commits] [telepathy-idle/master] Don't split a UTF-8 codepoint in half when splitting a long message

Jonathon Jongsma jonathon.jongsma at collabora.co.uk
Fri Feb 13 11:54:36 PST 2009


Basic approach: find the place where we would normally break the message, then
walk back to the start of the previous UTF-8 character.
---
 src/idle-text.c |   33 ++++++++++++++++-----------------
 1 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/src/idle-text.c b/src/idle-text.c
index 6cd5370..5cd4321 100644
--- a/src/idle-text.c
+++ b/src/idle-text.c
@@ -54,7 +54,7 @@ gboolean idle_text_decode(const gchar *text, TpChannelTextMessageType *type, gch
 GStrv idle_text_encode_and_split(TpChannelTextMessageType type, const gchar *recipient, const gchar *text, GError **error) {
 	GPtrArray *messages;
 	const gchar *remaining_text = text;
-	gsize remaining_text_len = strlen(text);
+	const gchar * const text_end =  text + strlen(text);
 	gchar *header;
 	const gchar *footer = "";
 	gsize max_bytes;
@@ -79,37 +79,36 @@ GStrv idle_text_encode_and_split(TpChannelTextMessageType type, const gchar *rec
 	messages = g_ptr_array_new();
 	max_bytes = IRC_MSG_MAXLEN - (strlen(header) + strlen(footer));
 
-	while (remaining_text_len > 0) {
+	while (remaining_text < text_end) {
 		char *newline = strchr(remaining_text, '\n');
-		gsize bytes_to_copy;
-		gsize advance;
+		const char *end_iter;
 		char *message;
 
 		if (newline != NULL && ((unsigned) (newline - remaining_text)) < max_bytes) {
 			/* String up to the next newline is short enough. */
-			bytes_to_copy = (newline - remaining_text);
+			end_iter = newline;
 
-			/* Advance over the newline */
-			advance = bytes_to_copy + 1;
-		} else if (remaining_text_len > max_bytes) {
+		} else if ((text_end - remaining_text) > (gint) max_bytes) {
 			/* Remaining string is too long; take as many bytes as possible */
-			/* FIXME: check we're not breaking a UTF-8 code point in half */
-			bytes_to_copy = max_bytes;
-			advance = max_bytes;
+			end_iter = remaining_text + max_bytes;
+			/* make sure we don't break a UTF-8 code point in half */
+			end_iter = g_utf8_find_prev_char (remaining_text, end_iter);
 		} else {
 			/* Just take it all. */
-			bytes_to_copy = remaining_text_len;
-			advance = remaining_text_len;
+			end_iter = text_end;
 		}
 
-		message = g_strdup_printf("%s%.*s%s", header, (int)bytes_to_copy, remaining_text, footer);
+		message = g_strdup_printf("%s%.*s%s", header, (int)(end_iter - remaining_text), remaining_text, footer);
 		g_ptr_array_add(messages, message);
 
-		remaining_text += advance;
-		remaining_text_len -= advance;
+		remaining_text = end_iter;
+		if (*end_iter == '\n') {
+				/* advance over a newline */
+				remaining_text++;
+		}
 	}
 
-	g_assert (remaining_text_len == 0);
+	g_assert (remaining_text == text_end);
 
 	g_ptr_array_add(messages, NULL);
 
-- 
1.5.6.5




More information about the telepathy-commits mailing list