[Spice-devel] [PATCH spice-gtk 3/4] util: add unix2dos and dos2unix
Hans de Goede
hdegoede at redhat.com
Sat Aug 24 03:20:38 PDT 2013
Hi,
On 08/23/2013 10:25 PM, Marc-André Lureau wrote:
> Convert line endings from/to LF/CRLF, in utf8.
> ---
> gtk/spice-util-priv.h | 2 +
> gtk/spice-util.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 124 insertions(+)
>
> diff --git a/gtk/spice-util-priv.h b/gtk/spice-util-priv.h
> index ee5a42d..cc559dc 100644
> --- a/gtk/spice-util-priv.h
> +++ b/gtk/spice-util-priv.h
> @@ -29,6 +29,8 @@ gboolean spice_strv_contains(const GStrv strv, const gchar *str);
> gchar* spice_uuid_to_string(const guint8 uuid[16]);
> const gchar* spice_yes_no(gboolean value);
> guint16 spice_make_scancode(guint scancode, gboolean release);
> +gchar* spice_unix2dos(const gchar *str, gssize len, GError **error);
> +gchar* spice_dos2unix(const gchar *str, gssize len, GError **error);
>
> #if GLIB_CHECK_VERSION(2,32,0)
> #define STATIC_MUTEX GMutex
> diff --git a/gtk/spice-util.c b/gtk/spice-util.c
> index 774a145..be10edc 100644
> --- a/gtk/spice-util.c
> +++ b/gtk/spice-util.c
> @@ -19,6 +19,7 @@
> #ifdef HAVE_CONFIG_H
> # include "config.h"
> #endif
> +
> #include <stdlib.h>
> #include <string.h>
> #include <glib-object.h>
> @@ -245,3 +246,124 @@ guint16 spice_make_scancode(guint scancode, gboolean release)
>
> g_return_val_if_reached(0);
> }
> +
> +typedef enum {
> + NEWLINE_TYPE_LF,
> + NEWLINE_TYPE_CR_LF
> +} NewlineType;
> +
> +static gssize get_line(const gchar *str, gsize len,
> + NewlineType type, gsize *nl_len,
> + GError **error)
> +{
> + const gchar *p = str;
> + gsize nl = 0;
> +
> + if (type == NEWLINE_TYPE_CR_LF) {
> + while ((p - str) < len) {
> + p = g_utf8_strchr(p, len, '\r');
> + if (!p)
> + break;
> + p = g_utf8_next_char(p);
> + if (g_utf8_get_char(p) == '\n') {
> + len = (p - str) - 1;
> + nl = 2;
> + break;
> + }
> + }
> + } else {
> + p = g_utf8_strchr(str, len, '\n');
> + if (p) {
> + len = p - str;
> + nl = 1;
> + }
> + }
This looks way more complicated than it needs to be. In UTF-8, the bytes
0x00 - 0x7f are only valid as single-byte sequences; multi-byte
encoded characters will never contain 0x00 - 0x7f. UTF-8 was designed
this way so that existing string parsing code for non multi-byte
encodings, which may look for example for ' " = or LF characters, does
not break when parsing strings with multi-byte characters in them.
TL;DR: LF and CR will never be part of a multi-byte character, so
you can simply do: strstr(str, "\r\n") to find the CRLF.
> +
> + if (!g_utf8_validate(str, len, NULL)) {
> + g_set_error_literal(error, G_CONVERT_ERROR,
> + G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
> + "Invalid byte sequence in conversion input");
> + return -1;
> + }
And once you simply treat this as a regular C string, without worrying
about multi-byte encodings, you can also drop this g_utf8_validate() check.
> +
> + *nl_len = nl;
> + return len;
> +}
> +
> +
> +static gchar* spice_convert_newlines(const gchar *str, gssize len,
> + NewlineType from,
> + NewlineType to,
> + GError **error)
> +{
> + GError *err = NULL;
> + gssize length;
> + gsize nl;
> + GString *output;
> + gboolean free_segment = FALSE;
> + gint i;
> +
> + g_return_val_if_fail(str != NULL, NULL);
> + g_return_val_if_fail(len >= -1, NULL);
> + g_return_val_if_fail(error == NULL || *error == NULL, NULL);
> + /* only 2 supported combinations */
> + g_return_val_if_fail((from == NEWLINE_TYPE_LF &&
> + to == NEWLINE_TYPE_CR_LF) ||
> + (from == NEWLINE_TYPE_CR_LF &&
> + to == NEWLINE_TYPE_LF), NULL);
> +
> + if (len == -1)
> + len = strlen(str);
> + /* sometime we get \0 terminated strings, skip that, or it fails
> + to utf8 validate line with \0 end */
> + else if (str[len] == 0)
> + len -= 1;
> +
> + /* allocate worst case, if it's small enough, we don't care much,
> + * if it's big, malloc will put us in mmap'd region, and we can
> + * over allocate.
> + */
> + output = g_string_sized_new(len * 2 + 1);
> +
> + for (i = 0; i < len; i += length + nl) {
> + length = get_line(str + i, len - i, from, &nl, error);
> + if (length < 0)
> + break;
> +
> + g_string_append_len(output, str + i, length);
> +
> + if (nl) {
> + /* let's not double \r if it's already in the line */
> + if (to == NEWLINE_TYPE_CR_LF &&
> + output->str[output->len - 1] != '\r')
> + g_string_append_c(output, '\r');
> +
> + g_string_append_c(output, '\n');
> + }
> + }
> +
> + if (err) {
> + g_propagate_error(error, err);
> + free_segment = TRUE;
> + }
> +
> + return g_string_free(output, free_segment);
> +}
> +
> +G_GNUC_INTERNAL
> +gchar* spice_dos2unix(const gchar *str, gssize len, GError **error)
> +{
> + return spice_convert_newlines(str, len,
> + NEWLINE_TYPE_CR_LF,
> + NEWLINE_TYPE_LF,
> + error);
> +}
> +
> +G_GNUC_INTERNAL
> +gchar* spice_unix2dos(const gchar *str, gssize len, GError **error)
> +{
> + return spice_convert_newlines(str, len,
> + NEWLINE_TYPE_LF,
> + NEWLINE_TYPE_CR_LF,
> + error);
> +}
>
Regards,
Hans
More information about the Spice-devel
mailing list