[poppler] cpp/poppler-global.cpp

Albert Astals Cid aacid at kemper.freedesktop.org
Thu Aug 27 13:38:56 PDT 2015


 cpp/poppler-global.cpp |   17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

New commits:
commit e5511b58e732f921c65e366fb4d221371b95d905
Author: Hans-Peter Deifel <hpdeifel at gmx.de>
Date:   Thu Aug 27 22:38:08 2015 +0200

    cpp: Fix utf8/utf16 conversion
    
    The old code assumed that ustring::size() would return the number of
    bytes in the ustring, but it really returns the number of characters
    (16-bit code units). Since ustring is a basic_string<unsigned short>,
    these two values differ (by a factor of two).
    
    This needs to be considered when using iconv, since it operates on byte
    counts, not character counts.
    
    Bug #91644

diff --git a/cpp/poppler-global.cpp b/cpp/poppler-global.cpp
index 525dc99..b99259f 100644
--- a/cpp/poppler-global.cpp
+++ b/cpp/poppler-global.cpp
@@ -1,7 +1,7 @@
 /*
  * Copyright (C) 2009-2010, Pino Toscano <pino at kde.org>
  * Copyright (C) 2010, Hib Eris <hib at hiberis.nl>
- * Copyright (C) 2014, Hans-Peter Deifel <hpdeifel at gmx.de>
+ * Copyright (C) 2014, 2015 Hans-Peter Deifel <hpdeifel at gmx.de>
  * Copyright (C) 2015, Tamas Szekeres <szekerest at gmail.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -225,9 +225,9 @@ byte_array ustring::to_utf8() const
         return byte_array();
     }
     const value_type *me_data = data();
-    byte_array str(size());
+    byte_array str(size()*sizeof(value_type));
     char *str_data = &str[0];
-    size_t me_len_char = size();
+    size_t me_len_char = size()*sizeof(value_type);
     size_t str_len_left = str.size();
     size_t ir = iconv(ic, (ICONV_CONST char **)&me_data, &me_len_char, &str_data, &str_len_left);
     if ((ir == (size_t)-1) && (errno == E2BIG)) {
@@ -273,23 +273,24 @@ ustring ustring::from_utf8(const char *str, int len)
         return ustring();
     }
 
-    ustring ret(len * 2, 0);
+    // +1, because iconv inserts byte order marks
+    ustring ret(len+1, 0);
     char *ret_data = reinterpret_cast<char *>(&ret[0]);
     char *str_data = const_cast<char *>(str);
     size_t str_len_char = len;
-    size_t ret_len_left = ret.size();
+    size_t ret_len_left = ret.size() * sizeof(ustring::value_type);
     size_t ir = iconv(ic, (ICONV_CONST char **)&str_data, &str_len_char, &ret_data, &ret_len_left);
     if ((ir == (size_t)-1) && (errno == E2BIG)) {
         const size_t delta = ret_data - reinterpret_cast<char *>(&ret[0]);
-        ret_len_left += ret.size();
+        ret_len_left += ret.size()*sizeof(ustring::value_type);
         ret.resize(ret.size() * 2);
-        ret_data = reinterpret_cast<char *>(&ret[delta]);
+        ret_data = reinterpret_cast<char *>(&ret[0]) + delta;
         ir = iconv(ic, (ICONV_CONST char **)&str_data, &str_len_char, &ret_data, &ret_len_left);
         if (ir == (size_t)-1) {
             return ustring();
         }
     }
-    ret.resize(ret.size() - ret_len_left);
+    ret.resize(ret.size() - ret_len_left/sizeof(ustring::value_type));
 
     return ret;
 }


More information about the poppler mailing list