[Libreoffice-commits] libvisio.git: configure.ac src/lib

Fridrich Štrba fridrich.strba at bluewin.ch
Mon Apr 1 13:58:10 PDT 2013


 configure.ac                    |   25 +
 src/lib/VSDContentCollector.cpp |  504 +++++++++++-----------------------------
 src/lib/VSDContentCollector.h   |    1 
 src/lib/VSDTypes.h              |    5 
 4 files changed, 168 insertions(+), 367 deletions(-)

New commits:
commit a655e5fcca4835a55992b3c9db792ed98bcb265a
Author: Fridrich Štrba <fridrich.strba at bluewin.ch>
Date:   Mon Apr 1 22:57:46 2013 +0200

    Use ICU for charset conversions

diff --git a/configure.ac b/configure.ac
index 508e034..151f581 100644
--- a/configure.ac
+++ b/configure.ac
@@ -68,6 +68,27 @@ PKG_CHECK_MODULES([LIBXML],[
 	libxml-2.0
 ])
 
+# ========
+# Find icu
+# ========
+AS_IF([test "${ICU_CFLAGS+set}" = set],[
+	ICU_CFLAGS_overriden=yes],[
+	ICU_CFLAGS_overriden=no])
+PKG_CHECK_MODULES([ICU],[icu-i18n],[
+	AS_IF([test "${ICU_CFLAGS_overriden}" = no], [
+		ICU_CFLAGS="${ICU_CFLAGS} `${PKG_CONFIG} icu-i18n --variable=CXXFLAGS`"])
+	], [
+	AC_PATH_PROG([ICU_CONFIG],[icu-config])
+	AC_MSG_CHECKING([ICU installation])
+	AS_IF([${ICU_CONFIG} --cflags >/dev/null 2>&1],[
+		ICU_CFLAGS=`${ICU_CONFIG} --cppflags-searchpath --cxxflags`
+		ICU_LIBS=`${ICU_CONFIG} --ldflags`
+		AC_MSG_RESULT([found])],[
+		AC_MSG_ERROR([libicu config program icu-config not found])])]
+)
+AC_SUBST(ICU_CFLAGS)
+AC_SUBST(ICU_LIBS)
+
 # =========
 # Find zlib
 # =========
@@ -222,8 +243,8 @@ AS_IF([test "x$native_win32" = "xyes"], [
 		CXXFLAGS="$CXXFLAGS -Wall -Wextra -Wshadow -pedantic"
 	])
 ])
-LIBVISIO_CXXFLAGS="${WPD_CFLAGS} ${WPG_CFLAGS} ${LIBXML_CFLAGS} ${ZLIB_CFLAGS}"
-LIBVISIO_LIBS="${WPD_LIBS} ${WPG_LIBS} ${LIBXML_LIBS} ${ZLIB_LIBS}"
+LIBVISIO_CXXFLAGS="${WPD_CFLAGS} ${WPG_CFLAGS} ${LIBXML_CFLAGS} ${ZLIB_CFLAGS} ${ICU_CFLAGS}"
+LIBVISIO_LIBS="${WPD_LIBS} ${WPG_LIBS} ${LIBXML_LIBS} ${ZLIB_LIBS} ${ICU_LIBS}"
 AC_SUBST(LIBVISIO_CXXFLAGS)
 AC_SUBST(LIBVISIO_LIBS)
 
diff --git a/src/lib/VSDContentCollector.cpp b/src/lib/VSDContentCollector.cpp
index 82ca61f..f90ce7b 100644
--- a/src/lib/VSDContentCollector.cpp
+++ b/src/lib/VSDContentCollector.cpp
@@ -31,6 +31,9 @@
 #include <string.h> // for memcpy
 #include <stack>
 #include <boost/spirit/include/classic.hpp>
+#include <unicode/ucnv.h>
+#include <unicode/utypes.h>
+#include <unicode/utf8.h>
 
 #include "VSDContentCollector.h"
 #include "VSDParser.h"
@@ -54,52 +57,19 @@ static unsigned bitmapId = 0;
 namespace
 {
 
-static void _appendUCS4(WPXString &text, unsigned ucs4Character)
+static void _appendUCS4(WPXString &text, UChar32 ucs4Character)
 {
-  unsigned char first;
-  int len;
-  if (ucs4Character < 0x80)
-  {
-    first = 0;
-    len = 1;
-  }
-  else if (ucs4Character < 0x800)
-  {
-    first = 0xc0;
-    len = 2;
-  }
-  else if (ucs4Character < 0x10000)
-  {
-    first = 0xe0;
-    len = 3;
-  }
-  else if (ucs4Character < 0x200000)
-  {
-    first = 0xf0;
-    len = 4;
-  }
-  else if (ucs4Character < 0x4000000)
-  {
-    first = 0xf8;
-    len = 5;
-  }
-  else
-  {
-    first = 0xfc;
-    len = 6;
-  }
+  // Convert carriage returns to new line characters
+  // Writerperfect/LibreOffice will replace them by <text:line-break>
+  if (ucs4Character == (UChar32) 0x0d || ucs4Character == (UChar32) 0x0e)
+    ucs4Character = (UChar32) '\n';
 
-  unsigned char outbuf[6] = { 0, 0, 0, 0, 0, 0 };
-  int i;
-  for (i = len - 1; i > 0; --i)
-  {
-    outbuf[i] = (ucs4Character & 0x3f) | 0x80;
-    ucs4Character >>= 6;
-  }
-  outbuf[0] = (ucs4Character & 0xff) | first;
+  unsigned char outbuf[U8_MAX_LENGTH+1];
+  int i = 0;
+  U8_APPEND_UNSAFE(&outbuf[0], i, ucs4Character);
+  outbuf[i] = 0;
 
-  for (i = 0; i < len; i++)
-    text.append(outbuf[i]);
+  text.append((char *)outbuf);
 }
 
 } // anonymous namespace
@@ -2636,353 +2606,157 @@ void libvisio::VSDContentCollector::appendCharacters(WPXString &text, const std:
     return;
   }
 
-  static const unsigned short cp874map[] =
-  {
-    0x20AC, 0x0020, 0x0020, 0x0020, 0x0020, 0x2026, 0x0020, 0x0020,
-    0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020,
-    0x0020, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
-    0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020,
-    0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
-    0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
-    0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
-    0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
-    0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
-    0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
-    0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
-    0x0E38, 0x0E39, 0x0E3A, 0x0020, 0x0020, 0x0020, 0x0020, 0x0E3F,
-    0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
-    0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
-    0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
-    0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0x0020, 0x0020, 0x0020, 0x0020
-  };
-
-  static const unsigned short cp1250map[] =
-  {
-    0x20AC, 0x0020, 0x201A, 0x0020, 0x201E, 0x2026, 0x2020, 0x2021,
-    0x0020, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179,
-    0x0020, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
-    0x0020, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A,
-    0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7,
-    0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B,
-    0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
-    0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C,
-    0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
-    0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
-    0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
-    0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
-    0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
-    0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
-    0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
-    0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9
-  };
-
-  static const unsigned short cp1251map[] =
-  {
-    0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,
-    0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,
-    0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
-    0x0020, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,
-    0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,
-    0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,
-    0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,
-    0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457,
-    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
-    0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
-    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
-    0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
-    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
-    0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
-    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
-    0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F
-  };
-
-  static const unsigned short cp1252map[] =
-  {
-    0x20AC, 0x0020, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
-    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0020, 0x017D, 0x0020,
-    0x0020, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
-    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0020, 0x017E, 0x0178,
-    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
-    0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
-    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
-    0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
-    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
-    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
-    0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
-    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
-    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
-    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
-    0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
-    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
-  };
-
-  static const unsigned short cp1253map[] =
-  {
-    0x20AC, 0x0020, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
-    0x0020, 0x2030, 0x0020, 0x2039, 0x0020, 0x0020, 0x0020, 0x0020,
-    0x0020, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
-    0x0020, 0x2122, 0x0020, 0x203A, 0x0020, 0x0020, 0x0020, 0x0020,
-    0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
-    0x00A8, 0x00A9, 0x0020, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015,
-    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7,
-    0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
-    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
-    0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
-    0x03A0, 0x03A1, 0x0020, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
-    0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
-    0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
-    0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
-    0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
-    0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0x0020
-  };
-
-  static const unsigned short cp1254map[] =
-  {
-    0x20AC, 0x0020, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
-    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0020, 0x0020, 0x0020,
-    0x0020, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
-    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0020, 0x0020, 0x0178,
-    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
-    0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
-    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
-    0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
-    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
-    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
-    0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
-    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
-    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
-    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
-    0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
-    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF
+  static const UChar32 symbolmap [] =
+  {
+    0x0020, 0x0021, 0x2200, 0x0023, 0x2203, 0x0025, 0x0026, 0x220D, // 0x20 ..
+    0x0028, 0x0029, 0x2217, 0x002B, 0x002C, 0x2212, 0x002E, 0x002F,
+    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+    0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
+    0x2245, 0x0391, 0x0392, 0x03A7, 0x0394, 0x0395, 0x03A6, 0x0393,
+    0x0397, 0x0399, 0x03D1, 0x039A, 0x039B, 0x039C, 0x039D, 0x039F,
+    0x03A0, 0x0398, 0x03A1, 0x03A3, 0x03A4, 0x03A5, 0x03C2, 0x03A9,
+    0x039E, 0x03A8, 0x0396, 0x005B, 0x2234, 0x005D, 0x22A5, 0x005F,
+    0xF8E5, 0x03B1, 0x03B2, 0x03C7, 0x03B4, 0x03B5, 0x03C6, 0x03B3,
+    0x03B7, 0x03B9, 0x03D5, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BF,
+    0x03C0, 0x03B8, 0x03C1, 0x03C3, 0x03C4, 0x03C5, 0x03D6, 0x03C9,
+    0x03BE, 0x03C8, 0x03B6, 0x007B, 0x007C, 0x007D, 0x223C, 0x0020, // .. 0x7F
+    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
+    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009E, 0x009f,
+    0x20AC, 0x03D2, 0x2032, 0x2264, 0x2044, 0x221E, 0x0192, 0x2663, // 0xA0 ..
+    0x2666, 0x2665, 0x2660, 0x2194, 0x2190, 0x2191, 0x2192, 0x2193,
+    0x00B0, 0x00B1, 0x2033, 0x2265, 0x00D7, 0x221D, 0x2202, 0x2022,
+    0x00F7, 0x2260, 0x2261, 0x2248, 0x2026, 0x23D0, 0x23AF, 0x21B5,
+    0x2135, 0x2111, 0x211C, 0x2118, 0x2297, 0x2295, 0x2205, 0x2229,
+    0x222A, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209,
+    0x2220, 0x2207, 0x00AE, 0x00A9, 0x2122, 0x220F, 0x221A, 0x22C5,
+    0x00AC, 0x2227, 0x2228, 0x21D4, 0x21D0, 0x21D1, 0x21D2, 0x21D3,
+    0x25CA, 0x3008, 0x00AE, 0x00A9, 0x2122, 0x2211, 0x239B, 0x239C,
+    0x239D, 0x23A1, 0x23A2, 0x23A3, 0x23A7, 0x23A8, 0x23A9, 0x23AA,
+    0xF8FF, 0x3009, 0x222B, 0x2320, 0x23AE, 0x2321, 0x239E, 0x239F,
+    0x23A0, 0x23A4, 0x23A5, 0x23A6, 0x23AB, 0x23AC, 0x23AD, 0x0020  // .. 0xFE
   };
 
-  static const unsigned short cp1255map[] =
-  {
-    0x20AC, 0x0020, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
-    0x02C6, 0x2030, 0x0020, 0x2039, 0x0020, 0x0020, 0x0020, 0x0020,
-    0x0020, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
-    0x02DC, 0x2122, 0x0020, 0x203A, 0x0020, 0x0020, 0x0020, 0x0020,
-    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7,
-    0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
-    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
-    0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
-    0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,
-    0x05B8, 0x05B9, 0x0020, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
-    0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3,
-    0x05F4, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020,
-    0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
-    0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
-    0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
-    0x05E8, 0x05E9, 0x05EA, 0x0020, 0x0020, 0x200E, 0x200F, 0x0020
-  };
-
-  static const unsigned short cp1256map[] =
-  {
-    0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
-    0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688,
-    0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
-    0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA,
-    0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
-    0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
-    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
-    0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F,
-    0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
-    0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
-    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7,
-    0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643,
-    0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7,
-    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF,
-    0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7,
-    0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2
-  };
-
-  static const unsigned short cp1257map[] =
-  {
-    0x20AC, 0x0020, 0x201A, 0x0020, 0x201E, 0x2026, 0x2020, 0x2021,
-    0x0020, 0x2030, 0x0020, 0x2039, 0x0020, 0x00A8, 0x02C7, 0x00B8,
-    0x0020, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
-    0x0020, 0x2122, 0x0020, 0x203A, 0x0020, 0x00AF, 0x02DB, 0x0020,
-    0x00A0, 0x0020, 0x00A2, 0x00A3, 0x00A4, 0x0020, 0x00A6, 0x00A7,
-    0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
-    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
-    0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
-    0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
-    0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
-    0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
-    0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
-    0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
-    0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
-    0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
-    0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9
-  };
-
-  static const unsigned short cp1258map[] =
-  {
-    0x20AC, 0x0020, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
-    0x02C6, 0x2030, 0x0020, 0x2039, 0x0152, 0x0020, 0x0020, 0x0020,
-    0x0020, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
-    0x02DC, 0x2122, 0x0020, 0x203A, 0x0153, 0x0020, 0x0020, 0x0178,
-    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
-    0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
-    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
-    0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
-    0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
-    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF,
-    0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7,
-    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF,
-    0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
-    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF,
-    0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7,
-    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF
-  };
-
-  std::vector<unsigned char>::const_iterator iter = characters.begin();
-  while (iter != characters.end())
+  UChar32  ucs4Character = 0;
+  if (format == VSD_TEXT_SYMBOL) // SYMBOL
   {
-    uint32_t ucs4Character = 0;
-    switch (*iter)
+    for (std::vector<unsigned char>::const_iterator iter = characters.begin();
+         iter != characters.end(); ++iter)
     {
-    case 0:
-      ++iter;
-      break;
-    case 0x09:
-      ucs4Character = '\t';
-      ++iter;
-      break;
-    case 0x0a:
-    case 0x0e:
-      ucs4Character = '\n';
-      ++iter;
-      break;
-    case 0x1e:
-      if (m_fieldIndex < m_fields.size())
-        text.append(m_fields[m_fieldIndex++].cstr());
-      else
-        m_fieldIndex++;
-      ++iter;
-      continue;
-    default:
-      if (*iter < 0x20)
+      if (0x1e == ucs4Character)
       {
-        ucs4Character = 0x20;
-        ++iter;
+        _appendField(text);
+        continue;
       }
-      else if (*iter >= 0x20 && *iter < 0x7f)
-        ucs4Character = *iter++;
-      else if (*iter == 0x7f)
-      {
+      else if (*iter < 0x20)
         ucs4Character = 0x20;
-        ++iter;
-      }
       else
+        ucs4Character = symbolmap[*iter - 0x20];
+      _appendUCS4(text, ucs4Character);
+    }
+  }
+  else
+  {
+    UErrorCode status = U_ZERO_ERROR;
+    UConverter *conv = NULL;
+    switch (format)
+    {
+    case VSD_TEXT_JAPANESE:
+      conv = ucnv_open("windows-932", &status);
+      break;
+    case VSD_TEXT_KOREAN:
+      conv = ucnv_open("windows-949", &status);
+      break;
+    case VSD_TEXT_CHINESE_SIMPLIFIED:
+      conv = ucnv_open("windows-936", &status);
+      break;
+    case VSD_TEXT_CHINESE_TRADITIONAL:
+      conv = ucnv_open("windows-950", &status);
+      break;
+    case VSD_TEXT_GREEK:
+      conv = ucnv_open("windows-1253", &status);
+      break;
+    case VSD_TEXT_TURKISH:
+      conv = ucnv_open("windows-1254", &status);
+      break;
+    case VSD_TEXT_VIETNAMESE:
+      conv = ucnv_open("windows-1258", &status);
+      break;
+    case VSD_TEXT_HEBREW:
+      conv = ucnv_open("windows-1255", &status);
+      break;
+    case VSD_TEXT_ARABIC:
+      conv = ucnv_open("windows-1256", &status);
+      break;
+    case VSD_TEXT_BALTIC:
+      conv = ucnv_open("windows-1257", &status);
+      break;
+    case VSD_TEXT_RUSSIAN:
+      conv = ucnv_open("windows-1251", &status);
+      break;
+    case VSD_TEXT_THAI:
+      conv = ucnv_open("windows-874", &status);
+      break;
+    case VSD_TEXT_CENTRAL_EUROPE:
+      conv = ucnv_open("windows-1250", &status);
+      break;
+    default:
+      conv = ucnv_open("windows-1252", &status);
+      break;
+    }
+    if (U_SUCCESS(status) && conv)
+    {
+      const char *src = (const char *)&characters[0];
+      const char *srcLimit = (const char *)src + characters.size();
+      while (src < srcLimit)
       {
-        switch (format)
+        ucs4Character = ucnv_getNextUChar(conv, &src, srcLimit, &status);
+        if (U_SUCCESS(status) && U_IS_UNICODE_CHAR(ucs4Character))
         {
-        case VSD_TEXT_ANSI:
-          ucs4Character = cp1252map[*iter++ - 0x80];
-          break;
-        case VSD_TEXT_GREEK:
-          ucs4Character = cp1253map[*iter++ - 0x80];
-          break;
-        case VSD_TEXT_TURKISH:
-          ucs4Character = cp1254map[*iter++ - 0x80];
-          break;
-        case VSD_TEXT_VIETNAMESE:
-          ucs4Character = cp1258map[*iter++ - 0x80];
-          break;
-        case VSD_TEXT_HEBREW:
-          ucs4Character = cp1255map[*iter++ - 0x80];
-          break;
-        case VSD_TEXT_ARABIC:
-          ucs4Character = cp1256map[*iter++ - 0x80];
-          break;
-        case VSD_TEXT_BALTIC:
-          ucs4Character = cp1257map[*iter++ - 0x80];
-          break;
-        case VSD_TEXT_RUSSIAN:
-          ucs4Character = cp1251map[*iter++ - 0x80];
-          break;
-        case VSD_TEXT_THAI:
-          ucs4Character = cp874map[*iter++ - 0x80];
-          break;
-        case VSD_TEXT_CENTRAL_EUROPE:
-          ucs4Character = cp1250map[*iter++ - 0x80];
-          break;
-        default:
-          ucs4Character = *iter++;
-          break;
+          if (0x1e == ucs4Character)
+            _appendField(text);
+          else
+            _appendUCS4(text, ucs4Character);
         }
       }
-      break;
     }
-    _appendUCS4(text, ucs4Character);
+    if (conv)
+      ucnv_close(conv);
   }
 }
 
 void libvisio::VSDContentCollector::appendCharacters(WPXString &text, const std::vector<unsigned char> &characters)
 {
-  std::vector<unsigned char>::const_iterator iter = characters.begin();
-  while (iter != characters.end())
+  UErrorCode status = U_ZERO_ERROR;
+  UConverter *conv = ucnv_open("UTF-16LE", &status);
+
+  if (U_SUCCESS(status) && conv)
   {
-    uint16_t high_surrogate = 0;
-    bool fail = false;
-    uint32_t ucs4Character = 0;
-    while (true)
+    const char *src = (const char *)&characters[0];
+    const char *srcLimit = (const char *)src + characters.size();
+    while (src < srcLimit)
     {
-      if (iter == characters.end())
-        break;
-
-      uint16_t character = *iter++;
-      character |= (uint16_t)(*iter++) << 8;
-      if (character == 0xfffc)
+      UChar32 ucs4Character = ucnv_getNextUChar(conv, &src, srcLimit, &status);
+      if (U_SUCCESS(status) && U_IS_UNICODE_CHAR(ucs4Character))
       {
-        if (m_fieldIndex < m_fields.size())
-          text.append(m_fields[m_fieldIndex++].cstr());
+        if (0xfffc == ucs4Character)
+          _appendField(text);
         else
-          m_fieldIndex++;
-        continue;
-      }
-      else if (character >= 0xdc00 && character < 0xe000) /* low surrogate */
-      {
-        if (high_surrogate)
-        {
-          ucs4Character = SURROGATE_VALUE(high_surrogate, character);
-          high_surrogate = 0;
-          break;
-        }
-        else
-        {
-          fail = true;
-          break;
-        }
-      }
-      else
-      {
-        if (high_surrogate)
-        {
-          fail = true;
-          break;
-        }
-        if (character >= 0xd800 && character < 0xdc00) /* high surrogate */
-          high_surrogate = character;
-        else
-        {
-          ucs4Character = character;
-          break;
-        }
+          _appendUCS4(text, ucs4Character);
       }
     }
-    if (fail)
-    {
-      VSD_DEBUG_MSG(("Throwing GenericException\n"));
-      throw libvisio::GenericException();
-    }
-
-    if (ucs4Character == 0xa)
-      _appendUCS4(text, (uint32_t)'\n');
-    else
-      _appendUCS4(text, ucs4Character);
   }
+  if (conv)
+    ucnv_close(conv);
+}
+
+void libvisio::VSDContentCollector::_appendField(WPXString &text)
+{
+  if (m_fieldIndex < m_fields.size())
+    text.append(m_fields[m_fieldIndex++].cstr());
+  else
+    m_fieldIndex++;
 }
 
 /* vim:set shiftwidth=2 softtabstop=2 expandtab: */
diff --git a/src/lib/VSDContentCollector.h b/src/lib/VSDContentCollector.h
index 6fead68..362a883 100644
--- a/src/lib/VSDContentCollector.h
+++ b/src/lib/VSDContentCollector.h
@@ -217,6 +217,7 @@ private:
   void appendCharacters(WPXString &text, const std::vector<unsigned char> &characters);
   void _convertDataToString(WPXString &result, const WPXBinaryData &data, TextFormat format);
   bool parseFormatId( const char *formatString, unsigned short &result );
+  void _appendField(WPXString &text);
 
   bool m_isPageStarted;
   double m_pageWidth;
diff --git a/src/lib/VSDTypes.h b/src/lib/VSDTypes.h
index 08e4e76..809e9e6 100644
--- a/src/lib/VSDTypes.h
+++ b/src/lib/VSDTypes.h
@@ -163,6 +163,7 @@ struct ForeignData
 enum TextFormat
 {
   VSD_TEXT_ANSI = 0,
+  VSD_TEXT_SYMBOL,
   VSD_TEXT_GREEK,
   VSD_TEXT_TURKISH,
   VSD_TEXT_VIETNAMESE,
@@ -172,6 +173,10 @@ enum TextFormat
   VSD_TEXT_RUSSIAN,
   VSD_TEXT_THAI,
   VSD_TEXT_CENTRAL_EUROPE,
+  VSD_TEXT_JAPANESE,
+  VSD_TEXT_KOREAN,
+  VSD_TEXT_CHINESE_SIMPLIFIED,
+  VSD_TEXT_CHINESE_TRADITIONAL,
   VSD_TEXT_UTF8,
   VSD_TEXT_UTF16
 };


More information about the Libreoffice-commits mailing list