[Libreoffice-commits] libcdr.git: 2 commits - src/lib

Fridrich Štrba fridrich.strba at bluewin.ch
Thu Mar 28 10:54:07 PDT 2013


 src/lib/libcdr_utils.cpp |  120 ++++++++++++-----------------------------------
 1 file changed, 31 insertions(+), 89 deletions(-)

New commits:
commit 5f156ceae675f194f0cfabb12bed35e28055db35
Author: Fridrich Štrba <fridrich.strba at bluewin.ch>
Date:   Thu Mar 28 18:53:43 2013 +0100

    Use ICU for UCS4 -> UTF8 conversion

diff --git a/src/lib/libcdr_utils.cpp b/src/lib/libcdr_utils.cpp
index b9e697b..964f9f0 100644
--- a/src/lib/libcdr_utils.cpp
+++ b/src/lib/libcdr_utils.cpp
@@ -148,57 +148,29 @@ static unsigned short getEncoding(const unsigned char *buffer, unsigned bufferLe
   }
 }
 
-static void _appendUCS4(WPXString &text, unsigned ucs4Character)
+static void _appendUCS4(WPXString &text, UChar ucs4Character)
 {
   // Convert carriage returns to new line characters
   // Writerperfect/LibreOffice will replace them by <text:line-break>
-  if (ucs4Character == 0x0d)
-    ucs4Character = (unsigned) '\n';
+  if (ucs4Character == (UChar)0x0d)
+    ucs4Character = (UChar)'\n';
 
-  unsigned char first;
-  int len;
-  if (ucs4Character < 0x80)
-  {
-    first = 0;
-    len = 1;
-  }
-  else if (ucs4Character < 0x800)
-  {
-    first = 0xc0;
-    len = 2;
-  }
-  else if (ucs4Character < 0x10000)
-  {
-    first = 0xe0;
-    len = 3;
-  }
-  else if (ucs4Character < 0x200000)
-  {
-    first = 0xf0;
-    len = 4;
-  }
-  else if (ucs4Character < 0x4000000)
-  {
-    first = 0xf8;
-    len = 5;
-  }
-  else
+  UErrorCode status = U_ZERO_ERROR;
+  UConverter *conv = ucnv_open("UTF-8", &status);
+
+  if (U_SUCCESS(status) && conv)
   {
-    first = 0xfc;
-    len = 6;
+    char outbuf[7] = { 0, 0, 0, 0, 0, 0, 0 };
+    ucnv_fromUChars(conv, &outbuf[0], 7, &ucs4Character, 1, &status);
+    if (U_SUCCESS(status))
+    {
+      text.append(outbuf);
+    }
   }
-
-  unsigned char outbuf[6] = { 0, 0, 0, 0, 0, 0 };
-  int i;
-  for (i = len - 1; i > 0; --i)
+  if (conv)
   {
-    outbuf[i] = (ucs4Character & 0x3f) | 0x80;
-    ucs4Character >>= 6;
+    ucnv_close(conv);
   }
-  outbuf[0] = (ucs4Character & 0xff) | first;
-
-  for (i = 0; i < len; i++)
-    text.append(outbuf[i]);
 }
 
 } // anonymous namespace
@@ -338,7 +310,7 @@ void libcdr::appendCharacters(WPXString &text, std::vector<unsigned char> charac
 {
   if (characters.empty())
     return;
-  static const unsigned short symbolmap [] =
+  static const UChar symbolmap [] =
   {
     0x0020, 0x0021, 0x2200, 0x0023, 0x2203, 0x0025, 0x0026, 0x220D, // 0x20 ..
     0x0028, 0x0029, 0x2217, 0x002B, 0x002C, 0x2212, 0x002E, 0x002F,
@@ -440,7 +412,7 @@ void libcdr::appendCharacters(WPXString &text, std::vector<unsigned char> charac
       const char *srcLimit = (const char *)src + characters.size();
       while (src < srcLimit)
       {
-        uint32_t ucs4Character = (uint32_t)ucnv_getNextUChar(conv, &src, srcLimit, &status);
+        UChar ucs4Character = ucnv_getNextUChar(conv, &src, srcLimit, &status);
         if (U_SUCCESS(status))
         {
           _appendUCS4(text, ucs4Character);
@@ -465,7 +437,7 @@ void libcdr::appendCharacters(WPXString &text, std::vector<unsigned char> charac
     const char *srcLimit = (const char *)src + characters.size();
     while (src < srcLimit)
     {
-      uint32_t ucs4Character = (uint32_t)ucnv_getNextUChar(conv, &src, srcLimit, &status);
+      UChar ucs4Character = ucnv_getNextUChar(conv, &src, srcLimit, &status);
       if (U_SUCCESS(status))
       {
         _appendUCS4(text, ucs4Character);
commit 4e7632756bcd09f03b9c287ef3ef0934fa83dca5
Author: Fridrich Štrba <fridrich.strba at bluewin.ch>
Date:   Thu Mar 28 18:23:40 2013 +0100

    Use ICU for utf-16le conversion too

diff --git a/src/lib/libcdr_utils.cpp b/src/lib/libcdr_utils.cpp
index 510aed6..b9e697b 100644
--- a/src/lib/libcdr_utils.cpp
+++ b/src/lib/libcdr_utils.cpp
@@ -456,55 +456,25 @@ void libcdr::appendCharacters(WPXString &text, std::vector<unsigned char> charac
 
 void libcdr::appendCharacters(WPXString &text, std::vector<unsigned char> characters)
 {
-  for (std::vector<unsigned char>::const_iterator iter = characters.begin();
-       iter != characters.end();)
+  UErrorCode status = U_ZERO_ERROR;
+  UConverter *conv = ucnv_open("UTF-16LE", &status);
+
+  if (U_SUCCESS(status) && conv)
   {
-    uint16_t high_surrogate = 0;
-    bool fail = false;
-    uint32_t ucs4Character = 0;
-    while (true)
+    const char *src = (const char *)&characters[0];
+    const char *srcLimit = (const char *)src + characters.size();
+    while (src < srcLimit)
     {
-      if (iter == characters.end())
-      {
-        fail = true;
-        break;
-      }
-      uint16_t character = *iter++;
-      character |= (uint16_t)(*iter++) << 8;
-      if (character >= 0xdc00 && character < 0xe000) /* low surrogate */
+      uint32_t ucs4Character = (uint32_t)ucnv_getNextUChar(conv, &src, srcLimit, &status);
+      if (U_SUCCESS(status))
       {
-        if (high_surrogate)
-        {
-          ucs4Character = SURROGATE_VALUE(high_surrogate, character);
-          high_surrogate = 0;
-          break;
-        }
-        else
-        {
-          fail = true;
-          break;
-        }
-      }
-      else
-      {
-        if (high_surrogate)
-        {
-          fail = true;
-          break;
-        }
-        if (character >= 0xd800 && character < 0xdc00) /* high surrogate */
-          high_surrogate = character;
-        else
-        {
-          ucs4Character = character;
-          break;
-        }
+        _appendUCS4(text, ucs4Character);
       }
     }
-    if (fail)
-      throw libcdr::GenericException();
-
-    _appendUCS4(text, ucs4Character);
+  }
+  if (conv)
+  {
+    ucnv_close(conv);
   }
 }
 


More information about the Libreoffice-commits mailing list