[Libreoffice-commits] libcdr.git: 2 commits - src/lib
Fridrich Štrba
fridrich.strba at bluewin.ch
Thu Mar 28 10:54:07 PDT 2013
src/lib/libcdr_utils.cpp | 120 ++++++++++++-----------------------------------
1 file changed, 31 insertions(+), 89 deletions(-)
New commits:
commit 5f156ceae675f194f0cfabb12bed35e28055db35
Author: Fridrich Štrba <fridrich.strba at bluewin.ch>
Date: Thu Mar 28 18:53:43 2013 +0100
Use ICU for UCS4 -> UTF8 conversion
diff --git a/src/lib/libcdr_utils.cpp b/src/lib/libcdr_utils.cpp
index b9e697b..964f9f0 100644
--- a/src/lib/libcdr_utils.cpp
+++ b/src/lib/libcdr_utils.cpp
@@ -148,57 +148,29 @@ static unsigned short getEncoding(const unsigned char *buffer, unsigned bufferLe
}
}
-static void _appendUCS4(WPXString &text, unsigned ucs4Character)
+static void _appendUCS4(WPXString &text, UChar ucs4Character)
{
// Convert carriage returns to new line characters
// Writerperfect/LibreOffice will replace them by <text:line-break>
- if (ucs4Character == 0x0d)
- ucs4Character = (unsigned) '\n';
+ if (ucs4Character == (UChar)0x0d)
+ ucs4Character = (UChar)'\n';
- unsigned char first;
- int len;
- if (ucs4Character < 0x80)
- {
- first = 0;
- len = 1;
- }
- else if (ucs4Character < 0x800)
- {
- first = 0xc0;
- len = 2;
- }
- else if (ucs4Character < 0x10000)
- {
- first = 0xe0;
- len = 3;
- }
- else if (ucs4Character < 0x200000)
- {
- first = 0xf0;
- len = 4;
- }
- else if (ucs4Character < 0x4000000)
- {
- first = 0xf8;
- len = 5;
- }
- else
+ UErrorCode status = U_ZERO_ERROR;
+ UConverter *conv = ucnv_open("UTF-8", &status);
+
+ if (U_SUCCESS(status) && conv)
{
- first = 0xfc;
- len = 6;
+ char outbuf[7] = { 0, 0, 0, 0, 0, 0, 0 };
+ ucnv_fromUChars(conv, &outbuf[0], 7, &ucs4Character, 1, &status);
+ if (U_SUCCESS(status))
+ {
+ text.append(outbuf);
+ }
}
-
- unsigned char outbuf[6] = { 0, 0, 0, 0, 0, 0 };
- int i;
- for (i = len - 1; i > 0; --i)
+ if (conv)
{
- outbuf[i] = (ucs4Character & 0x3f) | 0x80;
- ucs4Character >>= 6;
+ ucnv_close(conv);
}
- outbuf[0] = (ucs4Character & 0xff) | first;
-
- for (i = 0; i < len; i++)
- text.append(outbuf[i]);
}
} // anonymous namespace
@@ -338,7 +310,7 @@ void libcdr::appendCharacters(WPXString &text, std::vector<unsigned char> charac
{
if (characters.empty())
return;
- static const unsigned short symbolmap [] =
+ static const UChar symbolmap [] =
{
0x0020, 0x0021, 0x2200, 0x0023, 0x2203, 0x0025, 0x0026, 0x220D, // 0x20 ..
0x0028, 0x0029, 0x2217, 0x002B, 0x002C, 0x2212, 0x002E, 0x002F,
@@ -440,7 +412,7 @@ void libcdr::appendCharacters(WPXString &text, std::vector<unsigned char> charac
const char *srcLimit = (const char *)src + characters.size();
while (src < srcLimit)
{
- uint32_t ucs4Character = (uint32_t)ucnv_getNextUChar(conv, &src, srcLimit, &status);
+ UChar ucs4Character = ucnv_getNextUChar(conv, &src, srcLimit, &status);
if (U_SUCCESS(status))
{
_appendUCS4(text, ucs4Character);
@@ -465,7 +437,7 @@ void libcdr::appendCharacters(WPXString &text, std::vector<unsigned char> charac
const char *srcLimit = (const char *)src + characters.size();
while (src < srcLimit)
{
- uint32_t ucs4Character = (uint32_t)ucnv_getNextUChar(conv, &src, srcLimit, &status);
+ UChar ucs4Character = ucnv_getNextUChar(conv, &src, srcLimit, &status);
if (U_SUCCESS(status))
{
_appendUCS4(text, ucs4Character);
commit 4e7632756bcd09f03b9c287ef3ef0934fa83dca5
Author: Fridrich Štrba <fridrich.strba at bluewin.ch>
Date: Thu Mar 28 18:23:40 2013 +0100
Use ICU for utf-16le conversion too
diff --git a/src/lib/libcdr_utils.cpp b/src/lib/libcdr_utils.cpp
index 510aed6..b9e697b 100644
--- a/src/lib/libcdr_utils.cpp
+++ b/src/lib/libcdr_utils.cpp
@@ -456,55 +456,25 @@ void libcdr::appendCharacters(WPXString &text, std::vector<unsigned char> charac
void libcdr::appendCharacters(WPXString &text, std::vector<unsigned char> characters)
{
- for (std::vector<unsigned char>::const_iterator iter = characters.begin();
- iter != characters.end();)
+ UErrorCode status = U_ZERO_ERROR;
+ UConverter *conv = ucnv_open("UTF-16LE", &status);
+
+ if (U_SUCCESS(status) && conv)
{
- uint16_t high_surrogate = 0;
- bool fail = false;
- uint32_t ucs4Character = 0;
- while (true)
+ const char *src = (const char *)&characters[0];
+ const char *srcLimit = (const char *)src + characters.size();
+ while (src < srcLimit)
{
- if (iter == characters.end())
- {
- fail = true;
- break;
- }
- uint16_t character = *iter++;
- character |= (uint16_t)(*iter++) << 8;
- if (character >= 0xdc00 && character < 0xe000) /* low surrogate */
+ uint32_t ucs4Character = (uint32_t)ucnv_getNextUChar(conv, &src, srcLimit, &status);
+ if (U_SUCCESS(status))
{
- if (high_surrogate)
- {
- ucs4Character = SURROGATE_VALUE(high_surrogate, character);
- high_surrogate = 0;
- break;
- }
- else
- {
- fail = true;
- break;
- }
- }
- else
- {
- if (high_surrogate)
- {
- fail = true;
- break;
- }
- if (character >= 0xd800 && character < 0xdc00) /* high surrogate */
- high_surrogate = character;
- else
- {
- ucs4Character = character;
- break;
- }
+ _appendUCS4(text, ucs4Character);
}
}
- if (fail)
- throw libcdr::GenericException();
-
- _appendUCS4(text, ucs4Character);
+ }
+ if (conv)
+ {
+ ucnv_close(conv);
}
}
More information about the Libreoffice-commits
mailing list