[Libreoffice-commits] .: 2 commits - detect-charset.patch src/lib
Libreoffice Gerrit user
logerrit at kemper.freedesktop.org
Mon Jan 21 07:03:09 PST 2013
detect-charset.patch | 184 -----------------------------------------------
src/lib/libcdr_utils.cpp | 86 +++++++++++++++++++++
src/lib/libcdr_utils.h | 4 +
3 files changed, 90 insertions(+), 184 deletions(-)
New commits:
commit ec852bd198fa1aaeb578374f6a8a049c2b333b09
Author: Fridrich Štrba <fridrich.strba at bluewin.ch>
Date: Mon Jan 21 16:02:46 2013 +0100
This is integrated
diff --git a/detect-charset.patch b/detect-charset.patch
deleted file mode 100644
index ada0767..0000000
--- a/detect-charset.patch
+++ /dev/null
@@ -1,184 +0,0 @@
-From 44d988e5df8a782705ebe6a477b5ae1b173418bf Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Fridrich=20=C5=A0trba?= <fridrich.strba at bluewin.ch>
-Date: Mon, 21 Jan 2013 14:58:31 +0100
-Subject: [PATCH] Use ICU to guess encoding
-
----
- configure.ac | 16 +++++++++
- src/lib/Makefile.am | 4 +--
- src/lib/libcdr_utils.cpp | 86 ++++++++++++++++++++++++++++++++++++++++++++++++
- src/lib/libcdr_utils.h | 4 +++
- 4 files changed, 108 insertions(+), 2 deletions(-)
-
-diff --git a/configure.ac b/configure.ac
-index 1e32311..e5619cf 100644
---- a/configure.ac
-+++ b/configure.ac
-@@ -62,6 +62,22 @@ PKG_CHECK_MODULES([ZLIB],[zlib],[],[
- AC_SUBST(ZLIB_CFLAGS)
- AC_SUBST(ZLIB_LIBS)
-
-+# ========
-+# Find icu
-+# ========
-+AC_PATH_PROG([ICU_CONFIG],[icu-config])
-+AC_MSG_CHECKING([ICU installation])
-+if ${ICU_CONFIG} --cflags >/dev/null 2>&1; then
-+ ICU_CFLAGS=`${ICU_CONFIG} --cppflags-searchpath`
-+ ICU_LIBS=`${ICU_CONFIG} --ldflags`
-+ AC_MSG_RESULT([found])
-+else
-+ AC_MSG_ERROR([libicu config program icu-config not found])
-+fi
-+AC_SUBST(ICU_CFLAGS)
-+AC_SUBST(ICU_LIBS)
-+
-+
- # =================================
- # Libtool/Version Makefile settings
- # =================================
-diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am
-index 7255d40..bff4ce3 100644
---- a/src/lib/Makefile.am
-+++ b/src/lib/Makefile.am
-@@ -12,9 +12,9 @@ libcdr_ at CDR_MAJOR_VERSION@_ at CDR_MINOR_VERSION@_include_HEADERS = \
- CDRStringVector.h \
- CMXDocument.h
-
--AM_CXXFLAGS = $(LIBCDR_CXXFLAGS) $(ZLIB_CFLAGS) $(DEBUG_CXXFLAGS)
-+AM_CXXFLAGS = $(LIBCDR_CXXFLAGS) $(ZLIB_CFLAGS) $(ICU_CFLAGS) $(DEBUG_CXXFLAGS)
-
--libcdr_ at CDR_MAJOR_VERSION@_ at CDR_MINOR_VERSION@_la_LIBADD = $(LIBCDR_LIBS) $(ZLIB_LIBS) @LIBCDR_WIN32_RESOURCE@
-+libcdr_ at CDR_MAJOR_VERSION@_ at CDR_MINOR_VERSION@_la_LIBADD = $(LIBCDR_LIBS) $(ZLIB_LIBS) $(ICU_LIBS) @LIBCDR_WIN32_RESOURCE@
- libcdr_ at CDR_MAJOR_VERSION@_ at CDR_MINOR_VERSION@_la_DEPENDENCIES = @LIBCDR_WIN32_RESOURCE@
- libcdr_ at CDR_MAJOR_VERSION@_ at CDR_MINOR_VERSION@_la_LDFLAGS = $(version_info) -export-dynamic -no-undefined
- libcdr_ at CDR_MAJOR_VERSION@_ at CDR_MINOR_VERSION@_la_SOURCES = \
-diff --git a/src/lib/libcdr_utils.cpp b/src/lib/libcdr_utils.cpp
-index ef94582..28162e3 100644
---- a/src/lib/libcdr_utils.cpp
-+++ b/src/lib/libcdr_utils.cpp
-@@ -27,6 +27,8 @@
- * instead of those above.
- */
-
-+#include <string.h>
-+#include <unicode/ucsdet.h>
- #include "libcdr_utils.h"
-
- #define CDR_NUM_ELEMENTS(array) sizeof(array)/sizeof(array[0])
-@@ -36,6 +38,86 @@
- namespace
- {
-
-+static unsigned short getEncodingFromICUName(const char *name)
-+{
-+ // ANSI
-+ if (strcmp(name, "ISO-8859-1") == 0)
-+ return 0;
-+ if (strcmp(name, "windows-1252") == 0)
-+ return 0;
-+ // CENTRAL EUROPE
-+ if (strcmp(name, "ISO-8859-2") == 0)
-+ return 0xee;
-+ if (strcmp(name, "windows-1250") == 0)
-+ return 0xee;
-+ // RUSSIAN
-+ if (strcmp(name, "ISO-8859-5") == 0)
-+ return 0xcc;
-+ if (strcmp(name, "windows-1251") == 0)
-+ return 0xcc;
-+ if (strcmp(name, "KOI8-R") == 0)
-+ return 0xcc;
-+ // ARABIC
-+ if (strcmp(name, "ISO-8859-6") == 0)
-+ return 0xb2;
-+ if (strcmp(name, "windows-1256") == 0)
-+ return 0xb2;
-+ // TURKISH
-+ if (strcmp(name, "ISO-8859-9") == 0)
-+ return 0xa2;
-+ if (strcmp(name, "windows-1254") == 0)
-+ return 0xa2;
-+ // GREEK
-+ if (strcmp(name, "ISO-8859-7") == 0)
-+ return 0xa1;
-+ if (strcmp(name, "windows-1253") == 0)
-+ return 0xa1;
-+ // HEBREW
-+ if (strcmp(name, "ISO-8859-8") == 0)
-+ return 0xb1;
-+ if (strcmp(name, "windows-1255") == 0)
-+ return 0xb1;
-+
-+ return 0;
-+}
-+
-+
-+static unsigned short getEncoding(const unsigned char *buffer, unsigned bufferLength)
-+{
-+ UErrorCode status = U_ZERO_ERROR;
-+ UCharsetDetector *csd = 0;
-+ const UCharsetMatch *csm = 0;
-+ try
-+ {
-+ csd = ucsdet_open(&status);
-+ if (U_FAILURE(status))
-+ throw libcdr::EncodingException();
-+ ucsdet_setText(csd, (const char *)buffer, bufferLength, &status);
-+ if (U_FAILURE(status))
-+ throw libcdr::EncodingException();
-+ ucsdet_enableInputFilter(csd, TRUE);
-+ csm = ucsdet_detect(csd, &status);
-+ if (U_FAILURE(status))
-+ throw libcdr::EncodingException();
-+ const char *name = ucsdet_getName(csm, &status);
-+ if (U_FAILURE(status))
-+ throw libcdr::EncodingException();
-+ if (name)
-+ {
-+ unsigned short encoding = getEncodingFromICUName(name);
-+ ucsdet_close(csd);
-+ return encoding;
-+ }
-+ ucsdet_close(csd);
-+ return 0;
-+ }
-+ catch (const libcdr::EncodingException &)
-+ {
-+ ucsdet_close(csd);
-+ return 0;
-+ }
-+}
-+
- static void _appendUCS4(WPXString &text, unsigned ucs4Character)
- {
- unsigned char first;
-@@ -450,6 +532,10 @@ void libcdr::appendCharacters(WPXString &text, std::vector<unsigned char> charac
- 0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7,
- 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF
- };
-+
-+ if (!charset && characters.size())
-+ charset = getEncoding(&characters[0], characters.size());
-+
- for (std::vector<unsigned char>::const_iterator iter = characters.begin();
- iter != characters.end(); ++iter)
- {
-diff --git a/src/lib/libcdr_utils.h b/src/lib/libcdr_utils.h
-index 320891a..5958b75 100644
---- a/src/lib/libcdr_utils.h
-+++ b/src/lib/libcdr_utils.h
-@@ -133,6 +133,10 @@ class UnknownPrecisionException
- {
- };
-
-+class EncodingException
-+{
-+};
-+
- } // namespace libcdr
-
- #endif // __LIBCDR_UTILS_H__
---
-1.8.1.1
-
commit 7bf6130b715080f15b0cdc2f1149f581ae1483de
Author: Fridrich Štrba <fridrich.strba at bluewin.ch>
Date: Mon Jan 21 14:58:31 2013 +0100
Use ICU to guess encoding
diff --git a/src/lib/libcdr_utils.cpp b/src/lib/libcdr_utils.cpp
index ef94582..28162e3 100644
--- a/src/lib/libcdr_utils.cpp
+++ b/src/lib/libcdr_utils.cpp
@@ -27,6 +27,8 @@
* instead of those above.
*/
+#include <string.h>
+#include <unicode/ucsdet.h>
#include "libcdr_utils.h"
#define CDR_NUM_ELEMENTS(array) sizeof(array)/sizeof(array[0])
@@ -36,6 +38,86 @@
namespace
{
+static unsigned short getEncodingFromICUName(const char *name)
+{
+ // ANSI
+ if (strcmp(name, "ISO-8859-1") == 0)
+ return 0;
+ if (strcmp(name, "windows-1252") == 0)
+ return 0;
+ // CENTRAL EUROPE
+ if (strcmp(name, "ISO-8859-2") == 0)
+ return 0xee;
+ if (strcmp(name, "windows-1250") == 0)
+ return 0xee;
+ // RUSSIAN
+ if (strcmp(name, "ISO-8859-5") == 0)
+ return 0xcc;
+ if (strcmp(name, "windows-1251") == 0)
+ return 0xcc;
+ if (strcmp(name, "KOI8-R") == 0)
+ return 0xcc;
+ // ARABIC
+ if (strcmp(name, "ISO-8859-6") == 0)
+ return 0xb2;
+ if (strcmp(name, "windows-1256") == 0)
+ return 0xb2;
+ // TURKISH
+ if (strcmp(name, "ISO-8859-9") == 0)
+ return 0xa2;
+ if (strcmp(name, "windows-1254") == 0)
+ return 0xa2;
+ // GREEK
+ if (strcmp(name, "ISO-8859-7") == 0)
+ return 0xa1;
+ if (strcmp(name, "windows-1253") == 0)
+ return 0xa1;
+ // HEBREW
+ if (strcmp(name, "ISO-8859-8") == 0)
+ return 0xb1;
+ if (strcmp(name, "windows-1255") == 0)
+ return 0xb1;
+
+ return 0;
+}
+
+
+static unsigned short getEncoding(const unsigned char *buffer, unsigned bufferLength)
+{
+ UErrorCode status = U_ZERO_ERROR;
+ UCharsetDetector *csd = 0;
+ const UCharsetMatch *csm = 0;
+ try
+ {
+ csd = ucsdet_open(&status);
+ if (U_FAILURE(status))
+ throw libcdr::EncodingException();
+ ucsdet_setText(csd, (const char *)buffer, bufferLength, &status);
+ if (U_FAILURE(status))
+ throw libcdr::EncodingException();
+ ucsdet_enableInputFilter(csd, TRUE);
+ csm = ucsdet_detect(csd, &status);
+ if (U_FAILURE(status))
+ throw libcdr::EncodingException();
+ const char *name = ucsdet_getName(csm, &status);
+ if (U_FAILURE(status))
+ throw libcdr::EncodingException();
+ if (name)
+ {
+ unsigned short encoding = getEncodingFromICUName(name);
+ ucsdet_close(csd);
+ return encoding;
+ }
+ ucsdet_close(csd);
+ return 0;
+ }
+ catch (const libcdr::EncodingException &)
+ {
+ ucsdet_close(csd);
+ return 0;
+ }
+}
+
static void _appendUCS4(WPXString &text, unsigned ucs4Character)
{
unsigned char first;
@@ -450,6 +532,10 @@ void libcdr::appendCharacters(WPXString &text, std::vector<unsigned char> charac
0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7,
0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF
};
+
+ if (!charset && characters.size())
+ charset = getEncoding(&characters[0], characters.size());
+
for (std::vector<unsigned char>::const_iterator iter = characters.begin();
iter != characters.end(); ++iter)
{
diff --git a/src/lib/libcdr_utils.h b/src/lib/libcdr_utils.h
index 320891a..5958b75 100644
--- a/src/lib/libcdr_utils.h
+++ b/src/lib/libcdr_utils.h
@@ -133,6 +133,10 @@ class UnknownPrecisionException
{
};
+class EncodingException
+{
+};
+
} // namespace libcdr
#endif // __LIBCDR_UTILS_H__
More information about the Libreoffice-commits
mailing list