[Libreoffice-commits] core.git: external/libnumbertext lingucomponent/source

Mike Kaganski (via logerrit) logerrit at kemper.freedesktop.org
Thu Jun 18 15:45:21 UTC 2020


 external/libnumbertext/MSVCNonBMPBug.patch1             |   69 ++++++++++++++++
 external/libnumbertext/UnpackedTarball_libnumbertext.mk |    4 
 lingucomponent/source/numbertext/numbertext.cxx         |   24 +----
 3 files changed, 81 insertions(+), 16 deletions(-)

New commits:
commit 21a59b59d9a40ca32d91b05e62ffcd9aef8fd324
Author:     Mike Kaganski <mike.kaganski at collabora.com>
AuthorDate: Thu Jun 18 16:03:38 2020 +0300
Commit:     Mike Kaganski <mike.kaganski at collabora.com>
CommitDate: Thu Jun 18 17:44:49 2020 +0200

    tdf#133589 Numbertext: Use Win32API to avoid std::codecvt_utf8 bug
    
    Change-Id: I45c85db44c3dfd92e0929f66c8c95cb309c91e05
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/96609
    Reviewed-by: László Németh <nemeth at numbertext.org>
    Tested-by: Jenkins

diff --git a/external/libnumbertext/MSVCNonBMPBug.patch1 b/external/libnumbertext/MSVCNonBMPBug.patch1
new file mode 100644
index 000000000000..8ced22165bcc
--- /dev/null
+++ b/external/libnumbertext/MSVCNonBMPBug.patch1
@@ -0,0 +1,69 @@
+MSVC's std::codecvt_utf8<wchar_t> mishandles non-BMP code points such as U+10CFA.
+On Windows, use MultiByteToWideChar/WideCharToMultiByte with CP_UTF8 instead.
+
+diff --git a/src/Numbertext.cxx b/src/Numbertext.cxx
+index 5f05b48579af..eb83e59f366f 100755
+--- a/src/Numbertext.cxx
++++ b/src/Numbertext.cxx
+@@ -7,6 +7,10 @@
+ #include <sstream>
+ #include <fstream>
+ 
++#ifdef _WIN32
++#include <Windows.h>
++#endif
++
+ #include "Numbertext.hxx"
+ 
+ #ifdef NUMBERTEXT_BOOST
+@@ -22,6 +26,14 @@
+ 
+ bool readfile(const std::string& filename, std::wstring& result)
+ {
++#ifdef _WIN32
++    std::ifstream ifs(filename);
++    if (ifs.fail())
++        return false;
++    std::stringstream ss;
++    ss << ifs.rdbuf();
++    result = Numbertext::string2wstring(ss.str());
++#else
+     std::wifstream wif(filename);
+     if (wif.fail())
+         return false;
+@@ -29,6 +44,7 @@ bool readfile(const std::string& filename, std::wstring& result)
+     std::wstringstream wss;
+     wss << wif.rdbuf();
+     result = wss.str();
++#endif
+     return true;
+ }
+ 
+@@ -99,7 +112,12 @@
+ 
+ std::wstring Numbertext::string2wstring(const std::string& s)
+ {
+-#ifndef NUMBERTEXT_BOOST
++#ifdef _WIN32
++    int nSize = MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, nullptr, 0);
++    std::unique_ptr<wchar_t[]> wstr(new wchar_t[nSize]);
++    MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, wstr.get(), nSize);
++    return wstr.get();
++#elif !defined NUMBERTEXT_BOOST
+     typedef std::codecvt_utf8<wchar_t> convert_type;
+     std::wstring_convert<convert_type, wchar_t> converter;
+     return converter.from_bytes( s );
+@@ -110,7 +128,12 @@
+ 
+ std::string Numbertext::wstring2string(const std::wstring& s)
+ {
+-#ifndef NUMBERTEXT_BOOST
++#ifdef _WIN32
++    int nSize = WideCharToMultiByte(CP_UTF8, 0, s.c_str(), -1, nullptr, 0, nullptr, nullptr);
++    std::unique_ptr<char[]> str(new char[nSize]);
++    WideCharToMultiByte(CP_UTF8, 0, s.c_str(), -1, str.get(), nSize, nullptr, nullptr);
++    return str.get();
++#elif !defined NUMBERTEXT_BOOST
+     typedef std::codecvt_utf8<wchar_t> convert_type;
+     std::wstring_convert<convert_type, wchar_t> converter;
+     return converter.to_bytes( s );
diff --git a/external/libnumbertext/UnpackedTarball_libnumbertext.mk b/external/libnumbertext/UnpackedTarball_libnumbertext.mk
index 1969dcf7d08d..d2efd2f852c3 100644
--- a/external/libnumbertext/UnpackedTarball_libnumbertext.mk
+++ b/external/libnumbertext/UnpackedTarball_libnumbertext.mk
@@ -15,4 +15,8 @@ $(eval $(call gb_UnpackedTarball_update_autoconf_configs,libnumbertext))
 
 $(eval $(call gb_UnpackedTarball_set_patchlevel,libnumbertext,1))
 
+$(eval $(call gb_UnpackedTarball_add_patches,libnumbertext, \
+    external/libnumbertext/MSVCNonBMPBug.patch1 \
+))
+
 # vim: set noet sw=4 ts=4:
diff --git a/lingucomponent/source/numbertext/numbertext.cxx b/lingucomponent/source/numbertext/numbertext.cxx
index 89f5432624bf..34e7694601a3 100644
--- a/lingucomponent/source/numbertext/numbertext.cxx
+++ b/lingucomponent/source/numbertext/numbertext.cxx
@@ -21,7 +21,7 @@
 
 #include <osl/file.hxx>
 #include <tools/debug.hxx>
-#include <rtl/ustrbuf.hxx>
+#include <o3tl/char16_t2wchar_t.hxx>
 
 #include <sal/config.h>
 #include <cppuhelper/factory.hxx>
@@ -132,26 +132,18 @@ OUString SAL_CALL NumberText_Impl::getNumberText(const OUString& rText, const Lo
     if (!aCountry.isEmpty())
         aCode += "-" + aCountry;
     OString aLangCode(OUStringToOString(aCode, RTL_TEXTENCODING_ASCII_US));
+#if defined(_WIN32)
+    std::wstring sResult(o3tl::toW(rText.getStr()));
+#else
     OString aInput(OUStringToOString(rText, RTL_TEXTENCODING_UTF8));
     std::wstring sResult = Numbertext::string2wstring(aInput.getStr());
+#endif
     bool result = m_aNumberText.numbertext(sResult, aLangCode.getStr());
     DBG_ASSERT(result, "numbertext: false");
-    OUString aResult = OUString::fromUtf8(Numbertext::wstring2string(sResult).c_str());
 #if defined(_WIN32)
-    // workaround to fix non-BMP Unicode characters resulted by wstring limitation
-    if (!aScript.isEmpty() && aScript == "Hung")
-    {
-        OUStringBuffer aFix;
-        for (int i = 0; i < aResult.getLength(); ++i)
-        {
-            sal_Unicode c = aResult[i];
-            if (0x0C80 <= c && c <= 0x0CFF)
-                aFix.append(sal_Unicode(0xD803)).append(sal_Unicode(c + 0xD000));
-            else
-                aFix.append(c);
-        }
-        aResult = aFix.makeStringAndClear();
-    }
+    OUString aResult(o3tl::toU(sResult.c_str()));
+#else
+    OUString aResult = OUString::fromUtf8(Numbertext::wstring2string(sResult).c_str());
 #endif
     return aResult;
 }


More information about the Libreoffice-commits mailing list