[Libreoffice-commits] core.git: i18npool/inc i18npool/qa i18npool/source

Mon May 12 02:56:37 PDT 2014

 i18npool/inc/xdictionary.hxx                  |   26 +++-
 i18npool/qa/cppunit/test_breakiterator.cxx    |   20 ++-
 i18npool/source/breakiterator/xdictionary.cxx |  147 ++++++++++++++------------
 3 files changed, 117 insertions(+), 76 deletions(-)

New commits:
commit 69a74afb07c7c58b201d315dbd254bc50b9c9f03
Author: Michael Meeks <michael.meeks at collabora.com>
Date:   Fri May 9 16:28:18 2014 +0100

    Avoid expensive dlopen thrash for break iterators.
    
    Change-Id: I770c1b3e5164cb486b5a5c2b1259f713914a1bae

diff --git a/i18npool/inc/xdictionary.hxx b/i18npool/inc/xdictionary.hxx
index 13078e8..2b1ece0 100644
--- a/i18npool/inc/xdictionary.hxx
+++ b/i18npool/inc/xdictionary.hxx
@@ -40,17 +40,29 @@ struct WordBreakCache {
     bool equals(const sal_Unicode *str, Boundary& boundary);    // checking cached string
 };
 
-class xdictionary
+struct xdictionarydata
 {
-private:
     const sal_uInt8  * existMark;
     const sal_Int16  * index1;
-    const sal_Int32 * index2;
-    const sal_Int32 * lenArray;
+    const sal_Int32  * index2;
+    const sal_Int32  * lenArray;
     const sal_Unicode* dataArea;
-#ifndef DISABLE_DYNLOADING
-    oslModule hModule;
-#endif
+    xdictionarydata() :
+        existMark( NULL ),
+        index1( NULL ),
+        index2( NULL ),
+        lenArray( NULL ),
+        dataArea( NULL )
+    {
+    }
+};
+
+class xdictionary
+{
+private:
+    xdictionarydata data;
+    void initDictionaryData(const sal_Char *lang);
+
     Boundary boundary;
     bool japaneseWordBreak;
 
diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx
index 6f7b537..36e3d3b 100644
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -68,6 +68,7 @@ public:
     CPPUNIT_TEST_SUITE_END();
 private:
     uno::Reference<i18n::XBreakIterator> m_xBreak;
+    void doTestJapanese(uno::Reference< i18n::XBreakIterator > &xBreak);
 };
 
 void TestBreakIterator::testLineBreaking()
@@ -906,7 +907,7 @@ void TestBreakIterator::testKhmer()
 }
 #endif
 
-void TestBreakIterator::testJapanese()
+void TestBreakIterator::doTestJapanese(uno::Reference< i18n::XBreakIterator > &xBreak)
 {
     lang::Locale aLocale;
     aLocale.Language = "ja";
@@ -917,7 +918,7 @@ void TestBreakIterator::testJapanese()
         const sal_Unicode JAPANESE[] = { 0x30B7, 0x30E3, 0x30C3, 0x30C8, 0x30C0, 0x30A6, 0x30F3 };
 
         OUString aTest(JAPANESE, SAL_N_ELEMENTS(JAPANESE));
-        aBounds = m_xBreak->getWordBoundary(aTest, 5, aLocale,
+        aBounds = xBreak->getWordBoundary(aTest, 5, aLocale,
             i18n::WordType::DICTIONARY_WORD, true);
 
         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 7);
@@ -927,18 +928,29 @@ void TestBreakIterator::testJapanese()
         const sal_Unicode JAPANESE[] = { 0x9EBB, 0x306E, 0x8449, 0x9EBB, 0x306E, 0x8449 };
 
         OUString aTest(JAPANESE, SAL_N_ELEMENTS(JAPANESE));
-        aBounds = m_xBreak->getWordBoundary(aTest, 1, aLocale,
+        aBounds = xBreak->getWordBoundary(aTest, 1, aLocale,
             i18n::WordType::DICTIONARY_WORD, true);
 
         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 3);
 
-        aBounds = m_xBreak->getWordBoundary(aTest, 5, aLocale,
+        aBounds = xBreak->getWordBoundary(aTest, 5, aLocale,
             i18n::WordType::DICTIONARY_WORD, true);
 
         CPPUNIT_ASSERT(aBounds.startPos == 3 && aBounds.endPos == 6);
     }
 }
 
+void TestBreakIterator::testJapanese()
+{
+    doTestJapanese(m_xBreak);
+
+    // fdo#78479 - test second / cached instantiation of xdictionary
+    uno::Reference< i18n::XBreakIterator > xTmpBreak(m_xSFactory->createInstance(
+        "com.sun.star.i18n.BreakIterator"), uno::UNO_QUERY_THROW);
+
+    doTestJapanese(xTmpBreak);
+}
+
 void TestBreakIterator::testChinese()
 {
     lang::Locale aLocale;
diff --git a/i18npool/source/breakiterator/xdictionary.cxx b/i18npool/source/breakiterator/xdictionary.cxx
index 6ded123..e83b529 100644
--- a/i18npool/source/breakiterator/xdictionary.cxx
+++ b/i18npool/source/breakiterator/xdictionary.cxx
@@ -20,6 +20,7 @@
 #include <config_folders.h>
 
 #include <osl/file.h>
+#include <osl/mutex.hxx>
 #include <rtl/ustrbuf.hxx>
 #include <rtl/bootstrap.hxx>
 #include <com/sun/star/i18n/WordType.hpp>
@@ -57,22 +58,9 @@ sal_Unicode* getDataArea_zh();
 #endif
 
 xdictionary::xdictionary(const sal_Char *lang) :
-    existMark( NULL ),
-    index1( NULL ),
-    index2( NULL ),
-    lenArray( NULL ),
-    dataArea( NULL ),
-#ifndef DISABLE_DYNLOADING
-    hModule( NULL ),
-#endif
     boundary(),
     japaneseWordBreak( false )
 {
-    existMark = NULL;
-    index1 = NULL;
-    index2 = NULL;
-    lenArray = NULL;
-    dataArea = NULL;
 
 #ifdef DICT_JA_ZH_IN_DATAFILE
 
@@ -96,53 +84,33 @@ xdictionary::xdictionary(const sal_Char *lang) :
             // We have the offsets to the parts of the file at its end, see gendict.cxx
             sal_Int64 *pEOF = (sal_Int64*)(pMapping + nFileSize);
 
-            existMark = (sal_uInt8*) (pMapping + pEOF[-1]);
-            index2 = (sal_Int32*) (pMapping + pEOF[-2]);
-            index1 = (sal_Int16*) (pMapping + pEOF[-3]);
-            lenArray = (sal_Int32*) (pMapping + pEOF[-4]);
-            dataArea = (sal_Unicode*) (pMapping + pEOF[-5]);
+            data.existMark = (sal_uInt8*) (pMapping + pEOF[-1]);
+            data.index2 = (sal_Int32*) (pMapping + pEOF[-2]);
+            data.index1 = (sal_Int16*) (pMapping + pEOF[-3]);
+            data.lenArray = (sal_Int32*) (pMapping + pEOF[-4]);
+            data.dataArea = (sal_Unicode*) (pMapping + pEOF[-5]);
         }
     }
 
 #elif !defined DISABLE_DYNLOADING
 
-#ifdef SAL_DLLPREFIX
-    OUStringBuffer aBuf( strlen(lang) + 7 + 6 );    // mostly "lib*.so" (with * == dict_zh)
-    aBuf.appendAscii( SAL_DLLPREFIX );
-#else
-    OUStringBuffer aBuf( strlen(lang) + 7 + 4 );    // mostly "*.dll" (with * == dict_zh)
-#endif
-    aBuf.appendAscii( "dict_" ).appendAscii( lang ).appendAscii( SAL_DLLEXTENSION );
-    hModule = osl_loadModuleRelative( &thisModule, aBuf.makeStringAndClear().pData, SAL_LOADMODULE_DEFAULT );
-    if( hModule ) {
-        sal_IntPtr (*func)();
-        func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString("getExistMark").pData );
-        existMark = (sal_uInt8*) (*func)();
-        func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString("getIndex1").pData );
-        index1 = (sal_Int16*) (*func)();
-        func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString("getIndex2").pData );
-        index2 = (sal_Int32*) (*func)();
-        func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString("getLenArray").pData );
-        lenArray = (sal_Int32*) (*func)();
-        func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString("getDataArea").pData );
-        dataArea = (sal_Unicode*) (*func)();
-    }
+    initDictionaryData( lang );
 
 #else
 
     if( strcmp( lang, "ja" ) == 0 ) {
-        existMark = getExistMark_ja();
-        index1 = getIndex1_ja();
-        index2 = getIndex2_ja();
-        lenArray = getLenArray_ja();
-        dataArea = getDataArea_ja();
+        data.existMark = getExistMark_ja();
+        data.index1 = getIndex1_ja();
+        data.index2 = getIndex2_ja();
+        data.lenArray = getLenArray_ja();
+        data.dataArea = getDataArea_ja();
     }
     else if( strcmp( lang, "zh" ) == 0 ) {
-        existMark = getExistMark_zh();
-        index1 = getIndex1_zh();
-        index2 = getIndex2_zh();
-        lenArray = getLenArray_zh();
-        dataArea = getDataArea_zh();
+        data.existMark = getExistMark_zh();
+        data.index1 = getIndex1_zh();
+        data.index2 = getIndex2_zh();
+        data.lenArray = getLenArray_zh();
+        data.dataArea = getDataArea_zh();
     }
 
 #endif
@@ -155,15 +123,65 @@ xdictionary::xdictionary(const sal_Char *lang) :
 
 xdictionary::~xdictionary()
 {
-#ifndef DISABLE_DYNLOADING
-        osl_unloadModule(hModule);
-#endif
-        for (sal_Int32 i = 0; i < CACHE_MAX; i++) {
-            if (cache[i].size > 0) {
-                delete [] cache[i].contents;
-                delete [] cache[i].wordboundary;
-            }
+    for (sal_Int32 i = 0; i < CACHE_MAX; i++) {
+        if (cache[i].size > 0) {
+            delete [] cache[i].contents;
+            delete [] cache[i].wordboundary;
+        }
+    }
+}
+
+namespace {
+    struct datacache {
+        oslModule       mhModule;
+        OString         maLang;
+        xdictionarydata maData;
+    };
+}
+
+void xdictionary::initDictionaryData(const sal_Char *pLang)
+{
+    // Global cache, never released for performance
+    static std::vector< datacache > aLoadedCache;
+
+    osl::MutexGuard aGuard( osl::Mutex::getGlobalMutex() );
+    for( size_t i = 0; i < aLoadedCache.size(); ++i )
+    {
+        if( !strcmp( pLang, aLoadedCache[ i ].maLang.getStr() ) )
+        {
+            data = aLoadedCache[ i ].maData;
+            return;
         }
+    }
+
+    // otherwise add to the cache, positive or negative.
+    datacache aEntry;
+    aEntry.maLang = OString( pLang, strlen( pLang ) );
+
+#ifdef SAL_DLLPREFIX
+    OUStringBuffer aBuf( strlen( pLang ) + 7 + 6 );    // mostly "lib*.so" (with * == dict_zh)
+    aBuf.appendAscii( SAL_DLLPREFIX );
+#else
+    OUStringBuffer aBuf( strlen( pLang ) + 7 + 4 );    // mostly "*.dll" (with * == dict_zh)
+#endif
+    aBuf.appendAscii( "dict_" ).appendAscii( pLang ).appendAscii( SAL_DLLEXTENSION );
+    aEntry.mhModule = osl_loadModuleRelative( &thisModule, aBuf.makeStringAndClear().pData, SAL_LOADMODULE_DEFAULT );
+    if( aEntry.mhModule ) {
+        sal_IntPtr (*func)();
+        func = (sal_IntPtr(*)()) osl_getFunctionSymbol( aEntry.mhModule, OUString("getExistMark").pData );
+        aEntry.maData.existMark = (sal_uInt8*) (*func)();
+        func = (sal_IntPtr(*)()) osl_getFunctionSymbol( aEntry.mhModule, OUString("getIndex1").pData );
+        aEntry.maData.index1 = (sal_Int16*) (*func)();
+        func = (sal_IntPtr(*)()) osl_getFunctionSymbol( aEntry.mhModule, OUString("getIndex2").pData );
+        aEntry.maData.index2 = (sal_Int32*) (*func)();
+        func = (sal_IntPtr(*)()) osl_getFunctionSymbol( aEntry.mhModule, OUString("getLenArray").pData );
+        aEntry.maData.lenArray = (sal_Int32*) (*func)();
+        func = (sal_IntPtr(*)()) osl_getFunctionSymbol( aEntry.mhModule, OUString("getDataArea").pData );
+        aEntry.maData.dataArea = (sal_Unicode*) (*func)();
+    }
+
+    data = aEntry.maData;
+    aLoadedCache.push_back( aEntry );
 }
 
 void xdictionary::setJapaneseWordBreak()
@@ -173,8 +191,8 @@ void xdictionary::setJapaneseWordBreak()
 
 bool xdictionary::exists(const sal_uInt32 c)
 {
-    // 0x1FFF is the hardcoded limit in gendict for existMarks
-    bool exist = (existMark && ((c>>3) < 0x1FFF)) ? sal::static_int_cast<sal_Bool>((existMark[c>>3] & (1<<(c&0x07))) != 0) : sal_False;
+    // 0x1FFF is the hardcoded limit in gendict for data.existMarks
+    bool exist = (data.existMark && ((c>>3) < 0x1FFF)) ? sal::static_int_cast<sal_Bool>((data.existMark[c>>3] & (1<<(c&0x07))) != 0) : sal_False;
     if (!exist && japaneseWordBreak)
         return BreakIteratorImpl::getScriptClass(c) == ScriptType::ASIAN;
     else
@@ -183,24 +201,23 @@ bool xdictionary::exists(const sal_uInt32 c)
 
 sal_Int32 xdictionary::getLongestMatch(const sal_Unicode* str, sal_Int32 sLen)
 {
+    if ( !data.index1 ) return 0;
 
-    if ( !index1 ) return 0;
-
-    sal_Int16 idx = index1[str[0] >> 8];
+    sal_Int16 idx = data.index1[str[0] >> 8];
 
     if (idx == 0xFF) return 0;
 
     idx = (idx<<8) | (str[0]&0xff);
 
-    sal_uInt32 begin = index2[idx], end = index2[idx+1];
+    sal_uInt32 begin = data.index2[idx], end = data.index2[idx+1];
 
     if (begin == 0) return 0;
 
     str++; sLen--; // first character is not stored in the dictionary
     for (sal_uInt32 i = end; i > begin; i--) {
-        sal_Int32 len = lenArray[i] - lenArray[i - 1];
+        sal_Int32 len = data.lenArray[i] - data.lenArray[i - 1];
         if (sLen >= len) {
-            const sal_Unicode *dstr = dataArea + lenArray[i-1];
+            const sal_Unicode *dstr = data.dataArea + data.lenArray[i-1];
             sal_Int32 pos = 0;
 
             while (pos < len && dstr[pos] == str[pos]) { pos++; }