[Libreoffice-commits] .: Branch 'libreoffice-4-0' - 2 commits - i18npool/inc i18npool/qa i18npool/source lingucomponent/source

Libreoffice Gerrit user logerrit at kemper.freedesktop.org
Tue Dec 11 04:11:32 PST 2012


 i18npool/inc/i18npool/languagetag.hxx       |   28 ++
 i18npool/qa/cppunit/test_languagetag.cxx    |   42 +++
 i18npool/source/languagetag/languagetag.cxx |  317 ++++++++++++++++++++++------
 lingucomponent/source/lingutil/lingutil.cxx |   21 -
 4 files changed, 323 insertions(+), 85 deletions(-)

New commits:
commit c98a5f1a446196b5158c67d89931470a0c701bcd
Author: Eike Rathke <erack at redhat.com>
Date:   Tue Dec 11 12:57:52 2012 +0100

    do not trigger liblangtag init during startup
    
    Initializing the liblangtag database takes a large share of startup
    time, so avoid triggering it there.
    
    Change-Id: Ieb12ff71c2b1421a5644acebaef4c8366694c249
    (cherry picked from commit 90874ab3c75fd161a672a24538cbce909f284e97)

diff --git a/lingucomponent/source/lingutil/lingutil.cxx b/lingucomponent/source/lingutil/lingutil.cxx
index 5edd78a..194f0b7 100644
--- a/lingucomponent/source/lingutil/lingutil.cxx
+++ b/lingucomponent/source/lingutil/lingutil.cxx
@@ -147,20 +147,15 @@ std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char *pDicTy
                     nStartIndex - aSystemPrefix.getLength());
                 if (sChunk.isEmpty())
                     continue;
-                //We prefer (now) to use language tags
+                // We prefer (now) to use language tags.
+                // Avoid feeding in the older LANG_REGION scheme to the BCP47
+                // ctor as that triggers use of liblangtag and initializes its
+                // database which we do not want during startup. Convert
+                // instead.
+                sal_Int32 nPos;
+                if (sChunk.indexOf('-') < 0 && ((nPos = sChunk.indexOf('_')) > 0))
+                    sChunk = sChunk.replaceAt( nPos, 1, OUString('-'));
                 LanguageTag aLangTag(sChunk, true);
-                //On failure try older basic LANG_REGION scheme
-                if (!aLangTag.isValidBcp47())
-                {
-                    sal_Int32 nIndex = 0;
-                    OUString sLang = sChunk.getToken(0, '_', nIndex);
-                    if (!sLang.getLength())
-                        continue;
-                    OUString sRegion;
-                    if (nIndex != -1)
-                       sRegion = sChunk.copy(nIndex);
-                    aLangTag = LanguageTag(sLang, sRegion);
-                }
                 if (!aLangTag.isValidBcp47())
                     continue;
 
commit 19095016e005bc084a1b032ac5c70abf82178e02
Author: Eike Rathke <erack at redhat.com>
Date:   Tue Dec 11 11:46:19 2012 +0100

    avoid liblangtag processing for known locales, and know 'qtz'
    
    For "simple" known (to us) locales avoid the overhead of liblangtag,
    also in preparation for not having to initialize its database during
    startup.
    
    This also enables recognition of the 'qtz' pseudolocale for key ID
    resources again, see fdo#57413
    
    Change-Id: Id52a1ddc37b42063e3bf68a6dbeaeacfbfa704ef
    (cherry picked from commit 4d24507451fbc2b7d72cf5f7048a424d52b63850)

diff --git a/i18npool/inc/i18npool/languagetag.hxx b/i18npool/inc/i18npool/languagetag.hxx
index 0b3eedd..bb86b72 100644
--- a/i18npool/inc/i18npool/languagetag.hxx
+++ b/i18npool/inc/i18npool/languagetag.hxx
@@ -229,6 +229,7 @@ private:
     mutable Decision                        meIsValid;
     mutable Decision                        meIsIsoLocale;
     mutable Decision                        meIsIsoODF;
+    mutable Decision                        meIsLiblangtagNeeded;   ///< whether processing with liblangtag needed
             bool                            mbSystemLocale      : 1;
     mutable bool                            mbInitializedBcp47  : 1;
     mutable bool                            mbInitializedLocale : 1;
@@ -245,17 +246,36 @@ private:
     void    convertLangToLocale();
     void    convertLangToBcp47();
 
-    bool    canonicalize() const;
+    bool    canonicalize();
 
-    rtl::OUString   getLanguageFromLangtag() const;
-    rtl::OUString   getScriptFromLangtag() const;
-    rtl::OUString   getRegionFromLangtag() const;
+    rtl::OUString   getLanguageFromLangtag();
+    rtl::OUString   getScriptFromLangtag();
+    rtl::OUString   getRegionFromLangtag();
 
     void            resetVars();
 
+    /** Obtain Language, Script and Country via simpleExtract() and assign them
+        to the cached variables if successful.
+
+        @return return of simpleExtract()
+     */
+    bool            cacheSimpleLSC();
+
     static bool     isIsoLanguage( const rtl::OUString& rLanguage );
     static bool     isIsoScript( const rtl::OUString& rScript );
     static bool     isIsoCountry( const rtl::OUString& rRegion );
+
+    /** Of a simple language tag of the form lll[-Ssss][-CC] (i.e. one that
+        would fulfill the isIsoODF() condition) extract the portions.
+
+        Does not check case or content!
+
+        @return TRUE if it detected a simple tag, else FALSE.
+     */
+    static bool     simpleExtract( const rtl::OUString& rBcp47,
+                                   rtl::OUString& rLanguage,
+                                   rtl::OUString& rScript,
+                                   rtl::OUString& rCountry );
 };
 
 #endif  // INCLUDED_I18NPOOL_LANGUAGETAG_HXX
diff --git a/i18npool/qa/cppunit/test_languagetag.cxx b/i18npool/qa/cppunit/test_languagetag.cxx
index deaeecd..f748802 100644
--- a/i18npool/qa/cppunit/test_languagetag.cxx
+++ b/i18npool/qa/cppunit/test_languagetag.cxx
@@ -61,13 +61,21 @@ void TestLanguageTag::testAllTags()
         CPPUNIT_ASSERT( aLocale.Country == "DE" );
         CPPUNIT_ASSERT( aLocale.Variant == "" );
         CPPUNIT_ASSERT( nLanguageType == LANGUAGE_GERMAN );
+        CPPUNIT_ASSERT( de_DE.getLanguage() == "de" );
+        CPPUNIT_ASSERT( de_DE.getCountry() == "DE" );
+        CPPUNIT_ASSERT( de_DE.getScript() == "" );
+        CPPUNIT_ASSERT( de_DE.getLanguageAndScript() == "de" );
 #else
         // The simple replacement code doesn't do any fancy stuff.
         CPPUNIT_ASSERT_MESSAGE("Default script was stripped after canonicalize!?!", aBcp47 == s_de_Latn_DE );
         CPPUNIT_ASSERT( aLocale.Language == "qlt" );
         CPPUNIT_ASSERT( aLocale.Country == "DE" );
         CPPUNIT_ASSERT( aLocale.Variant == "de-Latn-DE" );
-        (void)nLanguageType; //XXX CPPUNIT_ASSERT( nLanguageType == LANGUAGE_GERMAN );
+        CPPUNIT_ASSERT( nLanguageType == LANGUAGE_SYSTEM );     // XXX not resolved!
+        CPPUNIT_ASSERT( de_DE.getLanguage() == "de" );
+        CPPUNIT_ASSERT( de_DE.getCountry() == "DE" );
+        CPPUNIT_ASSERT( de_DE.getScript() == "Latn" );
+        CPPUNIT_ASSERT( de_DE.getLanguageAndScript() == "de-Latn" );
 #endif
     }
 
@@ -127,6 +135,10 @@ void TestLanguageTag::testAllTags()
         CPPUNIT_ASSERT( sr_RS.isValidBcp47() == true );
         CPPUNIT_ASSERT( sr_RS.isIsoLocale() == false );
         CPPUNIT_ASSERT( sr_RS.isIsoODF() == true );
+        CPPUNIT_ASSERT( sr_RS.getLanguage() == "sr" );
+        CPPUNIT_ASSERT( sr_RS.getCountry() == "RS" );
+        CPPUNIT_ASSERT( sr_RS.getScript() == "Latn" );
+        CPPUNIT_ASSERT( sr_RS.getLanguageAndScript() == "sr-Latn" );
     }
 
     {
@@ -141,6 +153,10 @@ void TestLanguageTag::testAllTags()
         CPPUNIT_ASSERT( de_DE.isValidBcp47() == true );
         CPPUNIT_ASSERT( de_DE.isIsoLocale() == true );
         CPPUNIT_ASSERT( de_DE.isIsoODF() == true );
+        CPPUNIT_ASSERT( de_DE.getLanguage() == "de" );
+        CPPUNIT_ASSERT( de_DE.getCountry() == "DE" );
+        CPPUNIT_ASSERT( de_DE.getScript() == "" );
+        CPPUNIT_ASSERT( de_DE.getLanguageAndScript() == "de" );
     }
 
     {
@@ -165,6 +181,30 @@ void TestLanguageTag::testAllTags()
         CPPUNIT_ASSERT( de_DE.getLanguageType() == LANGUAGE_GERMAN );
     }
 
+    // 'qtz' is a local use known pseudolocale for key ID resource
+    {
+        OUString s_qtz( "qtz" );
+        LanguageTag qtz( s_qtz );
+        lang::Locale aLocale = qtz.getLocale();
+        CPPUNIT_ASSERT( qtz.getBcp47() == s_qtz );
+        CPPUNIT_ASSERT( aLocale.Language == "qtz" );
+        CPPUNIT_ASSERT( aLocale.Country == "" );
+        CPPUNIT_ASSERT( aLocale.Variant == "" );
+        CPPUNIT_ASSERT( qtz.getLanguageType() == LANGUAGE_USER_KEYID );
+    }
+
+    // 'qty' is a local use unknown locale
+    {
+        OUString s_qty( "qty" );
+        LanguageTag qty( s_qty );
+        lang::Locale aLocale = qty.getLocale();
+        CPPUNIT_ASSERT( qty.getBcp47() == s_qty );
+        CPPUNIT_ASSERT( aLocale.Language == "qty" );
+        CPPUNIT_ASSERT( aLocale.Country == "" );
+        CPPUNIT_ASSERT( aLocale.Variant == "" );
+        CPPUNIT_ASSERT( qty.getLanguageType() == LANGUAGE_SYSTEM );
+    }
+
     // test reset() methods
     {
         LanguageTag aTag( LANGUAGE_DONTKNOW );
diff --git a/i18npool/source/languagetag/languagetag.cxx b/i18npool/source/languagetag/languagetag.cxx
index 94cf2e1..bab2443 100644
--- a/i18npool/source/languagetag/languagetag.cxx
+++ b/i18npool/source/languagetag/languagetag.cxx
@@ -99,6 +99,7 @@ LiblantagDataRef::~LiblantagDataRef()
 
 void LiblantagDataRef::setup()
 {
+    SAL_INFO( "i18npool.langtag", "LiblantagDataRef::setup: initializing database");
     if (maDataPath.isEmpty())
         setupDataPath();
     lt_db_initialize();
@@ -108,6 +109,7 @@ void LiblantagDataRef::setup()
 
 void LiblantagDataRef::teardown()
 {
+    SAL_INFO( "i18npool.langtag", "LiblantagDataRef::teardown: finalizing database");
     lt_db_finalize();
 }
 
@@ -142,6 +144,7 @@ LanguageTag::LanguageTag( const rtl::OUString & rBcp47LanguageTag, bool bCanonic
         meIsValid( DECISION_DONTKNOW),
         meIsIsoLocale( DECISION_DONTKNOW),
         meIsIsoODF( DECISION_DONTKNOW),
+        meIsLiblangtagNeeded( DECISION_DONTKNOW),
         mbSystemLocale( rBcp47LanguageTag.isEmpty()),
         mbInitializedBcp47( !mbSystemLocale),
         mbInitializedLocale( false),
@@ -151,8 +154,6 @@ LanguageTag::LanguageTag( const rtl::OUString & rBcp47LanguageTag, bool bCanonic
         mbCachedCountry( false),
         mbIsFallback( false)
 {
-    theDataRef::get().incRef();
-
     if (bCanonicalize)
         canonicalize();
 }
@@ -166,6 +167,7 @@ LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale )
         meIsValid( DECISION_DONTKNOW),
         meIsIsoLocale( DECISION_DONTKNOW),
         meIsIsoODF( DECISION_DONTKNOW),
+        meIsLiblangtagNeeded( DECISION_DONTKNOW),
         mbSystemLocale( rLocale.Language.isEmpty()),
         mbInitializedBcp47( false),
         mbInitializedLocale( !mbSystemLocale),
@@ -175,7 +177,6 @@ LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale )
         mbCachedCountry( false),
         mbIsFallback( false)
 {
-    theDataRef::get().incRef();
 }
 
 
@@ -186,6 +187,7 @@ LanguageTag::LanguageTag( LanguageType nLanguage )
         meIsValid( DECISION_DONTKNOW),
         meIsIsoLocale( DECISION_DONTKNOW),
         meIsIsoODF( DECISION_DONTKNOW),
+        meIsLiblangtagNeeded( DECISION_DONTKNOW),
         mbSystemLocale( nLanguage == LANGUAGE_SYSTEM),
         mbInitializedBcp47( false),
         mbInitializedLocale( false),
@@ -195,7 +197,6 @@ LanguageTag::LanguageTag( LanguageType nLanguage )
         mbCachedCountry( false),
         mbIsFallback( false)
 {
-    theDataRef::get().incRef();
 }
 
 
@@ -207,6 +208,7 @@ LanguageTag::LanguageTag( const rtl::OUString& rLanguage, const rtl::OUString& r
         meIsValid( DECISION_DONTKNOW),
         meIsIsoLocale( DECISION_DONTKNOW),
         meIsIsoODF( DECISION_DONTKNOW),
+        meIsLiblangtagNeeded( DECISION_DONTKNOW),
         mbSystemLocale( rLanguage.isEmpty()),
         mbInitializedBcp47( false),
         mbInitializedLocale( !mbSystemLocale),
@@ -216,7 +218,6 @@ LanguageTag::LanguageTag( const rtl::OUString& rLanguage, const rtl::OUString& r
         mbCachedCountry( false),
         mbIsFallback( false)
 {
-    theDataRef::get().incRef();
 }
 
 
@@ -228,6 +229,7 @@ LanguageTag::LanguageTag( const rtl_Locale & rLocale )
         meIsValid( DECISION_DONTKNOW),
         meIsIsoLocale( DECISION_DONTKNOW),
         meIsIsoODF( DECISION_DONTKNOW),
+        meIsLiblangtagNeeded( DECISION_DONTKNOW),
         mbSystemLocale( maLocale.Language.isEmpty()),
         mbInitializedBcp47( false),
         mbInitializedLocale( !mbSystemLocale),
@@ -237,7 +239,6 @@ LanguageTag::LanguageTag( const rtl_Locale & rLocale )
         mbCachedCountry( false),
         mbIsFallback( false)
 {
-    theDataRef::get().incRef();
 }
 
 
@@ -254,6 +255,7 @@ LanguageTag::LanguageTag( const LanguageTag & rLanguageTag )
         meIsValid( rLanguageTag.meIsValid),
         meIsIsoLocale( rLanguageTag.meIsIsoLocale),
         meIsIsoODF( rLanguageTag.meIsIsoODF),
+        meIsLiblangtagNeeded( rLanguageTag.meIsLiblangtagNeeded),
         mbSystemLocale( rLanguageTag.mbSystemLocale),
         mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
         mbInitializedLocale( rLanguageTag.mbInitializedLocale),
@@ -263,7 +265,8 @@ LanguageTag::LanguageTag( const LanguageTag & rLanguageTag )
         mbCachedCountry( rLanguageTag.mbCachedCountry),
         mbIsFallback( rLanguageTag.mbIsFallback)
 {
-    theDataRef::get().incRef();
+    if (mpImplLangtag)
+        theDataRef::get().incRef();
 }
 
 
@@ -281,6 +284,7 @@ LanguageTag& LanguageTag::operator=( const LanguageTag & rLanguageTag )
     meIsValid           = rLanguageTag.meIsValid;
     meIsIsoLocale       = rLanguageTag.meIsIsoLocale;
     meIsIsoODF          = rLanguageTag.meIsIsoODF;
+    meIsLiblangtagNeeded= rLanguageTag.meIsLiblangtagNeeded;
     mbSystemLocale      = rLanguageTag.mbSystemLocale;
     mbInitializedBcp47  = rLanguageTag.mbInitializedBcp47;
     mbInitializedLocale = rLanguageTag.mbInitializedLocale;
@@ -289,22 +293,30 @@ LanguageTag& LanguageTag::operator=( const LanguageTag & rLanguageTag )
     mbCachedScript      = rLanguageTag.mbCachedScript;
     mbCachedCountry     = rLanguageTag.mbCachedCountry;
     mbIsFallback        = rLanguageTag.mbIsFallback;
+    if (mpImplLangtag)
+        theDataRef::get().incRef();
     return *this;
 }
 
 
 LanguageTag::~LanguageTag()
 {
-    lt_tag_unref( MPLANGTAG);
-
-    theDataRef::get().decRef();
+    if (mpImplLangtag)
+    {
+        lt_tag_unref( MPLANGTAG);
+        theDataRef::get().decRef();
+    }
 }
 
 
 void LanguageTag::resetVars()
 {
-    lt_tag_unref( MPLANGTAG);
-    mpImplLangtag = NULL;
+    if (mpImplLangtag)
+    {
+        lt_tag_unref( MPLANGTAG);
+        mpImplLangtag = NULL;
+        theDataRef::get().decRef();
+    }
 
     maLocale            = lang::Locale();
     if (!maBcp47.isEmpty())
@@ -319,6 +331,7 @@ void LanguageTag::resetVars()
     meIsValid           = DECISION_DONTKNOW;
     meIsIsoLocale       = DECISION_DONTKNOW;
     meIsIsoODF          = DECISION_DONTKNOW;
+    meIsLiblangtagNeeded= DECISION_DONTKNOW;
     mbSystemLocale      = true;
     mbInitializedBcp47  = false;
     mbInitializedLocale = false;
@@ -360,7 +373,7 @@ void LanguageTag::reset( LanguageType nLanguage )
 }
 
 
-bool LanguageTag::canonicalize() const
+bool LanguageTag::canonicalize()
 {
 #ifdef erDEBUG
     // dump once
@@ -373,9 +386,99 @@ bool LanguageTag::canonicalize() const
     dumper aDumper( &mpImplLangtag);
 #endif
 
-    getBcp47( true );   // side effect: have maBcp47 in any case, resolved system
+    // Side effect: have maBcp47 in any case, resolved system.
+    // Some methods calling canonicalize() (or not calling it due to
+    // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
+    // meIsLiblangtagNeeded anywhere else than hereafter.
+    getBcp47( true );
+
+    // The simple cases and known locales don't need liblangtag processing,
+    // which also avoids loading liblangtag data on startup.
+    if (meIsLiblangtagNeeded == DECISION_DONTKNOW)
+    {
+        bool bTemporaryLocale = false;
+        bool bTemporaryLangID = false;
+        if (!mbInitializedLocale && !mbInitializedLangID)
+        {
+            if (mbSystemLocale)
+            {
+                mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
+                mbInitializedLangID = true;
+            }
+            else
+            {
+                // Now this is getting funny.. we only have some BCP47 string
+                // and want to determine if parsing it would be possible
+                // without using liblangtag just to see if it is a simple known
+                // locale.
+                OUString aLanguage, aScript, aCountry;
+                if (simpleExtract( maBcp47, aLanguage, aScript, aCountry))
+                {
+                    if (aScript.isEmpty())
+                    {
+                        maLocale.Language = aLanguage;
+                        maLocale.Country  = aCountry;
+                    }
+                    else
+                    {
+                        maLocale.Language = ISO639_LANGUAGE_TAG;
+                        maLocale.Country  = aCountry;
+                        maLocale.Variant  = maBcp47;
+                    }
+                    bTemporaryLocale = mbInitializedLocale = true;
+                }
+            }
+        }
+        if (mbInitializedLangID && !mbInitializedLocale)
+        {
+            // Do not call getLocale() here because that prefers
+            // convertBcp47ToLocale() which would end up in recursion via
+            // isIsoLocale()!
+
+            // Prepare to verify that we have a known locale, not just an
+            // arbitrary MS-LangID.
+            convertLangToLocale();
+        }
+        if (mbInitializedLocale)
+        {
+            if (maLocale.Variant.isEmpty())
+                meIsLiblangtagNeeded = DECISION_NO;     // per definition ll[l][-CC]
+            else
+            {
+                if (!mbInitializedLangID)
+                {
+                    convertLocaleToLang();
+                    if (bTemporaryLocale)
+                        bTemporaryLangID = true;
+                }
+                if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM)
+                    meIsLiblangtagNeeded = DECISION_NO; // known locale
+            }
+        }
+        if (bTemporaryLocale)
+        {
+            mbInitializedLocale = false;
+            maLocale = lang::Locale();
+        }
+        if (bTemporaryLangID)
+        {
+            mbInitializedLangID = false;
+            mnLangID = LANGUAGE_DONTKNOW;
+        }
+    }
+    if (meIsLiblangtagNeeded == DECISION_NO)
+    {
+        meIsValid = DECISION_YES;   // really, known must be valid ...
+        return true;                // that's it
+    }
+    meIsLiblangtagNeeded = DECISION_YES;
+    SAL_INFO( "i18npool.langtag", "LanguageTag::canonicalize: using liblangtag for " << maBcp47);
+
     if (!mpImplLangtag)
+    {
+        theDataRef::get().incRef();
         mpImplLangtag = lt_tag_new();
+    }
 
     // ensure error is free'd
     struct myerror
@@ -545,66 +648,90 @@ const rtl::OUString & LanguageTag::getBcp47( bool bResolveSystem ) const
 }
 
 
-rtl::OUString LanguageTag::getLanguageFromLangtag() const
+rtl::OUString LanguageTag::getLanguageFromLangtag()
 {
-    rtl::OUString aLanguage;
-    if (!mpImplLangtag)
+    OUString aLanguage;
+    if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
         canonicalize();
     if (maBcp47.isEmpty())
         return aLanguage;
-    const lt_lang_t* pLangT = lt_tag_get_language( MPLANGTAG);
-    SAL_WARN_IF( !pLangT, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLangT==NULL");
-    if (!pLangT)
-        return aLanguage;
-    const char* pLang = lt_lang_get_tag( pLangT);
-    SAL_WARN_IF( !pLang, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLang==NULL");
-    if (pLang)
-        aLanguage = OUString::createFromAscii( pLang);
+    if (mpImplLangtag)
+    {
+        const lt_lang_t* pLangT = lt_tag_get_language( MPLANGTAG);
+        SAL_WARN_IF( !pLangT, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLangT==NULL");
+        if (!pLangT)
+            return aLanguage;
+        const char* pLang = lt_lang_get_tag( pLangT);
+        SAL_WARN_IF( !pLang, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLang==NULL");
+        if (pLang)
+            aLanguage = OUString::createFromAscii( pLang);
+    }
+    else
+    {
+        if (mbCachedLanguage || cacheSimpleLSC())
+            aLanguage = maCachedLanguage;
+    }
     return aLanguage;
 }
 
 
-rtl::OUString LanguageTag::getScriptFromLangtag() const
+rtl::OUString LanguageTag::getScriptFromLangtag()
 {
-    rtl::OUString aScript;
-    if (!mpImplLangtag)
+    OUString aScript;
+    if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
         canonicalize();
     if (maBcp47.isEmpty())
         return aScript;
-    const lt_script_t* pScriptT = lt_tag_get_script( MPLANGTAG);
-    // pScriptT==NULL is valid for default scripts
-    if (!pScriptT)
-        return aScript;
-    const char* pScript = lt_script_get_tag( pScriptT);
-    SAL_WARN_IF( !pScript, "i18npool.langtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
-    if (pScript)
-        aScript = OUString::createFromAscii( pScript);
+    if (mpImplLangtag)
+    {
+        const lt_script_t* pScriptT = lt_tag_get_script( MPLANGTAG);
+        // pScriptT==NULL is valid for default scripts
+        if (!pScriptT)
+            return aScript;
+        const char* pScript = lt_script_get_tag( pScriptT);
+        SAL_WARN_IF( !pScript, "i18npool.langtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
+        if (pScript)
+            aScript = OUString::createFromAscii( pScript);
+    }
+    else
+    {
+        if (mbCachedScript || cacheSimpleLSC())
+            aScript = maCachedScript;
+    }
     return aScript;
 }
 
 
-rtl::OUString LanguageTag::getRegionFromLangtag() const
+rtl::OUString LanguageTag::getRegionFromLangtag()
 {
-    rtl::OUString aRegion;
-    if (!mpImplLangtag)
+    OUString aRegion;
+    if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
         canonicalize();
     if (maBcp47.isEmpty())
         return aRegion;
-    const lt_region_t* pRegionT = lt_tag_get_region( MPLANGTAG);
-    // pRegionT==NULL is valid for language only tags, rough check here that
-    // does not take sophisticated tags into account that actually should have
-    // a region, check for ll, lll, ll-Ssss and lll-Ssss so that ll-CC and
-    // lll-CC actually fail.
-    SAL_WARN_IF( !pRegionT &&
-            maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
-            maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
-            "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL");
-    if (!pRegionT)
-        return aRegion;
-    const char* pRegion = lt_region_get_tag( pRegionT);
-    SAL_WARN_IF( !pRegion, "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegion==NULL");
-    if (pRegion)
-        aRegion = OUString::createFromAscii( pRegion);
+    if (mpImplLangtag)
+    {
+        const lt_region_t* pRegionT = lt_tag_get_region( MPLANGTAG);
+        // pRegionT==NULL is valid for language only tags, rough check here
+        // that does not take sophisticated tags into account that actually
+        // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
+        // that ll-CC and lll-CC actually fail.
+        SAL_WARN_IF( !pRegionT &&
+                maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
+                maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
+                "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL");
+        if (!pRegionT)
+            return aRegion;
+        const char* pRegion = lt_region_get_tag( pRegionT);
+        SAL_WARN_IF( !pRegion, "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegion==NULL");
+        if (pRegion)
+            aRegion = OUString::createFromAscii( pRegion);
+    }
+    else
+    {
+        if (mbCachedCountry || cacheSimpleLSC())
+            aRegion = maCachedCountry;
+    }
     return aRegion;
 }
 
@@ -681,7 +808,7 @@ bool LanguageTag::isIsoLanguage( const rtl::OUString& rLanguage )
     SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) &&
                 (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) ||
             (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18npool.langtag",
-            "LanguageTag::isIsoLanguage: rejecting upper case");
+            "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage);
     return false;
 }
 
@@ -694,7 +821,7 @@ bool LanguageTag::isIsoCountry( const rtl::OUString& rRegion )
             (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1])))
         return true;
     SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])),
-            "i18npool.langtag", "LanguageTag::isIsoCountry: rejecting lower case");
+            "i18npool.langtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion);
     return false;
 }
 
@@ -711,7 +838,7 @@ bool LanguageTag::isIsoScript( const rtl::OUString& rScript )
     SAL_WARN_IF( rScript.getLength() == 4 &&
             (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) ||
              isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])),
-            "i18npool.langtag", "LanguageTag::isIsoScript: rejecting case mismatch");
+            "i18npool.langtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript);
     return false;
 }
 
@@ -720,7 +847,7 @@ rtl::OUString LanguageTag::getLanguage() const
 {
     if (!mbCachedLanguage)
     {
-        maCachedLanguage = getLanguageFromLangtag();
+        maCachedLanguage = const_cast<LanguageTag*>(this)->getLanguageFromLangtag();
         mbCachedLanguage = true;
     }
     return maCachedLanguage;
@@ -731,7 +858,7 @@ rtl::OUString LanguageTag::getScript() const
 {
     if (!mbCachedScript)
     {
-        maCachedScript = getScriptFromLangtag();
+        maCachedScript = const_cast<LanguageTag*>(this)->getScriptFromLangtag();
         mbCachedScript = true;
     }
     return maCachedScript;
@@ -756,7 +883,7 @@ rtl::OUString LanguageTag::getCountry() const
 {
     if (!mbCachedCountry)
     {
-        maCachedCountry = getRegionFromLangtag();
+        maCachedCountry = const_cast<LanguageTag*>(this)->getRegionFromLangtag();
         if (!isIsoCountry( maCachedCountry))
             maCachedCountry = OUString();
         mbCachedCountry = true;
@@ -767,7 +894,22 @@ rtl::OUString LanguageTag::getCountry() const
 
 rtl::OUString LanguageTag::getRegion() const
 {
-    return getRegionFromLangtag();
+    return const_cast<LanguageTag*>(this)->getRegionFromLangtag();
+}
+
+
+bool LanguageTag::cacheSimpleLSC()
+{
+    OUString aLanguage, aScript, aCountry;
+    bool bRet = simpleExtract( maBcp47, aLanguage, aScript, aCountry);
+    if (bRet)
+    {
+        maCachedLanguage = aLanguage;
+        maCachedScript   = aScript;
+        maCachedCountry  = aCountry;
+        mbCachedLanguage = mbCachedScript = mbCachedCountry = true;
+    }
+    return bRet;
 }
 
 
@@ -775,8 +917,8 @@ bool LanguageTag::isIsoLocale() const
 {
     if (meIsIsoLocale == DECISION_DONTKNOW)
     {
-        if (!mpImplLangtag)
-            canonicalize();
+        if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+            const_cast<LanguageTag*>(this)->canonicalize();
         // It must be at most ll-CC or lll-CC
         // Do not use getCountry() here, use getRegion() instead.
         meIsIsoLocale = ((maBcp47.isEmpty() ||
@@ -791,8 +933,8 @@ bool LanguageTag::isIsoODF() const
 {
     if (meIsIsoODF == DECISION_DONTKNOW)
     {
-        if (!mpImplLangtag)
-            canonicalize();
+        if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+            const_cast<LanguageTag*>(this)->canonicalize();
         if (!isIsoScript( getScript()))
             return ((meIsIsoODF = DECISION_NO) == DECISION_YES);
         // The usual case is lll-CC so simply check that first.
@@ -812,8 +954,8 @@ bool LanguageTag::isValidBcp47() const
 {
     if (meIsValid == DECISION_DONTKNOW)
     {
-        if (!mpImplLangtag)
-           canonicalize();
+        if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+           const_cast<LanguageTag*>(this)->canonicalize();
         SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18npool.langtag",
                 "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
     }
@@ -866,4 +1008,45 @@ bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const
 }
 
 
+// static
+bool LanguageTag::simpleExtract( const rtl::OUString& rBcp47,
+                                 rtl::OUString& rLanguage,
+                                 rtl::OUString& rScript,
+                                 rtl::OUString& rCountry )
+{
+    bool bRet = false;
+    const sal_Int32 nLen = rBcp47.getLength();
+    const sal_Int32 nHyph1 = rBcp47.indexOf( '-');
+    if ((nLen == 2 || nLen == 3) && nHyph1 < 0)     // ll or lll
+    {
+        rLanguage = rBcp47;
+        rScript = rCountry = OUString();
+        bRet = true;
+    }
+    else if (  (nLen == 5 && nHyph1 == 2)           // ll-CC
+            || (nLen == 6 && nHyph1 == 3))          // lll-CC
+    {
+        rLanguage = rBcp47.copy( 0, nHyph1);
+        rCountry  = rBcp47.copy( nHyph1 + 1, 2);
+        rScript = OUString();
+        bRet = true;
+    }
+    else if (  (nHyph1 == 2 && nLen == 10)          // ll-Ssss-CC check
+            || (nHyph1 == 3 && nLen == 11))         // lll-Ssss-CC check
+    {
+        const sal_Int32 nHyph2 = rBcp47.indexOf( '-', nHyph1 + 1);
+        if (nHyph2 == nHyph1 + 5)
+        {
+            rLanguage = rBcp47.copy( 0, nHyph1);
+            rScript   = rBcp47.copy( nHyph1 + 1, 4);
+            rCountry  = rBcp47.copy( nHyph2 + 1, 2);
+            bRet = true;
+        }
+    }
+    if (!bRet)
+        rLanguage = rScript = rCountry = OUString();
+    return bRet;
+}
+
+
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */


More information about the Libreoffice-commits mailing list