[Libreoffice-commits] core.git: 2 commits - i18nlangtag/Library_i18nlangtag.mk i18nlangtag/source

Tue Sep 3 05:30:37 PDT 2013

 i18nlangtag/Library_i18nlangtag.mk             |    1 +
 i18nlangtag/source/languagetag/languagetag.cxx |   91 +++++++++++++++++--------
 2 files changed, 64 insertions(+), 28 deletions(-)

New commits:
commit 7c032aa60eaccde88a9064a80bb69fe8076a040b
Author: Eike Rathke <erack at redhat.com>
Date:   Tue Sep 3 14:17:54 2013 +0200

    resolve all known fallbacks
    
    Needed for rsc during build time to not pull in liblangtag and its data.
    
    Change-Id: I1d4dd32b04ed93ec75720132a30b66ef63fec179

diff --git a/i18nlangtag/Library_i18nlangtag.mk b/i18nlangtag/Library_i18nlangtag.mk
index fd1548b..30b6e01 100644
--- a/i18nlangtag/Library_i18nlangtag.mk
+++ b/i18nlangtag/Library_i18nlangtag.mk
@@ -21,6 +21,7 @@ $(eval $(call gb_Library_use_libraries,i18nlangtag,\
 ))
 
 $(eval $(call gb_Library_use_externals,i18nlangtag,\
+	boost_headers \
 	icu_headers \
 	icuuc \
 ))
diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx
index 65236ce..de48d06 100644
--- a/i18nlangtag/source/languagetag/languagetag.cxx
+++ b/i18nlangtag/source/languagetag/languagetag.cxx
@@ -16,6 +16,7 @@
 #include <osl/file.hxx>
 #include <rtl/instance.hxx>
 #include <rtl/locale.h>
+#include <boost/unordered_set.hpp>
 
 //#define erDEBUG
 
@@ -46,13 +47,44 @@ struct myLtError
     ~myLtError() { if (p) lt_error_unref( p); }
 };
 
-
 // "statics" to be returned as const reference to an empty locale and string.
 namespace {
 struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {};
 struct theEmptyBcp47 : public rtl::Static< OUString, theEmptyBcp47 > {};
 }
 
+typedef ::boost::unordered_set< OUString, OUStringHash > KnownTagSet;
+namespace {
+struct theKnowns : public rtl::Static< KnownTagSet, theKnowns > {};
+struct theMutex : public rtl::Static< osl::Mutex, theMutex > {};
+}
+
+static const KnownTagSet & getKnowns()
+{
+    KnownTagSet & rKnowns = theKnowns::get();
+    if (rKnowns.empty())
+    {
+        osl::MutexGuard aGuard( theMutex::get());
+        if (rKnowns.empty())
+        {
+            ::std::vector< MsLangId::LanguagetagMapping > aDefined( MsLangId::getDefinedLanguagetags());
+            for (::std::vector< MsLangId::LanguagetagMapping >::const_iterator it( aDefined.begin());
+                    it != aDefined.end(); ++it)
+            {
+                // Do not use the BCP47 string here to initialize the
+                // LanguageTag because then canonicalize() would call this
+                // getKnowns() again..
+                ::std::vector< OUString > aFallbacks( LanguageTag( (*it).mnLang).getFallbackStrings());
+                for (::std::vector< OUString >::const_iterator fb( aFallbacks.begin()); fb != aFallbacks.end(); ++fb)
+                {
+                    rKnowns.insert( *fb);
+                }
+            }
+        }
+    }
+    return rKnowns;
+}
+
 
 /** A reference holder for liblangtag data de/initialization, one static
     instance. Currently implemented such that the first "ref" inits and dtor
@@ -448,7 +480,7 @@ bool LanguageTag::canonicalize()
                 // Now this is getting funny.. we only have some BCP47 string
                 // and want to determine if parsing it would be possible
                 // without using liblangtag just to see if it is a simple known
-                // locale.
+                // locale or could fall back to one.
                 OUString aLanguage, aScript, aCountry, aVariants;
                 Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants);
                 if (eExt != EXTRACTED_NONE)
@@ -512,6 +544,12 @@ bool LanguageTag::canonicalize()
                 }
                 if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM)
                     meIsLiblangtagNeeded = DECISION_NO; // known locale
+                else
+                {
+                    const KnownTagSet& rKnowns = getKnowns();
+                    if (rKnowns.find( maBcp47) != rKnowns.end())
+                        meIsLiblangtagNeeded = DECISION_NO; // known fallback
+                }
             }
         }
         if (bTemporaryLocale)
commit c318f19c492f76e3b7d557257b3706f05b6fed62
Author: Eike Rathke <erack at redhat.com>
Date:   Tue Sep 3 11:31:39 2013 +0200

    simplified simpleExtract() and made some cases work
    
    Change-Id: I5d39a020a4bb6164c0d7695d2f2e43785869b345

diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx
index 22f82fe..65236ce 100644
--- a/i18nlangtag/source/languagetag/languagetag.cxx
+++ b/i18nlangtag/source/languagetag/languagetag.cxx
@@ -1302,9 +1302,9 @@ LanguageTag::Extraction LanguageTag::simpleExtract( const OUString& rBcp47,
     Extraction eRet = EXTRACTED_NONE;
     const sal_Int32 nLen = rBcp47.getLength();
     const sal_Int32 nHyph1 = rBcp47.indexOf( '-');
-    const sal_Int32 nHyph2 = (nHyph1 < 0 ? -1 : rBcp47.indexOf( '-', nHyph1 + 1));
-    const sal_Int32 nHyph3 = (nHyph2 < 0 ? -1 : rBcp47.indexOf( '-', nHyph2 + 1));
-    const sal_Int32 nHyph4 = (nHyph3 < 0 ? -1 : rBcp47.indexOf( '-', nHyph3 + 1));
+    sal_Int32 nHyph2 = (nHyph1 < 0 ? -1 : rBcp47.indexOf( '-', nHyph1 + 1));
+    sal_Int32 nHyph3 = (nHyph2 < 0 ? -1 : rBcp47.indexOf( '-', nHyph2 + 1));
+    sal_Int32 nHyph4 = (nHyph3 < 0 ? -1 : rBcp47.indexOf( '-', nHyph3 + 1));
     if (nLen == 1 && rBcp47[0] == '*')              // * the dreaded jolly joker
     {
         // It's f*d up but we need to recognize this.
@@ -1362,45 +1362,42 @@ LanguageTag::Extraction LanguageTag::simpleExtract( const OUString& rBcp47,
     else if (  (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 10 && nLen >= 15)   // ll-Ssss-CC-vvvv[vvvv][-...]
             || (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 11 && nLen >= 16))  // lll-Ssss-CC-vvvv[vvvv][-...]
     {
-        if (nHyph4 < 0 || (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9))
+        if (nHyph4 < 0)
+            nHyph4 = rBcp47.getLength();
+        if (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9)
         {
+            rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
+            rScript   = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
+            rCountry  = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
             rVariants = rBcp47.copy( nHyph3 + 1);
-            if (nHyph4 < 0 && (rVariants.getLength() < 4 || 8 < rVariants.getLength()))
-            {
-                rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
-                rScript   = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
-                rCountry  = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
-                eRet = EXTRACTED_LV;
-            }
+            eRet = EXTRACTED_LV;
         }
     }
     else if (  (nHyph1 == 2 && nHyph2 == 5 && nLen >= 10)   // ll-CC-vvvv[vvvv][-...]
             || (nHyph1 == 3 && nHyph2 == 6 && nLen >= 11))  // lll-CC-vvvv[vvvv][-...]
     {
-        if (nHyph3 < 0 || (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9))
+        if (nHyph3 < 0)
+            nHyph3 = rBcp47.getLength();
+        if (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9)
         {
+            rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
+            rScript   = OUString();
+            rCountry  = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
             rVariants = rBcp47.copy( nHyph2 + 1);
-            if (nHyph3 < 0 && (rVariants.getLength() < 4 || 8 < rVariants.getLength()))
-            {
-                rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
-                rCountry  = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
-                rScript   = OUString();
-                eRet = EXTRACTED_LV;
-            }
+            eRet = EXTRACTED_LV;
         }
     }
     else if (  (nHyph1 == 2 && nLen >= 8)                   // ll-vvvvv[vvv][-...]
             || (nHyph1 == 3 && nLen >= 9))                  // lll-vvvvv[vvv][-...]
     {
-        if (nHyph2 < 0 || (nHyph2 - nHyph1 > 5 && nHyph2 - nHyph1 <= 9))
+        if (nHyph2 < 0)
+            nHyph2 = rBcp47.getLength();
+        if (nHyph2 - nHyph1 > 5 && nHyph2 - nHyph1 <= 9)
         {
+            rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
+            rScript   = rCountry = OUString();
             rVariants = rBcp47.copy( nHyph1 + 1);
-            if (nHyph2 < 0 && (rVariants.getLength() < 5 || 8 < rVariants.getLength()))
-            {
-                rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
-                rScript   = rCountry = OUString();
-                eRet = EXTRACTED_LV;
-            }
+            eRet = EXTRACTED_LV;
         }
     }
     if (eRet == EXTRACTED_NONE)