[Libreoffice-commits] core.git: i18nlangtag/qa i18nlangtag/source include/i18nlangtag

Eike Rathke erack at redhat.com
Thu Oct 17 09:36:27 PDT 2013


 i18nlangtag/qa/cppunit/test_languagetag.cxx    |   24 ++++++++
 i18nlangtag/source/isolang/isolang.cxx         |    1 
 i18nlangtag/source/languagetag/languagetag.cxx |   71 +++++++++++++++++--------
 include/i18nlangtag/lang.h                     |    1 
 4 files changed, 75 insertions(+), 22 deletions(-)

New commits:
commit c450aafad00da4553fc6f7abadb19b4b16df6b88
Author: Eike Rathke <erack at redhat.com>
Date:   Thu Oct 17 18:33:47 2013 +0200

    solve the 'dz' vs 'bo' and 'no', 'nb' and 'nn' share primary ID cases
    
    Change-Id: I7ccead4493c9848b55f642c2636daa3c60aeb7a6

diff --git a/i18nlangtag/qa/cppunit/test_languagetag.cxx b/i18nlangtag/qa/cppunit/test_languagetag.cxx
index 8a055be..6f7cc2d 100644
--- a/i18nlangtag/qa/cppunit/test_languagetag.cxx
+++ b/i18nlangtag/qa/cppunit/test_languagetag.cxx
@@ -335,13 +335,35 @@ void TestLanguageTag::testAllTags()
         CPPUNIT_ASSERT( aLocale.Variant == "" );
         LanguageType de_LangID = de.getLanguageType();
         CPPUNIT_ASSERT( de_LangID != LANGUAGE_GERMAN );
-        CPPUNIT_ASSERT( MsLangId::getPrimaryLanguage( de_LangID) == MsLangId::getPrimaryLanguage( LANGUAGE_GERMAN) );
+        CPPUNIT_ASSERT( de_LangID == MsLangId::getPrimaryLanguage( LANGUAGE_GERMAN) );
         CPPUNIT_ASSERT( de.makeFallback().getBcp47() == "de-DE");
         // Check registered mapping.
         LanguageTag de_l( de_LangID);
         CPPUNIT_ASSERT( de_l.getBcp47() == s_de );
     }
 
+    // "bo" and "dz" share the same primary language ID, only one gets it
+    // assigned, "dz" language-only has a special mapping.
+    {
+        LanguageTag bo( "bo", true );
+        CPPUNIT_ASSERT( bo.getLanguageType() == MsLangId::getPrimaryLanguage( LANGUAGE_TIBETAN) );
+        LanguageTag dz( "dz", true );
+        CPPUNIT_ASSERT( dz.getLanguageType() == LANGUAGE_USER_DZONGKHA_MAP_LONLY );
+    }
+
+    // "no", "nb" and "nn" share the same primary language ID, which even is
+    // assigned to "no-NO" for legacy so none gets it assigned, all on-the-fly.
+    {
+        LanguageTag no( "no", true );
+        CPPUNIT_ASSERT( LanguageTag::isOnTheFlyID( no.getLanguageType()) );
+        LanguageTag nb( "nb", true );
+        CPPUNIT_ASSERT( LanguageTag::isOnTheFlyID( nb.getLanguageType()) );
+        LanguageTag nn( "nn", true );
+        CPPUNIT_ASSERT( LanguageTag::isOnTheFlyID( nn.getLanguageType()) );
+        LanguageTag no_NO( "no-NO", true );
+        CPPUNIT_ASSERT( no_NO.getLanguageType() == LANGUAGE_NORWEGIAN );
+    }
+
     // 'de-1901' derived from 'de-DE-1901' grandfathered to check that it is
     // accepted as (DIGIT 3ALNUM) variant
     {
diff --git a/i18nlangtag/source/isolang/isolang.cxx b/i18nlangtag/source/isolang/isolang.cxx
index 25e8eb2..fb75395 100644
--- a/i18nlangtag/source/isolang/isolang.cxx
+++ b/i18nlangtag/source/isolang/isolang.cxx
@@ -352,6 +352,7 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] =
     { LANGUAGE_TIBETAN,                     "bo", "CN", false },   // CN politically correct?
     { LANGUAGE_USER_TIBETAN_INDIA,          "bo", "IN", false },
     { LANGUAGE_DZONGKHA,                    "dz", "BT", false },
+    { LANGUAGE_USER_DZONGKHA_MAP_LONLY,     "dz", ""  , false },    // because of the MS error, see lang.h
     { LANGUAGE_TURKMEN,                     "tk", "TM", false },
     { LANGUAGE_WELSH,                       "cy", "GB", false },
     { LANGUAGE_SESOTHO,                     "st", "ZA", false },
diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx
index fc1f5b5..4f8af9a 100644
--- a/i18nlangtag/source/languagetag/languagetag.cxx
+++ b/i18nlangtag/source/languagetag/languagetag.cxx
@@ -309,8 +309,10 @@ private:
     /** Generates on-the-fly LangID and registers the maBcp47,mnLangID pair.
 
         @param  nRegisterID
-                If not 0 and not LANGUAGE_DONTKNOW, use that ID instead of
-                generating an on-the-fly ID.
+                If not 0 and not LANGUAGE_DONTKNOW, suggest (!) to use that ID
+                instead of generating an on-the-fly ID. Implementation may
+                still generate an ID if the suggested ID is already used for
+                another language tag.
 
         @return NULL if no ID could be obtained or registration failed.
      */
@@ -616,10 +618,10 @@ LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID
 
     osl::MutexGuard aGuard( theMutex::get());
 
-    MapBcp47& rMap = theMapBcp47::get();
-    MapBcp47::const_iterator it( rMap.find( maBcp47));
+    MapBcp47& rMapBcp47 = theMapBcp47::get();
+    MapBcp47::const_iterator it( rMapBcp47.find( maBcp47));
     bool bOtherImpl = false;
-    if (it != rMap.end())
+    if (it != rMapBcp47.end())
     {
         SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: found impl for '" << maBcp47 << "'");
         pImpl = (*it).second;
@@ -639,23 +641,47 @@ LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID
     {
         SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: new impl for '" << maBcp47 << "'");
         pImpl.reset( new LanguageTagImpl( *this));
-        rMap.insert( ::std::make_pair( maBcp47, pImpl));
+        rMapBcp47.insert( ::std::make_pair( maBcp47, pImpl));
     }
 
     if (!bOtherImpl || !pImpl->mbInitializedLangID)
     {
-        LanguageType nLang = ((nRegisterID == 0 || nRegisterID == LANGUAGE_DONTKNOW) ?
-                getNextOnTheFlyLanguage() : nRegisterID);
-        if (!nLang)
+        if (nRegisterID == 0 || nRegisterID == LANGUAGE_DONTKNOW)
+            nRegisterID = getNextOnTheFlyLanguage();
+        else
+        {
+            // Accept a suggested ID only if it is not mapped yet to something
+            // different, otherwise we would end up with ambiguous assignments
+            // of different language tags, for example for the same primary
+            // LangID with "no", "nb" and "nn".
+            const MapLangID& rMapLangID = theMapLangID::get();
+            MapLangID::const_iterator itID( rMapLangID.find( nRegisterID));
+            if (itID != rMapLangID.end())
+            {
+                if ((*itID).second->maBcp47 != maBcp47)
+                {
+                    SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: not using suggested 0x"
+                            << ::std::hex << nRegisterID << " for '" << maBcp47 << "' have '"
+                            << (*itID).second->maBcp47 << "'");
+                    nRegisterID = getNextOnTheFlyLanguage();
+                }
+                else
+                {
+                    SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: suggested 0x"
+                            << ::std::hex << nRegisterID << " for '" << maBcp47 << "' already registered");
+                }
+            }
+        }
+        if (!nRegisterID)
         {
             // out of IDs, nothing to register
             return pImpl;
         }
-        pImpl->mnLangID = nLang;
+        pImpl->mnLangID = nRegisterID;
         pImpl->mbInitializedLangID = true;
         if (pImpl.get() != this)
         {
-            mnLangID = nLang;
+            mnLangID = nRegisterID;
             mbInitializedLangID = true;
         }
     }
@@ -678,6 +704,13 @@ LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID
 }
 
 
+static bool lcl_isKnownOnTheFlyID( LanguageType nLang )
+{
+    return nLang != LANGUAGE_DONTKNOW && nLang != LANGUAGE_SYSTEM &&
+        (LanguageTag::isOnTheFlyID( nLang) || (nLang == MsLangId::getPrimaryLanguage( nLang)));
+}
+
+
 LanguageTag::ImplPtr LanguageTag::registerImpl() const
 {
     // XXX NOTE: Do not use non-static LanguageTag::convert...() member methods
@@ -884,10 +917,8 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const
                 if (!pImpl->mbInitializedLangID)
                     pImpl->convertLocaleToLang( true);
                 // Unconditionally insert (round-trip is possible) for
-                // on-the-fly IDs and (generated or not) primary language IDs.
-                bool bInsert = (pImpl->mnLangID != LANGUAGE_DONTKNOW &&
-                        (LanguageTag::isOnTheFlyID( pImpl->mnLangID) ||
-                         (pImpl->mnLangID == MsLangId::getPrimaryLanguage( pImpl->mnLangID))));
+                // on-the-fly IDs and (generated or not) suggested IDs.
+                bool bInsert = lcl_isKnownOnTheFlyID( pImpl->mnLangID);
                 OUString aBcp47;
                 if (!bInsert)
                 {
@@ -1309,8 +1340,8 @@ void LanguageTagImpl::convertLocaleToLang( bool bAllowOnTheFlyID )
             if (isValidBcp47())
             {
                 // For language-only (including script) look if we know some
-                // locale of that language and if so use the primary language
-                // ID of that instead of generating an on-the-fly-ID.
+                // locale of that language and if so try to use the primary
+                // language ID of that instead of generating an on-the-fly ID.
                 if (getCountry().isEmpty() && isIsoODF())
                 {
                     lang::Locale aLoc( MsLangId::Conversion::lookupFallbackLocale( maLocale));
@@ -1319,10 +1350,8 @@ void LanguageTagImpl::convertLocaleToLang( bool bAllowOnTheFlyID )
                     if (aLoc.Language != "en" || getLanguage() == "en")
                     {
                         mnLangID = MsLangId::Conversion::convertLocaleToLanguage( aLoc);
-                        // LANGUAGE_DONTKNOW is all bits of primary language,
-                        // so this is ok even if the conversion failed, which
-                        // it should not anyway..
-                        mnLangID = MsLangId::getPrimaryLanguage( mnLangID);
+                        if (mnLangID != LANGUAGE_DONTKNOW)
+                            mnLangID = MsLangId::getPrimaryLanguage( mnLangID);
                     }
                 }
                 registerOnTheFly( mnLangID);
diff --git a/include/i18nlangtag/lang.h b/include/i18nlangtag/lang.h
index 29a3cd1..51e4e56 100644
--- a/include/i18nlangtag/lang.h
+++ b/include/i18nlangtag/lang.h
@@ -446,6 +446,7 @@ typedef unsigned short LanguageType;
 #define LANGUAGE_USER_KURDISH_IRAN          0x1226  /* sublang 0x04, Arabic script */
 #define LANGUAGE_USER_SARDINIAN             0x0627
 /* was reserved for Dzongkha but turned down with #i53497#: 0x0628 */  /* obsoleted by LANGUAGE_DZONGKHA */
+#define LANGUAGE_USER_DZONGKHA_MAP_LONLY    0xF851  /* to map "dz" only, because of the MS error, and preserve CTL information, sub 0x3e */
 #define LANGUAGE_USER_SWAHILI_TANZANIA      0x8041  /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_SWAHILI)) */
 #define LANGUAGE_OBSOLETE_USER_BRETON       0x0629
 #define LANGUAGE_USER_BRETON                LANGUAGE_BRETON_FRANCE


More information about the Libreoffice-commits mailing list