[Libreoffice-commits] core.git: 2 commits - i18nlangtag/qa i18nlangtag/source

Eike Rathke erack at redhat.com
Tue Sep 12 14:58:20 UTC 2017


 i18nlangtag/qa/cppunit/test_languagetag.cxx    |   51 +++++++++++++++++++++++++
 i18nlangtag/source/languagetag/languagetag.cxx |   34 ++++++++++++++++
 2 files changed, 84 insertions(+), 1 deletion(-)

New commits:
commit 4f2a06379dde3839a71a52e81c2ca09aaa9a41c9
Author: Eike Rathke <erack at redhat.com>
Date:   Tue Sep 12 16:57:20 2017 +0200

    Unit tests for 'es_ES_u_co_trad' and 'es_ES_tradnl', tdf#83190
    
    Change-Id: Iac2b892f3b3e4146101a99d1febb6110c83e56ed

diff --git a/i18nlangtag/qa/cppunit/test_languagetag.cxx b/i18nlangtag/qa/cppunit/test_languagetag.cxx
index e9ef5739fabd..fba1550c23a4 100644
--- a/i18nlangtag/qa/cppunit/test_languagetag.cxx
+++ b/i18nlangtag/qa/cppunit/test_languagetag.cxx
@@ -436,6 +436,57 @@ void TestLanguageTag::testAllTags()
         CPPUNIT_ASSERT_EQUAL( OUString("en"), en_GB_oxendict_Fallbacks[4]);
     }
 
+    // 'es-ES-u-co-trad' is a valid (and known) Extension U tag
+    {
+        OUString s_es_ES_u_co_trad( "es-ES-u-co-trad" );
+        LanguageTag es_ES_u_co_trad( s_es_ES_u_co_trad );
+        lang::Locale aLocale = es_ES_u_co_trad.getLocale();
+        CPPUNIT_ASSERT_EQUAL( s_es_ES_u_co_trad, es_ES_u_co_trad.getBcp47() );
+        CPPUNIT_ASSERT_EQUAL( OUString("qlt"), aLocale.Language );
+        CPPUNIT_ASSERT_EQUAL( OUString("ES"), aLocale.Country );
+        CPPUNIT_ASSERT_EQUAL( s_es_ES_u_co_trad, aLocale.Variant );
+        CPPUNIT_ASSERT_EQUAL( LANGUAGE_SPANISH_DATED, es_ES_u_co_trad.getLanguageType() );
+        CPPUNIT_ASSERT( es_ES_u_co_trad.isValidBcp47() );
+        CPPUNIT_ASSERT( !es_ES_u_co_trad.isIsoLocale() );
+        CPPUNIT_ASSERT( !es_ES_u_co_trad.isIsoODF() );
+        CPPUNIT_ASSERT_EQUAL( OUString("es"), es_ES_u_co_trad.getLanguageAndScript() );
+        CPPUNIT_ASSERT_EQUAL( OUString("u-co-trad"), es_ES_u_co_trad.getVariants() );
+        ::std::vector< OUString > es_ES_u_co_trad_Fallbacks( es_ES_u_co_trad.getFallbackStrings( true));
+        CPPUNIT_ASSERT_EQUAL( static_cast<size_t>(4), es_ES_u_co_trad_Fallbacks.size() );
+        CPPUNIT_ASSERT_EQUAL( OUString("es-ES-u-co-trad"), es_ES_u_co_trad_Fallbacks[0]);
+        CPPUNIT_ASSERT_EQUAL( OUString("es-u-co-trad"), es_ES_u_co_trad_Fallbacks[1]);
+        CPPUNIT_ASSERT_EQUAL( OUString("es-ES"), es_ES_u_co_trad_Fallbacks[2]);
+        CPPUNIT_ASSERT_EQUAL( OUString("es"), es_ES_u_co_trad_Fallbacks[3]);
+        // Map to broken MS.
+        CPPUNIT_ASSERT_EQUAL( OUString("es-ES_tradnl"), es_ES_u_co_trad.getBcp47MS() );
+    }
+
+    // 'es-ES_tradnl' (broken MS) maps to 'es-ES-u-co-trad'
+    {
+        OUString s_es_ES_u_co_trad( "es-ES-u-co-trad" );
+        OUString s_es_ES_tradnl( "es-ES_tradnl" );
+        LanguageTag es_ES_tradnl( s_es_ES_tradnl );
+        lang::Locale aLocale = es_ES_tradnl.getLocale();
+        CPPUNIT_ASSERT_EQUAL( s_es_ES_u_co_trad, es_ES_tradnl.getBcp47() );
+        CPPUNIT_ASSERT_EQUAL( OUString("qlt"), aLocale.Language );
+        CPPUNIT_ASSERT_EQUAL( OUString("ES"), aLocale.Country );
+        CPPUNIT_ASSERT_EQUAL( s_es_ES_u_co_trad, aLocale.Variant );
+        CPPUNIT_ASSERT_EQUAL( LANGUAGE_SPANISH_DATED, es_ES_tradnl.getLanguageType() );
+        CPPUNIT_ASSERT( es_ES_tradnl.isValidBcp47() );
+        CPPUNIT_ASSERT( !es_ES_tradnl.isIsoLocale() );
+        CPPUNIT_ASSERT( !es_ES_tradnl.isIsoODF() );
+        CPPUNIT_ASSERT_EQUAL( OUString("es"), es_ES_tradnl.getLanguageAndScript() );
+        CPPUNIT_ASSERT_EQUAL( OUString("u-co-trad"), es_ES_tradnl.getVariants() );
+        ::std::vector< OUString > es_ES_tradnl_Fallbacks( es_ES_tradnl.getFallbackStrings( true));
+        CPPUNIT_ASSERT_EQUAL( static_cast<size_t>(4), es_ES_tradnl_Fallbacks.size() );
+        CPPUNIT_ASSERT_EQUAL( OUString("es-ES-u-co-trad"), es_ES_tradnl_Fallbacks[0]);
+        CPPUNIT_ASSERT_EQUAL( OUString("es-u-co-trad"), es_ES_tradnl_Fallbacks[1]);
+        CPPUNIT_ASSERT_EQUAL( OUString("es-ES"), es_ES_tradnl_Fallbacks[2]);
+        CPPUNIT_ASSERT_EQUAL( OUString("es"), es_ES_tradnl_Fallbacks[3]);
+        // Map back to broken MS.
+        CPPUNIT_ASSERT_EQUAL( s_es_ES_tradnl, es_ES_tradnl.getBcp47MS() );
+    }
+
     // 'zh-yue-HK' uses redundant 'zh-yue' and should be preferred 'yue-HK'
 #if 0
     /* XXX Disabled because liblangtag in lt_tag_canonicalize() after replacing
commit 82ee2ad282c3548e7ce6e745c8aee0db8b9b2201
Author: Eike Rathke <erack at redhat.com>
Date:   Tue Sep 12 16:56:00 2017 +0200

    Handle 'es-ES_tradnl' and 'es-ES-u-co-trad' as known, tdf#83190 follow-up
    
    LanguageTagImpl::simpleExtract() needs to handle them because for known tags
    mpImplLangtag is not used to extract language, country, region; which for the
    malformed 'es-ES_tradnl' wouldn't work anyway.
    
    Change-Id: I1efcf32e90e513d5af4624c7ae15848d1fe951a7

diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx
index 684364ff1c81..c57514cbd977 100644
--- a/i18nlangtag/source/languagetag/languagetag.cxx
+++ b/i18nlangtag/source/languagetag/languagetag.cxx
@@ -335,7 +335,8 @@ private:
         EXTRACTED_LV,
         EXTRACTED_C_LOCALE,
         EXTRACTED_X,
-        EXTRACTED_X_JOKER
+        EXTRACTED_X_JOKER,
+        EXTRACTED_KNOWN_BAD
     };
 
     /** Of a language tag of the form lll[-Ssss][-CC][-vvvvvvvv] extract the
@@ -349,6 +350,7 @@ private:
                 EXTRACTED_C_LOCALE if a 'C' locale was detected,
                 EXTRACTED_X if x-... privateuse tag was detected,
                 EXTRACTED_X_JOKER if "*" joker was detected,
+                EXTRACTED_KNOWN_BAD if a bad but known (to be remapped) tag was detected,
                 EXTRACTED_NONE else.
      */
     static Extraction   simpleExtract( const OUString& rBcp47,
@@ -2511,6 +2513,26 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp
             eRet = EXTRACTED_LV;
         }
     }
+    else if (  (nHyph1 == 2 && nHyph2 == 5 && nHyph3 == 7)      // ll-CC-u-...
+            || (nHyph1 == 3 && nHyph2 == 6 && nHyph3 == 8))     // lll-CC-u-...
+    {
+        if (rBcp47[nHyph3-1] == 'u')
+        {
+            // Need to recognize as known, otherwise getLanguage() and
+            // getCountry() return empty string because mpImplLangtag is not
+            // used with a known mapping.
+            /* TODO: if there were more of these this would get ugly and would
+             * need some table-driven approach via isolang.cxx instead. */
+            if (rBcp47.equalsIgnoreAsciiCase( "es-ES-u-co-trad"))
+            {
+                rLanguage = "es";
+                rScript.clear();
+                rCountry  = "ES";
+                rVariants = "u-co-trad";    // not strictly a variant, but used to reconstruct the tag.
+                eRet = EXTRACTED_LV;
+            }
+        }
+    }
     else if (  (nHyph1 == 2 && nHyph2 == 5 && nLen >= 10)   // ll-CC-vvvv[vvvv][-...]
             || (nHyph1 == 3 && nHyph2 == 6 && nLen >= 11))  // lll-CC-vvvv[vvvv][-...]
     {
@@ -2551,6 +2573,16 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp
                 rVariants = "oed";
                 eRet = EXTRACTED_LV;
             }
+            // Other known and handled odd cases.
+            else if (rBcp47.equalsIgnoreAsciiCase( "es-ES_tradnl"))
+            {
+                // Will get overridden, but needs to be recognized as known.
+                rLanguage = "es";
+                rScript.clear();
+                rCountry  = "ES";
+                rVariants = "tradnl";   // this is nonsense, but.. ignored.
+                eRet = EXTRACTED_KNOWN_BAD;
+            }
         }
     }
     if (eRet == EXTRACTED_NONE)


More information about the Libreoffice-commits mailing list