[Libreoffice-commits] core.git: i18nlangtag/qa i18nlangtag/source include/i18nlangtag svtools/source

Eike Rathke erack at redhat.com
Mon Nov 18 12:20:52 PST 2013


 i18nlangtag/qa/cppunit/test_languagetag.cxx    |    8 ++++
 i18nlangtag/source/isolang/isolang.cxx         |   21 ++++++++----
 i18nlangtag/source/isolang/mslangid.cxx        |   20 +++++++++++
 i18nlangtag/source/languagetag/languagetag.cxx |   43 +++++++++++++++++++++++++
 include/i18nlangtag/lang.h                     |    6 ++-
 svtools/source/misc/langtab.src                |    9 ++---
 6 files changed, 94 insertions(+), 13 deletions(-)

New commits:
commit 6a826ddc4ee40a9727131cd4b13365bf6ae16319
Author: Eike Rathke <erack at redhat.com>
Date:   Mon Nov 18 21:07:43 2013 +0100

    cleaned up ISO code usage for Kurdish, fdo#63460
    
    * instead of the 'ku' macrolanguage code use proper ISO 639-3 codes and
      use 'Latn' script with 'kmr'
    * use MS-LCID 0x0492 for Central Kurdish (Iraq) [ckb-IQ]
    * added Southern Kurdish (Iraq) [sdh-IQ]
    
    Change-Id: Iaee8be98d0659a0e7bbf041e60025dd1f771066f

diff --git a/i18nlangtag/qa/cppunit/test_languagetag.cxx b/i18nlangtag/qa/cppunit/test_languagetag.cxx
index 17217fd..8946969 100644
--- a/i18nlangtag/qa/cppunit/test_languagetag.cxx
+++ b/i18nlangtag/qa/cppunit/test_languagetag.cxx
@@ -625,6 +625,14 @@ static bool checkMapping( const OUString rStr1, const OUString& rStr2 )
     if (rStr1 == "yi-Hebr-IL"  ) return rStr2 == "yi-IL";
     if (rStr1 == "ha-NG"       ) return rStr2 == "ha-Latn-NG";
     if (rStr1 == "ha-GH"       ) return rStr2 == "ha-Latn-GH";
+    if (rStr1 == "ku-Arab-IQ"  ) return rStr2 == "ckb-IQ";
+    if (rStr1 == "ku-Arab"     ) return rStr2 == "ckb";
+    if (rStr1 == "kmr-TR"      ) return rStr2 == "kmr-Latn-TR";
+    if (rStr1 == "ku-TR"       ) return rStr2 == "kmr-Latn-TR";
+    if (rStr1 == "kmr-SY"      ) return rStr2 == "kmr-Latn-SY";
+    if (rStr1 == "ku-SY"       ) return rStr2 == "kmr-Latn-SY";
+    if (rStr1 == "ku-IQ"       ) return rStr2 == "ckb-IQ";
+    if (rStr1 == "ku-IR"       ) return rStr2 == "ckb-IR";
     return rStr1 == rStr2;
 }
 
diff --git a/i18nlangtag/source/isolang/isolang.cxx b/i18nlangtag/source/isolang/isolang.cxx
index f9f34a5..5505540 100644
--- a/i18nlangtag/source/isolang/isolang.cxx
+++ b/i18nlangtag/source/isolang/isolang.cxx
@@ -506,10 +506,17 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] =
     { LANGUAGE_OBSOLETE_USER_LOWER_SORBIAN,"dsb", "DE", 0     },
     { LANGUAGE_OCCITAN_FRANCE,              "oc", "FR", 0     },
     { LANGUAGE_OBSOLETE_USER_OCCITAN,       "oc", "FR", 0     },
-    { LANGUAGE_USER_KURDISH_TURKEY,         "ku", "TR", 0     },
-    { LANGUAGE_USER_KURDISH_SYRIA,          "ku", "SY", 0     },
-    { LANGUAGE_USER_KURDISH_IRAQ,           "ku", "IQ", 0     },
-    { LANGUAGE_USER_KURDISH_IRAN,           "ku", "IR", 0     },
+    { LANGUAGE_USER_KURDISH_TURKEY,        "kmr", "TR", kSAME },
+    { LANGUAGE_USER_KURDISH_TURKEY,         "ku", "TR", kSAME },
+    { LANGUAGE_USER_KURDISH_SYRIA,         "kmr", "SY", kSAME },
+    { LANGUAGE_USER_KURDISH_SYRIA,          "ku", "SY", kSAME },
+    { LANGUAGE_KURDISH_ARABIC_IRAQ,        "ckb", "IQ", 0     },
+    { LANGUAGE_KURDISH_ARABIC_IRAQ,         "ku", "IQ", kSAME },
+    { LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ,  "ku", "IQ", LANGUAGE_KURDISH_ARABIC_IRAQ },
+    { LANGUAGE_USER_KURDISH_SOUTHERN_IRAQ, "sdh", "IQ", 0     },
+    { LANGUAGE_USER_KURDISH_IRAN,          "ckb", "IR", 0     },
+    { LANGUAGE_USER_KURDISH_IRAN,           "ku", "IR", kSAME },
+    { LANGUAGE_KURDISH_ARABIC_LSO,         "ckb", ""  , 0     },
     { LANGUAGE_USER_SARDINIAN,              "sc", "IT", 0     },    // macrolanguage code
     { LANGUAGE_USER_SARDINIAN_CAMPIDANESE, "sro", "IT", 0     },
     { LANGUAGE_USER_SARDINIAN_GALLURESE,   "sdn", "IT", 0     },
@@ -697,8 +704,10 @@ static IsoLanguageScriptCountryEntry const aImplIsoLangScriptEntries[] =
     { LANGUAGE_LATIN_LSO,                           "la-Latn", ""  , kSAME },   // MS, though Latn is suppress-script
     { LANGUAGE_TAI_NUA_CHINA,                      "tdd-Tale", "CN", 0     },   // MS reserved
     { LANGUAGE_LU_CHINA,                           "khb-Talu", "CN", 0     },   // MS reserved
-    { LANGUAGE_KURDISH_ARABIC_IRAQ,                 "ku-Arab", "IQ", 0     },   // macrolanguage code, MS
-    { LANGUAGE_KURDISH_ARABIC_LSO,                  "ku-Arab", ""  , 0     },   // macrolanguage code
+    { LANGUAGE_KURDISH_ARABIC_IRAQ,                 "ku-Arab", "IQ", kSAME },   // macrolanguage code, MS
+    { LANGUAGE_KURDISH_ARABIC_LSO,                  "ku-Arab", ""  , kSAME },   // macrolanguage code, MS
+    { LANGUAGE_USER_KURDISH_TURKEY,                "kmr-Latn", "TR", 0     },
+    { LANGUAGE_USER_KURDISH_SYRIA,                 "kmr-Latn", "SY", 0     },
     { LANGUAGE_PUNJABI_PAKISTAN,                   "pnb-Arab", "PK", 0     },
     { LANGUAGE_PUNJABI_ARABIC_LSO,                 "pnb-Arab", ""  , 0     },
     { LANGUAGE_PUNJABI_PAKISTAN,                    "pa-Arab", "PK", 0     },   // MS, incorrect
diff --git a/i18nlangtag/source/isolang/mslangid.cxx b/i18nlangtag/source/isolang/mslangid.cxx
index 3964c6f..d44256d 100644
--- a/i18nlangtag/source/isolang/mslangid.cxx
+++ b/i18nlangtag/source/isolang/mslangid.cxx
@@ -216,6 +216,18 @@ bool MsLangId::isRightToLeft( LanguageType nLang )
         default:
             break;
     }
+    switch (nLang)
+    {
+        case LANGUAGE_USER_KURDISH_IRAN:
+        case LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ:
+        case LANGUAGE_KURDISH_ARABIC_IRAQ:
+        case LANGUAGE_KURDISH_ARABIC_LSO:
+        case LANGUAGE_USER_KURDISH_SOUTHERN_IRAQ:
+            return true;
+
+        default:
+            break;
+    }
     return false;
 }
 
@@ -323,7 +335,10 @@ sal_Int16 MsLangId::getScriptType( LanguageType nLang )
         case LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA:
         case LANGUAGE_MONGOLIAN_MONGOLIAN_LSO:
         case LANGUAGE_USER_KURDISH_IRAN:
-        case LANGUAGE_USER_KURDISH_IRAQ:
+        case LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ:
+        case LANGUAGE_KURDISH_ARABIC_IRAQ:
+        case LANGUAGE_KURDISH_ARABIC_LSO:
+        case LANGUAGE_USER_KURDISH_SOUTHERN_IRAQ:
         case LANGUAGE_USER_KYRGYZ_CHINA:
             nScript = ::com::sun::star::i18n::ScriptType::COMPLEX;
             break;
@@ -507,6 +522,9 @@ LanguageType MsLangId::getReplacementForObsoleteLanguage( LanguageType nLang, bo
         case LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_MONTENEGRO:
             nLang = LANGUAGE_SERBIAN_CYRILLIC_MONTENEGRO;
             break;
+        case LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ:
+            nLang = LANGUAGE_KURDISH_ARABIC_IRAQ;
+            break;
 
         // The following are not strictly obsolete but should be mapped to a
         // replacement locale when encountered.
diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx
index b0a8f4d..60e8c12 100644
--- a/i18nlangtag/source/languagetag/languagetag.cxx
+++ b/i18nlangtag/source/languagetag/languagetag.cxx
@@ -2073,6 +2073,11 @@ LanguageTag & LanguageTag::makeFallback()
 }
 
 
+/* TODO: maybe this now could take advantage of the mnOverride field in
+ * isolang.cxx entries and search for kSAME instead of hardcoded special
+ * fallbacks. Though iterating through those tables would be slower and even
+ * then there would be some special cases, but we wouldn't lack entries that
+ * were missed out. */
 ::std::vector< OUString > LanguageTag::getFallbackStrings( bool bIncludeFullBcp47 ) const
 {
     ::std::vector< OUString > aVec;
@@ -2111,6 +2116,36 @@ LanguageTag & LanguageTag::makeFallback()
                 aVec.insert( aVec.end(), aRep.begin(), aRep.end());
                 // Already includes 'ca' language fallback.
             }
+            else if (aLanguage == "ku")
+            {
+                if (aCountry == "TR" || aCountry == "SY")
+                {
+                    aVec.push_back( "kmr-Latn-" + aCountry);
+                    aVec.push_back( "kmr-" + aCountry);
+                    aVec.push_back( "kmr-Latn");
+                    aVec.push_back( "kmr");
+                    aVec.push_back( aLanguage);
+                }
+                else if (aCountry == "IQ" || aCountry == "IR")
+                {
+                    aVec.push_back( "ckb-" + aCountry);
+                    aVec.push_back( "ckb");
+                }
+            }
+            else if (aLanguage == "kmr" && (aCountry == "TR" || aCountry == "SY"))
+            {
+                aVec.push_back( "ku-Latn-" + aCountry);
+                aVec.push_back( "ku-" + aCountry);
+                aVec.push_back( aLanguage);
+                aVec.push_back( "ku");
+            }
+            else if (aLanguage == "ckb" && (aCountry == "IQ" || aCountry == "IR"))
+            {
+                aVec.push_back( "ku-Arab-" + aCountry);
+                aVec.push_back( "ku-" + aCountry);
+                aVec.push_back( aLanguage);
+                // not 'ku' only, that was used for Latin script
+            }
             else
                 aVec.push_back( aLanguage);
         }
@@ -2174,6 +2209,8 @@ LanguageTag & LanguageTag::makeFallback()
             }
             else if (aLanguage == "pi" && aScript == "Latn")
                 aVec.push_back( "pli");     // a special case for Pali dictionary, see fdo#41599
+            else if (aLanguage == "kmr" && aScript == "Latn" && (aCountry == "TR" || aCountry == "SY"))
+                aVec.push_back( "ku-" + aCountry);  // legacy 'ku-TR'/'ku-SY' tags were the Latin script Kurdish locales
         }
         if (!aVariants.isEmpty() && !bHaveLanguageScriptVariant)
         {
@@ -2184,10 +2221,16 @@ LanguageTag & LanguageTag::makeFallback()
         aTmp = aLanguage + "-" + aScript;
         if (aTmp != maBcp47)
             aVec.push_back( aTmp);
+
         // 'sh' actually denoted a script, so have it here instead of appended
         // at the end as language-only.
         if (aLanguage == "sr" && aScript == "Latn")
             aVec.push_back( "sh");
+        else if (aLanguage == "ku" && aScript == "Arab")
+            aVec.push_back( "ckb");
+        // bare 'ku' only denoted Latin script, so it is the legacy fallback of 'kmr-Latn'
+        else if (aLanguage == "kmr" && aScript == "Latn" && aCountry.isEmpty())
+            aVec.push_back( "ku");
     }
     bool bHaveLanguageVariant = false;
     if (!aCountry.isEmpty())
diff --git a/include/i18nlangtag/lang.h b/include/i18nlangtag/lang.h
index aefe380..e08c152 100644
--- a/include/i18nlangtag/lang.h
+++ b/include/i18nlangtag/lang.h
@@ -277,7 +277,7 @@ typedef unsigned short LanguageType;
 #define LANGUAGE_KONKANI                    0x0457
 #define LANGUAGE_KOREAN                     0x0412
 #define LANGUAGE_KOREAN_JOHAB               0x0812  /* not mentioned in MS-LCID.pdf, oh joy */
-#define LANGUAGE_KURDISH_ARABIC_IRAQ        0x0492  /* TODO: obsoletes LANGUAGE_USER_KURDISH_IRAQ 0x0E26 */
+#define LANGUAGE_KURDISH_ARABIC_IRAQ        0x0492  /* obsoletes LANGUAGE_USER_KURDISH_IRAQ 0x0E26 */
 #define LANGUAGE_KURDISH_ARABIC_LSO         0x7C92
 #define LANGUAGE_LAO                        0x0454
 #define LANGUAGE_LATIN_LSO                  0x0476  /* obsoletes LANGUAGE_USER_LATIN 0x0610 */
@@ -531,8 +531,10 @@ typedef unsigned short LanguageType;
 #define LANGUAGE_USER_KOREAN_NORTH          0x8012  /* North Korean as opposed to South Korean, makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_KOREAN)) */
 #define LANGUAGE_USER_KURDISH_TURKEY        0x0626  /* sublang 0x01, Latin script */
 #define LANGUAGE_USER_KURDISH_SYRIA         0x0A26  /* sublang 0x02, Latin script */
-#define LANGUAGE_USER_KURDISH_IRAQ          0x0E26  /* sublang 0x03, Arabic script */
+#define LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ 0x0E26  /* sublang 0x03, Arabic script */
+#define LANGUAGE_USER_KURDISH_IRAQ          LANGUAGE_KURDISH_ARABIC_IRAQ
 #define LANGUAGE_USER_KURDISH_IRAN          0x1226  /* sublang 0x04, Arabic script */
+#define LANGUAGE_USER_KURDISH_SOUTHERN_IRAQ 0x8092  /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_KURDISH_ARABIC_LSO)) */
 #define LANGUAGE_USER_SARDINIAN             0x0627
 /* was reserved for Dzongkha but turned down with #i53497#: 0x0628 */  /* obsoleted by LANGUAGE_DZONGKHA */
 #define LANGUAGE_USER_DZONGKHA_MAP_LONLY    0xF851  /* to map "dz" only, because of the MS error, and preserve CTL information, sub 0x3e */
diff --git a/svtools/source/misc/langtab.src b/svtools/source/misc/langtab.src
index 7494617..0f5f584 100644
--- a/svtools/source/misc/langtab.src
+++ b/svtools/source/misc/langtab.src
@@ -212,10 +212,11 @@ StringArray STR_ARR_SVT_LANGUAGE_TABLE
         < "Bengali (Bangladesh)" ; LANGUAGE_BENGALI_BANGLADESH ; > ;
         < "Occitan" ; LANGUAGE_USER_OCCITAN ; > ;
         < "Khmer" ; LANGUAGE_KHMER ; > ;
-        < "Kurdish (Turkey)" ; LANGUAGE_USER_KURDISH_TURKEY ; > ;
-        < "Kurdish (Syria)" ; LANGUAGE_USER_KURDISH_SYRIA ; > ;
-        < "Kurdish (Iraq)" ; LANGUAGE_USER_KURDISH_IRAQ ; > ;
-        < "Kurdish (Iran)" ; LANGUAGE_USER_KURDISH_IRAN ; > ;
+        < "Kurdish, Northern (Turkey)" ; LANGUAGE_USER_KURDISH_TURKEY ; > ;
+        < "Kurdish, Northern (Syria)" ; LANGUAGE_USER_KURDISH_SYRIA ; > ;
+        < "Kurdish, Central (Iraq)" ; LANGUAGE_USER_KURDISH_IRAQ ; > ;
+        < "Kurdish, Central (Iran)" ; LANGUAGE_USER_KURDISH_IRAN ; > ;
+        < "Kurdish, Southern (Iraq)" ; LANGUAGE_USER_KURDISH_SOUTHERN_IRAQ ; > ;
         < "Sardinian" ; LANGUAGE_USER_SARDINIAN ; > ;
         < "Dzongkha" ; LANGUAGE_DZONGKHA ; > ;
         < "Swahili (Tanzania)" ; LANGUAGE_USER_SWAHILI_TANZANIA ; > ;


More information about the Libreoffice-commits mailing list