[Libreoffice-commits] core.git: 3 commits - filter/source i18nlangtag/qa i18nlangtag/source include/i18nlangtag svtools/source vcl/source
Eike Rathke
erack at redhat.com
Wed Oct 23 16:55:25 PDT 2013
filter/source/msfilter/countryid.cxx | 4
i18nlangtag/qa/cppunit/test_languagetag.cxx | 32 +++-
i18nlangtag/source/isolang/isolang.cxx | 193 ++++++++++++++++++-------
i18nlangtag/source/isolang/langid.pl | 12 -
i18nlangtag/source/isolang/mslangid.cxx | 47 ++++--
i18nlangtag/source/languagetag/languagetag.cxx | 22 +-
include/i18nlangtag/lang.h | 128 +++++++++++++---
svtools/source/misc/langtab.src | 10 -
svtools/source/misc/sampletext.cxx | 14 +
vcl/source/gdi/sallayout.cxx | 17 +-
10 files changed, 363 insertions(+), 116 deletions(-)
New commits:
commit 6b678c135da07f34da1bf7b19562366361e95085
Author: Eike Rathke <erack at redhat.com>
Date: Thu Oct 24 01:47:25 2013 +0200
added/checked/aligned MS-LangIDs and mappings from new MS-LCID.pdf
... found at http://msdn.microsoft.com/library/cc233965.aspx (the 5th or
6th place I'm aware of where MS defines LCIDs, all different ...)
* a bunch of new definitions up to Windows 8.1
* lots of cross-checks done with SIL, Ethnologue, ...
Change-Id: Ifae8e676558c6712fe752856acca600d05d0a63f
diff --git a/filter/source/msfilter/countryid.cxx b/filter/source/msfilter/countryid.cxx
index eac02e5..b36f51b 100644
--- a/filter/source/msfilter/countryid.cxx
+++ b/filter/source/msfilter/countryid.cxx
@@ -197,7 +197,7 @@ static const CountryEntry pTable[] =
{ COUNTRY_BELARUS, LANGUAGE_BELARUSIAN, false },
{ COUNTRY_MONACO, LANGUAGE_FRENCH_MONACO, true },
{ COUNTRY_UKRAINE, LANGUAGE_UKRAINIAN, false },
- { COUNTRY_SERBIA, LANGUAGE_SERBIAN_LATIN, false },
+ { COUNTRY_SERBIA, LANGUAGE_SERBIAN_LATIN_SAM, false },
{ COUNTRY_CROATIA, LANGUAGE_CROATIAN, true }, // sub type of LANGUAGE_SERBIAN
{ COUNTRY_SLOVENIA, LANGUAGE_SLOVENIAN, false },
{ COUNTRY_MACEDONIA, LANGUAGE_MACEDONIAN, false },
@@ -235,7 +235,7 @@ static const CountryEntry pTable[] =
{ COUNTRY_ISRAEL, LANGUAGE_HEBREW, false },
{ COUNTRY_BAHRAIN, LANGUAGE_ARABIC_BAHRAIN, true },
{ COUNTRY_QATAR, LANGUAGE_ARABIC_QATAR, true },
- { COUNTRY_MONGOLIA, LANGUAGE_MONGOLIAN, false },
+ { COUNTRY_MONGOLIA, LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA, false },
{ COUNTRY_NEPAL, LANGUAGE_NEPALI, false },
{ COUNTRY_IRAN, LANGUAGE_FARSI, false },
{ COUNTRY_TAJIKISTAN, LANGUAGE_TAJIK, false },
diff --git a/i18nlangtag/qa/cppunit/test_languagetag.cxx b/i18nlangtag/qa/cppunit/test_languagetag.cxx
index 6f7cc2d..0d083db 100644
--- a/i18nlangtag/qa/cppunit/test_languagetag.cxx
+++ b/i18nlangtag/qa/cppunit/test_languagetag.cxx
@@ -154,7 +154,7 @@ void TestLanguageTag::testAllTags()
CPPUNIT_ASSERT( aLocale.Language == "qlt" );
CPPUNIT_ASSERT( aLocale.Country == "CS" );
CPPUNIT_ASSERT( aLocale.Variant == s_sr_Latn_CS );
- CPPUNIT_ASSERT( sr_Latn_CS.getLanguageType() == LANGUAGE_SERBIAN_LATIN );
+ CPPUNIT_ASSERT( sr_Latn_CS.getLanguageType() == LANGUAGE_SERBIAN_LATIN_SAM );
CPPUNIT_ASSERT( sr_Latn_CS.isValidBcp47() == true );
CPPUNIT_ASSERT( sr_Latn_CS.isIsoLocale() == false );
CPPUNIT_ASSERT( sr_Latn_CS.isIsoODF() == true );
@@ -352,14 +352,15 @@ void TestLanguageTag::testAllTags()
}
// "no", "nb" and "nn" share the same primary language ID, which even is
- // assigned to "no-NO" for legacy so none gets it assigned, all on-the-fly.
+ // assigned to "no-NO" for legacy so none gets it assigned, all on-the-fly
+ // except if there is a defined MS-LCID for LanguageScriptOnly (LSO).
{
LanguageTag no( "no", true );
CPPUNIT_ASSERT( LanguageTag::isOnTheFlyID( no.getLanguageType()) );
LanguageTag nb( "nb", true );
- CPPUNIT_ASSERT( LanguageTag::isOnTheFlyID( nb.getLanguageType()) );
+ CPPUNIT_ASSERT( nb.getLanguageType() == LANGUAGE_NORWEGIAN_BOKMAL_LSO );
LanguageTag nn( "nn", true );
- CPPUNIT_ASSERT( LanguageTag::isOnTheFlyID( nn.getLanguageType()) );
+ CPPUNIT_ASSERT( nn.getLanguageType() == LANGUAGE_NORWEGIAN_NYNORSK_LSO );
LanguageTag no_NO( "no-NO", true );
CPPUNIT_ASSERT( no_NO.getLanguageType() == LANGUAGE_NORWEGIAN );
}
@@ -410,6 +411,29 @@ void TestLanguageTag::testAllTags()
// 'en-oed' is not a valid fallback!
}
+#if USE_LIBLANGTAG
+ // 'zh-yue-HK' uses extlang and should be canonicalized to the preferred 'yue-HK'
+ {
+ OUString s_zh_yue_HK( "zh-yue-HK" );
+ LanguageTag zh_yue_HK( s_zh_yue_HK );
+ lang::Locale aLocale = zh_yue_HK.getLocale();
+ CPPUNIT_ASSERT( zh_yue_HK.getBcp47() == "yue-HK" );
+ CPPUNIT_ASSERT( aLocale.Language == "yue" );
+ CPPUNIT_ASSERT( aLocale.Country == "HK" );
+ CPPUNIT_ASSERT( aLocale.Variant == "" );
+ CPPUNIT_ASSERT( zh_yue_HK.getLanguageType() == LANGUAGE_YUE_CHINESE_HONGKONG );
+ CPPUNIT_ASSERT( zh_yue_HK.isValidBcp47() == true );
+ CPPUNIT_ASSERT( zh_yue_HK.isIsoLocale() == true );
+ CPPUNIT_ASSERT( zh_yue_HK.isIsoODF() == true );
+ CPPUNIT_ASSERT( zh_yue_HK.getLanguageAndScript() == "yue" );
+ CPPUNIT_ASSERT( zh_yue_HK.getVariants() == "" );
+ ::std::vector< OUString > zh_yue_HK_Fallbacks( zh_yue_HK.getFallbackStrings( true));
+ CPPUNIT_ASSERT( zh_yue_HK_Fallbacks.size() == 2);
+ CPPUNIT_ASSERT( zh_yue_HK_Fallbacks[0] == "yue-HK");
+ CPPUNIT_ASSERT( zh_yue_HK_Fallbacks[1] == "yue");
+ }
+#endif
+
// 'qtz' is a local use known pseudolocale for key ID resource
{
OUString s_qtz( "qtz" );
diff --git a/i18nlangtag/source/isolang/isolang.cxx b/i18nlangtag/source/isolang/isolang.cxx
index cf4f2e1..f904b78b 100644
--- a/i18nlangtag/source/isolang/isolang.cxx
+++ b/i18nlangtag/source/isolang/isolang.cxx
@@ -161,10 +161,11 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] =
{ LANGUAGE_CHINESE_SIMPLIFIED, "zh", "CN", false },
{ LANGUAGE_CHINESE_SIMPLIFIED_LEGACY, "zh", "CN", false },
{ LANGUAGE_CHINESE_TRADITIONAL, "zh", "TW", false },
- { LANGUAGE_CHINESE_TRADITIONAL_LEGACY, "zh", "TW", false },
{ LANGUAGE_CHINESE_HONGKONG, "zh", "HK", false },
{ LANGUAGE_CHINESE_SINGAPORE, "zh", "SG", false },
{ LANGUAGE_CHINESE_MACAU, "zh", "MO", false },
+ { LANGUAGE_CHINESE_LSO, "zh", "" , false },
+ { LANGUAGE_YUE_CHINESE_HONGKONG, "yue", "HK", false },
{ LANGUAGE_ENGLISH_HONG_KONG_SAR, "en", "HK", false },
{ LANGUAGE_JAPANESE, "ja", "JP", false },
{ LANGUAGE_KOREAN, "ko", "KR", false },
@@ -236,7 +237,9 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] =
{ LANGUAGE_INDONESIAN, "in", "ID", true }, // old: new is "id"
{ LANGUAGE_NORWEGIAN, "no", "NO", false },
{ LANGUAGE_NORWEGIAN_BOKMAL, "nb", "NO", false },
+ { LANGUAGE_NORWEGIAN_BOKMAL_LSO, "nb", "" , false },
{ LANGUAGE_NORWEGIAN_NYNORSK, "nn", "NO", false },
+ { LANGUAGE_NORWEGIAN_NYNORSK_LSO, "nn", "" , false },
{ LANGUAGE_POLISH, "pl", "PL", false },
{ LANGUAGE_RHAETO_ROMAN, "rm", "CH", false },
{ LANGUAGE_ROMANIAN, "ro", "RO", false },
@@ -276,21 +279,28 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] =
{ LANGUAGE_LITHUANIAN_CLASSIC, "lt", "LT", false },
{ LANGUAGE_CROATIAN, "hr", "HR", false }, // Croatian in Croatia
{ LANGUAGE_CROATIAN_BOSNIA_HERZEGOVINA, "hr", "BA", false },
- { LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA, "bs", "BA", false },
- { LANGUAGE_USER_SERBIAN_CYRILLIC_SERBIA, "sr", "RS", false }, // Serbian Cyrillic in Serbia
- { LANGUAGE_SERBIAN_CYRILLIC, "sr", "CS", false }, // Serbian Cyrillic in Serbia and Montenegro
- { LANGUAGE_SERBIAN_CYRILLIC, "sr", "YU", true }, // legacy Serbian Cyrillic in Serbia and Montenegro (former Yugoslavia); kludge, sr_CS not supported by ICU 2.6 (3.4 does)
- { LANGUAGE_USER_SERBIAN_CYRILLIC_MONTENEGRO, "sr", "ME", false },
+ { LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA, "bs", "BA", false },
+ { LANGUAGE_BOSNIAN_LSO, "bs", "" , false }, // so what is 'bs' vs 'bs-Latn'?
+ { LANGUAGE_SERBIAN_CYRILLIC_SERBIA, "sr", "RS", false }, // Serbian Cyrillic in Serbia
+ { LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_SERBIA,"sr", "RS", false },
+ { LANGUAGE_SERBIAN_CYRILLIC_SAM, "sr", "CS", false }, // Serbian Cyrillic in Serbia and Montenegro
+ { LANGUAGE_SERBIAN_CYRILLIC_SAM, "sr", "YU", true }, // legacy Serbian Cyrillic in Serbia and Montenegro (former Yugoslavia); kludge, sr_CS not supported by ICU 2.6 (3.4 does)
+ { LANGUAGE_SERBIAN_CYRILLIC_MONTENEGRO, "sr", "ME", false },
+ { LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_MONTENEGRO,"sr", "ME", false },
{ LANGUAGE_SERBIAN_CYRILLIC_BOSNIA_HERZEGOVINA, "sr", "BA", false },
- { LANGUAGE_USER_SERBIAN_LATIN_SERBIA, "sh", "RS", true }, // legacy kludge, is sr-Latn-RS now
- { LANGUAGE_SERBIAN_LATIN, "sh", "CS", true }, // legacy kludge, is sr-Latn-CS now
- { LANGUAGE_SERBIAN_LATIN, "sh", "YU", true }, // legacy kludge, is sr-Latn-YU now
- { LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO, "sh", "ME", true }, // legacy kludge, is sr-Latn-ME now
+ { LANGUAGE_SERBIAN_CYRILLIC_LSO, "sr", "" , false },
+ { LANGUAGE_SERBIAN_LATIN_SERBIA, "sh", "RS", true }, // legacy kludge, is sr-Latn-RS now
+ { LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_SERBIA, "sh", "RS", true }, // legacy kludge, is sr-Latn-RS now
+ { LANGUAGE_SERBIAN_LATIN_SAM, "sh", "CS", true }, // legacy kludge, is sr-Latn-CS now
+ { LANGUAGE_SERBIAN_LATIN_SAM, "sh", "YU", true }, // legacy kludge, is sr-Latn-YU now
+ { LANGUAGE_SERBIAN_LATIN_MONTENEGRO, "sh", "ME", true }, // legacy kludge, is sr-Latn-ME now
+ { LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_MONTENEGRO,"sh", "ME", true }, // legacy kludge, is sr-Latn-ME now
{ LANGUAGE_SERBIAN_LATIN_BOSNIA_HERZEGOVINA, "sh", "BA", true }, // legacy kludge, is sr-Latn-BA now
- { LANGUAGE_SERBIAN_LATIN_NEUTRAL, "sh", "" , true }, // legacy kludge, is sr-Latn now
+ { LANGUAGE_SERBIAN_LATIN_LSO, "sh", "" , true }, // legacy kludge, is sr-Latn now
{ LANGUAGE_ARMENIAN, "hy", "AM", false },
{ LANGUAGE_AZERI_LATIN, "az", "AZ", false }, // macrolanguage code
{ LANGUAGE_UZBEK_LATIN, "uz", "UZ", false }, // macrolanguage code
+ { LANGUAGE_UZBEK_LATIN_LSO, "uz", "" , false }, // macrolanguage code
{ LANGUAGE_BENGALI_BANGLADESH, "bn", "BD", false },
{ LANGUAGE_BENGALI, "bn", "IN", false },
{ LANGUAGE_BURMESE, "my", "MM", false },
@@ -313,12 +323,15 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] =
{ LANGUAGE_ORIYA, "or", "IN", false },
{ LANGUAGE_PUNJABI, "pa", "IN", false },
{ LANGUAGE_SANSKRIT, "sa", "IN", false },
- { LANGUAGE_SINDHI, "sd", "IN", false },
+ { LANGUAGE_SINDHI, "sd", "IN", false }, // TODO: there's Deva(nagari) and Arab(ic) script, which do we use in 'sd' translation? MS maps this to 'sd-Deva-IN'
{ LANGUAGE_TAMIL, "ta", "IN", false },
+ { LANGUAGE_TAMIL_SRI_LANKA, "ta", "LK", false },
{ LANGUAGE_TELUGU, "te", "IN", false },
- { LANGUAGE_PUNJABI_PAKISTAN, "lah", "PK", false }, // preferring "lah" over "pa" for Western Punjabi, see http://www.ethnologue.com/show_language.asp?code=PNB
- { LANGUAGE_PUNJABI_PAKISTAN, "pa", "PK", false },
- { LANGUAGE_SINDHI_PAKISTAN, "sd", "PK", false },
+ { LANGUAGE_PUNJABI_PAKISTAN, "pnb", "PK", false },
+ { LANGUAGE_PUNJABI_ARABIC_LSO, "pnb", "" , false },
+ { LANGUAGE_PUNJABI_PAKISTAN, "lah", "PK", true }, // macrolanguage code, earlier preferred 'lah' over 'pa' for Western Panjabi, now there is 'pnb'
+ { LANGUAGE_PUNJABI_PAKISTAN, "pa", "PK", true }, // MS maps this to 'pa-Arab-PK', but 'pa'='pan' Eastern Panjabi is not used in PK, only in IN
+ { LANGUAGE_SINDHI_PAKISTAN, "sd", "PK", false }, // TODO: there's Deva(nagari) and Arab(ic) script, which do we use in 'sd' translation? MS maps this to 'sd-Arab-PK'
{ LANGUAGE_BELARUSIAN, "be", "BY", false },
{ LANGUAGE_CATALAN, "ca", "ES", false }, // Spain (default)
{ LANGUAGE_CATALAN, "ca", "AD", false }, // Andorra
@@ -334,23 +347,27 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] =
{ LANGUAGE_FRENCH_REUNION, "fr", "RE", false },
{ LANGUAGE_FRENCH, "fr", "" , false }, // needed as a catcher before other "fr" entries!
{ LANGUAGE_FRENCH_NORTH_AFRICA, "fr", "" , false },
- { LANGUAGE_FRENCH_WEST_INDIES, "fr", "" , false }, // unknown ISO country code
+ { LANGUAGE_FRENCH_WEST_INDIES, "fr", "" , false }, // no ISO country code; MS "Neither defined nor reserved"
{ LANGUAGE_FRISIAN_NETHERLANDS, "fy", "NL", false },
{ LANGUAGE_GAELIC_IRELAND, "ga", "IE", false },
{ LANGUAGE_GAELIC_SCOTLAND, "gd", "GB", false },
+ { LANGUAGE_GAELIC_SCOTLAND_LEGACY, "gd", "GB", false },
{ LANGUAGE_GALICIAN, "gl", "ES", false },
{ LANGUAGE_GEORGIAN, "ka", "GE", false },
{ LANGUAGE_KHMER, "km", "KH", false },
{ LANGUAGE_KIRGHIZ, "ky", "KG", false },
{ LANGUAGE_LAO, "lo", "LA", false },
{ LANGUAGE_MALTESE, "mt", "MT", false },
- { LANGUAGE_MONGOLIAN, "mn", "MN", true }, // Cyrillic script
+ { LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA, "mn", "MN", false }, // macrolanguage code; should be khk-MN; Cyrillic script
+ { LANGUAGE_MONGOLIAN_CYRILLIC_LSO, "mn", "" , false }, // macrolanguage code; should be khk; Cyrillic script
{ LANGUAGE_RUSSIAN_MOLDOVA, "mo", "MD", false },
{ LANGUAGE_SWAHILI, "sw", "KE", false },
{ LANGUAGE_USER_SWAHILI_TANZANIA, "sw", "TZ", false },
{ LANGUAGE_TAJIK, "tg", "TJ", false },
+ { LANGUAGE_TAJIK_LSO, "tg", "" , false },
{ LANGUAGE_TIBETAN, "bo", "CN", false }, // CN politically correct?
{ LANGUAGE_USER_TIBETAN_INDIA, "bo", "IN", false },
+ { LANGUAGE_USER_TIBETAN_BHUTAN, "bo", "BT", false }, // MS reserved, but with the ID error instead
{ LANGUAGE_DZONGKHA, "dz", "BT", false },
{ LANGUAGE_USER_DZONGKHA_MAP_LONLY, "dz", "" , false }, // because of the MS error, see lang.h
{ LANGUAGE_TURKMEN, "tk", "TM", false },
@@ -366,9 +383,12 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] =
{ LANGUAGE_VENDA, "ven", "ZA", true }, // 639-2 may have been used temporarily since 2004-07-23
{ LANGUAGE_XHOSA, "xh", "ZA", false },
{ LANGUAGE_ZULU, "zu", "ZA", false },
- { LANGUAGE_QUECHUA_ECUADOR, "qu", "EC", false },
- { LANGUAGE_QUECHUA_PERU, "qu", "PE", false },
- { LANGUAGE_QUECHUA_BOLIVIA, "qu", "BO", false }, // macro: quh-BO, qul-BO
+// { LANGUAGE_QUECHUA_COLOMBIA, "quc", "CO", false }, // MS reserved, and looks wrong, quc would be in Guatemala, not Colombia
+ { LANGUAGE_QUECHUA_ECUADOR, "quz", "EC", false }, // MS
+ { LANGUAGE_QUECHUA_ECUADOR, "qu", "EC", true }, // macrolanguage code
+ { LANGUAGE_QUECHUA_PERU, "quz", "PE", false }, // MS
+ { LANGUAGE_QUECHUA_PERU, "qu", "PE", true }, // macrolanguage code
+ { LANGUAGE_QUECHUA_BOLIVIA, "qu", "BO", false }, // macrolanguage code, TODO instead: quh-BO or qul-BO; MS says quz-BO which is wrong
{ LANGUAGE_PASHTO, "ps", "AF", false },
{ LANGUAGE_OROMO, "om", "ET", false },
{ LANGUAGE_DHIVEHI, "dv", "MV", false },
@@ -379,7 +399,9 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] =
{ LANGUAGE_GUARANI_PARAGUAY, "gug", "PY", false },
{ LANGUAGE_HAWAIIAN_UNITED_STATES, "haw", "US", false },
{ LANGUAGE_EDO, "bin", "NG", false },
- { LANGUAGE_FULFULDE_NIGERIA, "ff", "NG", false },
+ { LANGUAGE_FULFULDE_NIGERIA, "fuv", "NG", false },
+ { LANGUAGE_FULFULDE_NIGERIA, "ff", "NG", true }, // macrolanguage code
+ { LANGUAGE_FULFULDE_SENEGAL, "ff", "SN", false }, // macrolanguage code
{ LANGUAGE_HAUSA_NIGERIA, "ha", "NG", false },
{ LANGUAGE_USER_HAUSA_GHANA, "ha", "GH", false },
{ LANGUAGE_IGBO_NIGERIA, "ig", "NG", false },
@@ -397,17 +419,21 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] =
{ LANGUAGE_SYRIAC, "syr", "TR", false }, // "TR" according to http://www.ethnologue.com/show_language.asp?code=SYC
{ LANGUAGE_SINHALESE_SRI_LANKA, "si", "LK", false },
{ LANGUAGE_CHEROKEE_UNITED_STATES, "chr", "US", false },
- { LANGUAGE_INUKTITUT_LATIN_CANADA, "iu", "CA", false },
-// { LANGUAGE_INUKTITUT_SYLLABICS_CANADA, "iu", "CA", false }, // script codes not supported yet
+ { LANGUAGE_INUKTITUT_LATIN_CANADA, "iu", "CA", true }, // macrolanguage code
+ { LANGUAGE_INUKTITUT_LATIN_LSO, "iu", "" , true }, // macrolanguage code
{ LANGUAGE_SAMI_NORTHERN_NORWAY, "se", "NO", false },
{ LANGUAGE_SAMI_INARI, "smn", "FI", false },
+ { LANGUAGE_SAMI_INARI_LSO, "smn", "" , false },
{ LANGUAGE_SAMI_LULE_NORWAY, "smj", "NO", false },
{ LANGUAGE_SAMI_LULE_SWEDEN, "smj", "SE", false },
+ { LANGUAGE_SAMI_LULE_LSO, "smj", "" , false },
{ LANGUAGE_SAMI_NORTHERN_FINLAND, "se", "FI", false },
{ LANGUAGE_SAMI_NORTHERN_SWEDEN, "se", "SE", false },
{ LANGUAGE_SAMI_SKOLT, "sms", "FI", false },
+ { LANGUAGE_SAMI_SKOLT_LSO, "sms", "" , false },
{ LANGUAGE_SAMI_SOUTHERN_NORWAY, "sma", "NO", false },
{ LANGUAGE_SAMI_SOUTHERN_SWEDEN, "sma", "SE", false },
+ { LANGUAGE_SAMI_SOUTHERN_LSO, "sma", "" , false },
{ LANGUAGE_USER_SAMI_KILDIN_RUSSIA, "sjd", "RU", false },
{ LANGUAGE_MAPUDUNGUN_CHILE, "arn", "CL", false },
{ LANGUAGE_CORSICAN_FRANCE, "co", "FR", false },
@@ -416,18 +442,27 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] =
{ LANGUAGE_MOHAWK_CANADA, "moh", "CA", false },
{ LANGUAGE_BASHKIR_RUSSIA, "ba", "RU", false },
{ LANGUAGE_KICHE_GUATEMALA, "qut", "GT", false },
- { LANGUAGE_DARI_AFGHANISTAN, "gbz", "AF", false },
+ { LANGUAGE_DARI_AFGHANISTAN, "prs", "AF", false },
+ { LANGUAGE_DARI_AFGHANISTAN, "gbz", "AF", true }, // was an error
{ LANGUAGE_WOLOF_SENEGAL, "wo", "SN", false },
{ LANGUAGE_FILIPINO, "fil", "PH", false },
{ LANGUAGE_USER_TAGALOG, "tl", "PH", false },
{ LANGUAGE_ENGLISH_PHILIPPINES, "en", "PH", false },
-// { LANGUAGE_IBIBIO_NIGERIA, "nic", "NG", false }, // ISO "nic" is only a collective language code
+ { LANGUAGE_IBIBIO_NIGERIA, "ibb", "NG", false },
{ LANGUAGE_YI, "ii", "CN", false },
- { LANGUAGE_TAMAZIGHT_LATIN, "kab", "DZ", false }, // In practice Kabyle is the language used for this
+ { LANGUAGE_ENGLISH_ARAB_EMIRATES, "en", "AE", false }, // MS reserved
+ { LANGUAGE_ENGLISH_BAHRAIN, "en", "BH", false }, // MS reserved
+ { LANGUAGE_ENGLISH_EGYPT, "en", "EG", false }, // MS reserved
+ { LANGUAGE_ENGLISH_JORDAN, "en", "JO", false }, // MS reserved
+ { LANGUAGE_ENGLISH_KUWAIT, "en", "KW", false }, // MS reserved
+ { LANGUAGE_ENGLISH_TURKEY, "en", "TR", false }, // MS reserved
+ { LANGUAGE_ENGLISH_YEMEN, "en", "YE", false }, // MS reserved
+ { LANGUAGE_TAMAZIGHT_LATIN_ALGERIA, "kab", "DZ", false }, // In practice Kabyle is the language used for this
{ LANGUAGE_OBSOLETE_USER_KABYLE, "kab", "DZ", false },
- { LANGUAGE_TAMAZIGHT_LATIN, "ber", "DZ", false }, // In practice Algeria has standardized on Kabyle as the member of the "ber" collective which gets used there.
- { LANGUAGE_TAMAZIGHT_TIFINAGH, "ber", "MA", false }, // Morocco is officially using Tifinagh for its Berber languages so store it to distinguish explicitly from LANGUAGE_TAMAZIGHT_LATIN, even though as a collective language its not of much use
-// { LANGUAGE_TAMAZIGHT_ARABIC, "ber", "" , false }, // ISO "ber" only collective!
+ { LANGUAGE_TAMAZIGHT_LATIN_ALGERIA, "ber", "DZ", true }, // In practice Algeria has standardized on Kabyle as the member of the "ber" collective which gets used there.
+ { LANGUAGE_TAMAZIGHT_TIFINAGH_MOROCCO, "tmz", "MA", true },
+ { LANGUAGE_TAMAZIGHT_MOROCCO, "tmz", "MA", false }, // MS reserved
+ { LANGUAGE_TAMAZIGHT_TIFINAGH_MOROCCO, "ber", "MA", true }, // Morocco is officially using Tifinagh for its Berber languages, old kludge to distinguish from LANGUAGE_TAMAZIGHT_LATIN_ALGERIA
{ LANGUAGE_LATIN, "la", "VA", false },
{ LANGUAGE_OBSOLETE_USER_LATIN, "la", "VA", false },
{ LANGUAGE_USER_ESPERANTO, "eo", "" , false },
@@ -439,6 +474,7 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] =
{ LANGUAGE_UPPER_SORBIAN_GERMANY, "hsb", "DE", false }, // MS maps this to 'wen-DE', which is nonsense. 'wen' is a collective language code, 'WEN' is a SIL code, see http://www.ethnologue.com/14/show_iso639.asp?code=wen and http://www.ethnologue.com/14/show_language.asp?code=WEN
{ LANGUAGE_OBSOLETE_USER_UPPER_SORBIAN,"hsb", "DE", false },
{ LANGUAGE_LOWER_SORBIAN_GERMANY, "dsb", "DE", false }, // MS maps this to 'wee-DE', which is nonsense. 'WEE' is a SIL code, see http://www.ethnologue.com/14/show_language.asp?code=WEE
+ { LANGUAGE_LOWER_SORBIAN_LSO, "dsb", "" , false },
{ LANGUAGE_OBSOLETE_USER_LOWER_SORBIAN,"dsb", "DE", false },
{ LANGUAGE_OCCITAN_FRANCE, "oc", "FR", false },
{ LANGUAGE_OBSOLETE_USER_OCCITAN, "oc", "FR", false },
@@ -457,7 +493,8 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] =
{ LANGUAGE_OBSOLETE_USER_KALAALLISUT, "kl", "GL", false },
{ LANGUAGE_USER_SWAZI, "ss", "ZA", false },
{ LANGUAGE_USER_NDEBELE_SOUTH, "nr", "ZA", false },
- { LANGUAGE_USER_TSWANA_BOTSWANA, "tn", "BW", false },
+ { LANGUAGE_TSWANA_BOTSWANA, "tn", "BW", false },
+ { LANGUAGE_OBSOLETE_USER_TSWANA_BOTSWANA, "tn", "BW", false },
{ LANGUAGE_USER_MOORE, "mos", "BF", false },
{ LANGUAGE_USER_BAMBARA, "bm", "ML", false },
{ LANGUAGE_USER_AKAN, "ak", "GH", false },
@@ -499,8 +536,9 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] =
{ LANGUAGE_USER_MAORE, "swb", "YT", false },
{ LANGUAGE_USER_BUSHI, "buc", "YT", false },
{ LANGUAGE_USER_TAHITIAN, "ty", "PF", false },
- { LANGUAGE_USER_MALAGASY_PLATEAU, "plt", "MG", false },
- { LANGUAGE_USER_MALAGASY_PLATEAU, "mg", "MG", false },
+ { LANGUAGE_MALAGASY_PLATEAU, "plt", "MG", false },
+ { LANGUAGE_MALAGASY_PLATEAU, "mg", "MG", true },
+ { LANGUAGE_OBSOLETE_USER_MALAGASY_PLATEAU, "plt", "MG", false },
{ LANGUAGE_USER_BAFIA, "ksf", "CM", false },
{ LANGUAGE_USER_GIKUYU, "ki", "KE", false },
{ LANGUAGE_USER_RUSYN_UKRAINE, "rue", "UA", false },
@@ -574,21 +612,75 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] =
static IsoLanguageScriptCountryEntry const aImplIsoLangScriptEntries[] =
{
- // MS-LangID ISO639-ISO15924, ISO3166
- { LANGUAGE_USER_SERBIAN_LATIN_SERBIA, "sr-Latn", "RS" },
- { LANGUAGE_SERBIAN_LATIN, "sr-Latn", "CS" }, // Serbian Latin in Serbia and Montenegro; note that not all applications may know about the 'CS' reusage mess, see https://en.wikipedia.org/wiki/ISO_3166-2:CS
- { LANGUAGE_SERBIAN_LATIN, "sr-Latn", "YU" }, // legacy Serbian Latin in Yugoslavia
- { LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO, "sr-Latn", "ME" },
- { LANGUAGE_SERBIAN_LATIN_BOSNIA_HERZEGOVINA,"sr-Latn", "BA" },
- { LANGUAGE_SERBIAN_LATIN_NEUTRAL, "sr-Latn", "" },
+ // MS-LangID ISO639-ISO15924, ISO3166
+ { LANGUAGE_SERBIAN_LATIN_SERBIA, "sr-Latn", "RS" },
+ { LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_SERBIA, "sr-Latn", "RS" },
+ { LANGUAGE_SERBIAN_LATIN_MONTENEGRO, "sr-Latn", "ME" },
+ { LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_MONTENEGRO,"sr-Latn", "ME" },
+ { LANGUAGE_SERBIAN_LATIN_BOSNIA_HERZEGOVINA, "sr-Latn", "BA" },
+ { LANGUAGE_SERBIAN_LATIN_SAM, "sr-Latn", "CS" }, // Serbian Latin in Serbia and Montenegro; note that not all applications may know about the 'CS' reusage mess, see https://en.wikipedia.org/wiki/ISO_3166-2:CS
+ { LANGUAGE_SERBIAN_LATIN_SAM, "sr-Latn", "YU" }, // legacy Serbian Latin in Yugoslavia
+ { LANGUAGE_SERBIAN_LATIN_LSO, "sr-Latn", "" },
+ { LANGUAGE_SERBIAN_LATIN_NEUTRAL, "sr-Latn", "" }, // MS lists this as 'sr' only, what a mess
+ { LANGUAGE_SERBIAN_CYRILLIC_SERBIA, "sr-Cyrl", "RS" }, // MS
+ { LANGUAGE_SERBIAN_CYRILLIC_MONTENEGRO, "sr-Cyrl", "ME" }, // MS
+ { LANGUAGE_SERBIAN_CYRILLIC_BOSNIA_HERZEGOVINA, "sr-Cyrl", "BA" }, // MS
+ { LANGUAGE_SERBIAN_CYRILLIC_SAM, "sr-Cyrl", "CS" }, // MS
+ { LANGUAGE_SERBIAN_CYRILLIC_LSO, "sr-Cyrl", "" }, // MS
{ LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_HERZEGOVINA, "bs-Cyrl", "BA" },
- { LANGUAGE_AZERI_CYRILLIC, "az-Cyrl", "AZ" }, // macrolanguage code
- { LANGUAGE_UZBEK_CYRILLIC, "uz-Cyrl", "UZ" }, // macrolanguage code
- { LANGUAGE_MONGOLIAN, "mn-Cyrl", "MN" }, // macrolanguage code
- { LANGUAGE_MONGOLIAN_MONGOLIAN, "mn-Mong", "MN" }, // macrolanguage code
- { LANGUAGE_USER_PALI_LATIN, "pi-Latn", "" },
- { LANGUAGE_USER_KARAKALPAK_LATIN, "kaa-Latn", "UZ" },
- { LANGUAGE_DONTKNOW, "", "" } // marks end of table
+ { LANGUAGE_BOSNIAN_CYRILLIC_LSO, "bs-Cyrl", "" },
+ { LANGUAGE_AZERI_CYRILLIC, "az-Cyrl", "AZ" }, // macrolanguage code
+ { LANGUAGE_AZERI_CYRILLIC_LSO, "az-Cyrl", "" }, // macrolanguage code
+ { LANGUAGE_UZBEK_CYRILLIC, "uz-Cyrl", "UZ" }, // macrolanguage code
+ { LANGUAGE_UZBEK_CYRILLIC_LSO, "uz-Cyrl", "" }, // macrolanguage code
+ { LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA, "mn-Cyrl", "MN" }, // macrolanguage code; should be khk-MN or khk-Cyrl-MN
+ { LANGUAGE_MONGOLIAN_CYRILLIC_LSO, "mn-Cyrl", "" }, // macrolanguage code; MS, should be khk or khk-Cyrl
+ { LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA, "mn-Mong", "MN" }, // macrolanguage code; MS, should be khk-Mong-MN
+ { LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA, "mn-Mong", "CN" }, // macrolanguage code; MS, should actually be mvf-CN
+ { LANGUAGE_MONGOLIAN_MONGOLIAN_LSO, "mn-Mong", "" }, // macrolanguage code
+ { LANGUAGE_USER_PALI_LATIN, "pi-Latn", "" },
+ { LANGUAGE_USER_KARAKALPAK_LATIN, "kaa-Latn", "UZ" },
+ { LANGUAGE_TAJIK, "tg-Cyrl", "TJ" }, // MS
+ { LANGUAGE_TAJIK_LSO, "tg-Cyrl", "" }, // MS
+ { LANGUAGE_AZERI_LATIN, "az-Latn", "AZ" }, // macrolanguage code; MS
+ { LANGUAGE_AZERI_LATIN_LSO, "az-Latn", "" }, // macrolanguage code; MS
+ { LANGUAGE_USER_YIDDISH_US, "yi-Hebr", "US" }, // macrolanguage code; MS
+ { LANGUAGE_YIDDISH, "yi-Hebr", "IL" }, // macrolanguage code; MS
+ { LANGUAGE_UZBEK_LATIN, "uz-Latn", "UZ" }, // macrolanguage code
+ { LANGUAGE_UZBEK_LATIN_LSO, "uz-Latn", "" },
+// { LANGUAGE_SINDHI, "sd-Deva", "IN" }, // MS, TODO: see comment above in aImplIsoLangEntries
+// { LANGUAGE_SINDHI_PAKISTAN, "sd-Arab", "PK" }, // MS, TODO: see comment above in aImplIsoLangEntries
+ { LANGUAGE_SINDHI_ARABIC_LSO, "sd-Arab", "" },
+ { LANGUAGE_CHEROKEE_UNITED_STATES, "chr-Cher", "US" }, // MS
+ { LANGUAGE_CHEROKEE_CHEROKEE_LSO, "chr-Cher", "" },
+ { LANGUAGE_INUKTITUT_SYLLABICS_CANADA, "iu-Cans", "CA" }, // macrolanguage code, MS
+ { LANGUAGE_INUKTITUT_SYLLABICS_LSO, "iu-Cans", "" }, // macrolanguage code, MS
+ { LANGUAGE_INUKTITUT_LATIN_CANADA, "iu-Latn", "CA" }, // macrolanguage code, MS
+ { LANGUAGE_INUKTITUT_LATIN_LSO, "iu-Latn", "" }, // macrolanguage code, MS
+ { LANGUAGE_TAMAZIGHT_TIFINAGH_MOROCCO, "tzm-Tfng", "MA" },
+ { LANGUAGE_TAMAZIGHT_TIFINAGH_LSO, "tzm-Tfng", "" },
+ { LANGUAGE_KASHMIRI_INDIA, "ks-Deva", "IN" }, // MS
+ { LANGUAGE_KASHMIRI, "ks-Arab", "" }, // MS, Kashmiri in "Jammu and Kashmir" ... no ISO3166 code for that
+ { LANGUAGE_HAUSA_NIGERIA, "ha-Latn", "NG" }, // MS
+ { LANGUAGE_USER_HAUSA_GHANA, "ha-Latn", "GH" }, // MS
+ { LANGUAGE_HAUSA_LATIN_LSO, "ha-Latn", "" },
+ { LANGUAGE_LATIN, "la-Latn", "" }, // MS
+ { LANGUAGE_TAI_NUA_CHINA, "tdd-Tale", "CN" }, // MS reserved
+ { LANGUAGE_LU_CHINA, "khb-Talu", "CN" }, // MS reserved
+ { LANGUAGE_KURDISH_ARABIC_IRAQ, "ku-Arab", "IQ" }, // macrolanguage code, MS
+ { LANGUAGE_KURDISH_ARABIC_LSO, "ku-Arab", "" }, // macrolanguage code
+ { LANGUAGE_PUNJABI_PAKISTAN, "pnb-Arab", "PK" },
+ { LANGUAGE_PUNJABI_ARABIC_LSO, "pnb-Arab", "" },
+ { LANGUAGE_PUNJABI_PAKISTAN, "pa-Arab", "PK" }, // MS, incorrect
+ { LANGUAGE_PUNJABI_ARABIC_LSO, "pa-Arab", "" }, // MS, incorrect
+ { LANGUAGE_TAMAZIGHT_LATIN_ALGERIA, "tzm-Latn", "DZ" }, // MS
+ { LANGUAGE_TAMAZIGHT_LATIN_LSO, "tzm-Latn", "" }, // MS
+ { LANGUAGE_FULFULDE_SENEGAL, "ff-Latn", "SN" }, // macrolanguage code, MS
+ { LANGUAGE_FULFULDE_LATIN_LSO, "ff-Latn", "" }, // macrolanguage code
+ { LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA, "bs-Latn", "BA" }, // MS, though Latn is suppress-script
+ { LANGUAGE_BOSNIAN_LATIN_LSO, "bs-Latn", "" }, // MS, though Latn is suppress-script
+ { LANGUAGE_CHINESE_TRADITIONAL_LSO, "zh-Hant", "" },
+ { LANGUAGE_DONTKNOW, "", "" } // marks end of table
};
static Bcp47CountryEntry const aImplBcp47CountryEntries[] =
@@ -597,6 +689,7 @@ static Bcp47CountryEntry const aImplBcp47CountryEntries[] =
{ LANGUAGE_CATALAN_VALENCIAN, "ca-ES-valencia", "ES", "ca-valencia" },
{ LANGUAGE_OBSOLETE_USER_CATALAN_VALENCIAN, "ca-ES-valencia", "ES", "" }, // In case MS format files using the old value escaped into the wild, map them back.
{ LANGUAGE_USER_ENGLISH_UK_OED, "en-GB-oed", "GB", "" }, // grandfathered
+// { LANGUAGE_YUE_CHINESE_HONGKONG, "zh-yue-HK", "HK", "" }, // MS reserved, prefer yue-HK; do not add unless LanguageTag::simpleExtract() can handle it to not call liblangtag for rsc!
{ LANGUAGE_DONTKNOW, "", "", "" } // marks end of table
};
@@ -719,8 +812,8 @@ static IsoLangNoneStdEntry const aImplIsoNoneStdLangEntries[] =
{
{ LANGUAGE_NORWEGIAN_BOKMAL, "no", "BOK" }, // registered subtags for "no" in rfc1766
{ LANGUAGE_NORWEGIAN_NYNORSK, "no", "NYN" }, // registered subtags for "no" in rfc1766
- { LANGUAGE_SERBIAN_LATIN, "sr", "latin" },
- { LANGUAGE_SERBIAN_CYRILLIC, "sr", "cyrillic" },
+ { LANGUAGE_SERBIAN_LATIN_SAM, "sr", "latin" },
+ { LANGUAGE_SERBIAN_CYRILLIC_SAM, "sr", "cyrillic" },
{ LANGUAGE_AZERI_LATIN, "az", "latin" },
{ LANGUAGE_AZERI_CYRILLIC, "az", "cyrillic" },
{ LANGUAGE_DONTKNOW, "", "" } // marks end of table
@@ -1138,9 +1231,9 @@ static IsoLangGLIBCModifiersEntry const aImplIsoLangGLIBCModifiersEntries[] =
// MS-LANGID codes ISO639-1/2/3 ISO3166 glibc modifier
{ LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_HERZEGOVINA, "bs", "BA", "cyrillic" },
{ LANGUAGE_USER_SERBIAN_LATIN_SERBIA, "sr", "RS", "latin" }, // Serbian Latin in Serbia
- { LANGUAGE_SERBIAN_LATIN, "sr", "CS", "latin" }, // Serbian Latin in Serbia and Montenegro
+ { LANGUAGE_SERBIAN_LATIN_SAM, "sr", "CS", "latin" }, // Serbian Latin in Serbia and Montenegro
{ LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO, "sr", "ME", "latin" }, // Serbian Latin in Montenegro
- { LANGUAGE_SERBIAN_LATIN_NEUTRAL, "sr", "", "latin" },
+ { LANGUAGE_SERBIAN_LATIN_LSO, "sr", "", "latin" },
{ LANGUAGE_AZERI_CYRILLIC, "az", "AZ", "cyrillic" },
{ LANGUAGE_UZBEK_CYRILLIC, "uz", "UZ", "cyrillic" },
{ LANGUAGE_DONTKNOW, "", "", "" } // marks end of table
diff --git a/i18nlangtag/source/isolang/mslangid.cxx b/i18nlangtag/source/isolang/mslangid.cxx
index 36d5d8a..72d5051 100644
--- a/i18nlangtag/source/isolang/mslangid.cxx
+++ b/i18nlangtag/source/isolang/mslangid.cxx
@@ -252,7 +252,8 @@ bool MsLangId::isTraditionalChinese( const ::com::sun::star::lang::Locale & rLoc
//static
bool MsLangId::isChinese( LanguageType nLang )
{
- return MsLangId::getPrimaryLanguage(nLang) == MsLangId::getPrimaryLanguage(LANGUAGE_CHINESE);
+ return MsLangId::getPrimaryLanguage(nLang) == MsLangId::getPrimaryLanguage(LANGUAGE_CHINESE) ||
+ MsLangId::getPrimaryLanguage(nLang) == MsLangId::getPrimaryLanguage(LANGUAGE_YUE_CHINESE_HONGKONG);
}
//static
@@ -266,9 +267,10 @@ bool MsLangId::isCJK( LanguageType nLang )
{
switch (nLang & LANGUAGE_MASK_PRIMARY)
{
- case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY:
- case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY:
- case LANGUAGE_KOREAN & LANGUAGE_MASK_PRIMARY:
+ case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY:
+ case LANGUAGE_YUE_CHINESE_HONGKONG & LANGUAGE_MASK_PRIMARY:
+ case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY:
+ case LANGUAGE_KOREAN & LANGUAGE_MASK_PRIMARY:
return true;
default:
break;
@@ -313,7 +315,9 @@ sal_Int16 MsLangId::getScriptType( LanguageType nLang )
switch( nLang )
{
// CTL
- case LANGUAGE_MONGOLIAN_MONGOLIAN:
+ case LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA:
+ case LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA:
+ case LANGUAGE_MONGOLIAN_MONGOLIAN_LSO:
case LANGUAGE_USER_KURDISH_IRAN:
case LANGUAGE_USER_KURDISH_IRAQ:
case LANGUAGE_USER_KYRGYZ_CHINA:
@@ -321,7 +325,8 @@ sal_Int16 MsLangId::getScriptType( LanguageType nLang )
break;
// "Western"
- case LANGUAGE_MONGOLIAN:
+ case LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA:
+ case LANGUAGE_MONGOLIAN_CYRILLIC_LSO:
case LANGUAGE_USER_KURDISH_SYRIA:
case LANGUAGE_USER_KURDISH_TURKEY:
nScript = ::com::sun::star::i18n::ScriptType::LATIN;
@@ -341,9 +346,10 @@ sal_Int16 MsLangId::getScriptType( LanguageType nLang )
switch ( nLang & LANGUAGE_MASK_PRIMARY )
{
// CJK catcher
- case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY:
- case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY:
- case LANGUAGE_KOREAN & LANGUAGE_MASK_PRIMARY:
+ case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY:
+ case LANGUAGE_YUE_CHINESE_HONGKONG & LANGUAGE_MASK_PRIMARY:
+ case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY:
+ case LANGUAGE_KOREAN & LANGUAGE_MASK_PRIMARY:
nScript = ::com::sun::star::i18n::ScriptType::ASIAN;
break;
@@ -433,11 +439,32 @@ LanguageType MsLangId::getReplacementForObsoleteLanguage( LanguageType nLang, bo
nLang = LANGUAGE_LUXEMBOURGISH_LUXEMBOURG;
break;
case LANGUAGE_OBSOLETE_USER_KABYLE:
- nLang = LANGUAGE_TAMAZIGHT_LATIN;
+ nLang = LANGUAGE_TAMAZIGHT_LATIN_ALGERIA;
break;
case LANGUAGE_OBSOLETE_USER_CATALAN_VALENCIAN:
nLang = LANGUAGE_CATALAN_VALENCIAN;
break;
+ case LANGUAGE_OBSOLETE_USER_MALAGASY_PLATEAU:
+ nLang = LANGUAGE_MALAGASY_PLATEAU;
+ break;
+ case LANGUAGE_GAELIC_SCOTLAND_LEGACY:
+ nLang = LANGUAGE_GAELIC_SCOTLAND;
+ break;
+ case LANGUAGE_OBSOLETE_USER_TSWANA_BOTSWANA:
+ nLang = LANGUAGE_TSWANA_BOTSWANA;
+ break;
+ case LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_SERBIA:
+ nLang = LANGUAGE_SERBIAN_LATIN_SERBIA;
+ break;
+ case LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_MONTENEGRO:
+ nLang = LANGUAGE_SERBIAN_LATIN_MONTENEGRO;
+ break;
+ case LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_SERBIA:
+ nLang = LANGUAGE_SERBIAN_CYRILLIC_SERBIA;
+ break;
+ case LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_MONTENEGRO:
+ nLang = LANGUAGE_SERBIAN_CYRILLIC_MONTENEGRO;
+ break;
// The following are not strictly obsolete but should be mapped to a
// replacement locale when encountered.
diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx
index 4f8af9a..689a985 100644
--- a/i18nlangtag/source/languagetag/languagetag.cxx
+++ b/i18nlangtag/source/languagetag/languagetag.cxx
@@ -1174,7 +1174,7 @@ bool LanguageTagImpl::canonicalize()
return bChanged; // that's it
}
meIsLiblangtagNeeded = DECISION_YES;
- SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for " << maBcp47);
+ SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for '" << maBcp47 << "'");
if (!mpImplLangtag)
{
@@ -1187,7 +1187,7 @@ bool LanguageTagImpl::canonicalize()
if (lt_tag_parse( mpImplLangtag, OUStringToOString( maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
{
char* pTag = lt_tag_canonicalize( mpImplLangtag, &aError.p);
- SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize " << maBcp47);
+ SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize '" << maBcp47 << "'");
if (pTag)
{
OUString aOld( maBcp47);
@@ -1201,7 +1201,7 @@ bool LanguageTagImpl::canonicalize()
meIsIsoODF = DECISION_DONTKNOW;
if (!lt_tag_parse( mpImplLangtag, pTag, &aError.p))
{
- SAL_WARN( "i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse " << maBcp47);
+ SAL_WARN( "i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse '" << maBcp47 << "'");
free( pTag);
meIsValid = DECISION_NO;
return bChanged;
@@ -1214,7 +1214,7 @@ bool LanguageTagImpl::canonicalize()
}
else
{
- SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse " << maBcp47);
+ SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47 << "'");
}
meIsValid = DECISION_NO;
return bChanged;
@@ -1536,11 +1536,13 @@ OUString LanguageTagImpl::getLanguageFromLangtag()
if (mpImplLangtag)
{
const lt_lang_t* pLangT = lt_tag_get_language( mpImplLangtag);
- SAL_WARN_IF( !pLangT, "i18nlangtag", "LanguageTag::getLanguageFromLangtag: pLangT==NULL");
+ SAL_WARN_IF( !pLangT, "i18nlangtag",
+ "LanguageTag::getLanguageFromLangtag: pLangT==NULL for '" << maBcp47 << "'");
if (!pLangT)
return aLanguage;
const char* pLang = lt_lang_get_tag( pLangT);
- SAL_WARN_IF( !pLang, "i18nlangtag", "LanguageTag::getLanguageFromLangtag: pLang==NULL");
+ SAL_WARN_IF( !pLang, "i18nlangtag",
+ "LanguageTag::getLanguageFromLangtag: pLang==NULL for '" << maBcp47 << "'");
if (pLang)
aLanguage = OUString::createFromAscii( pLang);
}
@@ -1595,11 +1597,12 @@ OUString LanguageTagImpl::getRegionFromLangtag()
SAL_WARN_IF( !pRegionT &&
maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
- "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL");
+ "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL for '" << maBcp47 << "'");
if (!pRegionT)
return aRegion;
const char* pRegion = lt_region_get_tag( pRegionT);
- SAL_WARN_IF( !pRegion, "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegion==NULL");
+ SAL_WARN_IF( !pRegion, "i18nlangtag",
+ "LanguageTag::getRegionFromLangtag: pRegion==NULL for '" << maBcp47 << "'");
if (pRegion)
aRegion = OUString::createFromAscii( pRegion);
}
@@ -2399,7 +2402,10 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp
}
}
if (eRet == EXTRACTED_NONE)
+ {
+ SAL_INFO( "i18nlangtag", "LanguageTagImpl::simpleExtract: did not extract '" << rBcp47 << "'");
rLanguage = rScript = rCountry = rVariants = OUString();
+ }
return eRet;
}
diff --git a/include/i18nlangtag/lang.h b/include/i18nlangtag/lang.h
index 185f7bc..f66cd59 100644
--- a/include/i18nlangtag/lang.h
+++ b/include/i18nlangtag/lang.h
@@ -63,6 +63,17 @@
!
! Use THAT ^^^ as of 2013-09-17 it includes also Windows 8
+ ! BUT, you can download a PDF document from
+ ! http://msdn.microsoft.com/library/cc233965.aspx
+ ! that has YET MORE definitions, sigh.. didn't cross-check if any are missing
+ ! from that.. however, it also contains a few MS-reserved definitions that use
+ ! ISO 639-3 codes reserved for local use, such as 'qps-ploc' and 'qps-ploca'
+ ! (sic!), or strange things like 'ar-Ploc-SA' and 'ja-Ploc-JP' ('Ploc'??).
+ !
+ ! Use THAT ^^^ as the ultimate reference source (haha?) as of 2013-10-17 it
+ ! includes also Windows 8.1 (Release: Monday, July 22, 2013; well, its table
+ ! says 08/08/2013 Revision 6.0, but hey, who cares ...)
+
For completeness, you can never have enough lists:
List of supported locale identifiers in Word
Applies to Microsoft Word 2000 and 2002
@@ -121,38 +132,45 @@ typedef unsigned short LanguageType;
#define LANGUAGE_ARABIC_LIBYA 0x1001
#define LANGUAGE_ARABIC_MOROCCO 0x1801
#define LANGUAGE_ARABIC_OMAN 0x2001
+#define LANGUAGE_ARABIC_PRIMARY_ONLY 0x0001 /* primary only, not a locale! */
#define LANGUAGE_ARABIC_QATAR 0x4001
#define LANGUAGE_ARABIC_SAUDI_ARABIA 0x0401
#define LANGUAGE_ARABIC_SYRIA 0x2801
#define LANGUAGE_ARABIC_TUNISIA 0x1C01
#define LANGUAGE_ARABIC_UAE 0x3801
#define LANGUAGE_ARABIC_YEMEN 0x2401
-#define LANGUAGE_ARABIC_PRIMARY_ONLY 0x0001 /* primary only, not a locale! */
#define LANGUAGE_ARMENIAN 0x042B
#define LANGUAGE_ASSAMESE 0x044D
#define LANGUAGE_AZERI_CYRILLIC 0x082C
+#define LANGUAGE_AZERI_CYRILLIC_LSO 0x742C
#define LANGUAGE_AZERI_LATIN 0x042C
+#define LANGUAGE_AZERI_LATIN_LSO 0x782C
#define LANGUAGE_BASHKIR_RUSSIA 0x046D
#define LANGUAGE_BASQUE 0x042D
#define LANGUAGE_BELARUSIAN 0x0423
#define LANGUAGE_BENGALI 0x0445 /* in India */
#define LANGUAGE_BENGALI_BANGLADESH 0x0845
-#define LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA 0x141A
+#define LANGUAGE_BOSNIAN_CYRILLIC_LSO 0x641A
#define LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_HERZEGOVINA 0x201A
+#define LANGUAGE_BOSNIAN_LATIN_LSO 0x681A
+#define LANGUAGE_BOSNIAN_LSO 0x781A
+#define LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA 0x141A
#define LANGUAGE_BOSNIAN_BOSNIA_HERZEGOVINA LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA /* TODO: remove, only for langtab.src & localize.sdf compatibility */
#define LANGUAGE_BRETON_FRANCE 0x047E /* obsoletes LANGUAGE_USER_BRETON 0x0629 */
#define LANGUAGE_BULGARIAN 0x0402
#define LANGUAGE_BURMESE 0x0455
#define LANGUAGE_CATALAN 0x0403
-#define LANGUAGE_CATALAN_VALENCIAN 0x0803
+#define LANGUAGE_CATALAN_VALENCIAN 0x0803 /* obsoletes LANGUAGE_USER_CATALAN_VALENCIAN */
#define LANGUAGE_CHEROKEE_UNITED_STATES 0x045C
+#define LANGUAGE_CHEROKEE_CHEROKEE_LSO 0x7C5C
#define LANGUAGE_CHINESE_HONGKONG 0x0C04
+#define LANGUAGE_CHINESE_LSO 0x7804
#define LANGUAGE_CHINESE_MACAU 0x1404
#define LANGUAGE_CHINESE_SIMPLIFIED 0x0804
#define LANGUAGE_CHINESE_SINGAPORE 0x1004
#define LANGUAGE_CHINESE_TRADITIONAL 0x0404
#define LANGUAGE_CHINESE_SIMPLIFIED_LEGACY 0x0004 /* MS-.NET 'zh-CHS', primary only! but maps to 'zh-CN' */
-#define LANGUAGE_CHINESE_TRADITIONAL_LEGACY 0x7C04 /* MS-.NET 'zh-CHT' */
+#define LANGUAGE_CHINESE_TRADITIONAL_LSO 0x7C04 /* MS-.NET 'zh-CHT' but maps to 'zh-Hant' */
#define LANGUAGE_CHINESE LANGUAGE_CHINESE_SIMPLIFIED /* most code uses LANGUAGE_CHINESE */
#define LANGUAGE_CORSICAN_FRANCE 0x0483
#define LANGUAGE_CROATIAN 0x041A
@@ -165,23 +183,30 @@ typedef unsigned short LanguageType;
#define LANGUAGE_DUTCH_BELGIAN 0x0813
#define LANGUAGE_EDO 0x0466
#define LANGUAGE_ENGLISH 0x0009 /* primary only, not a locale! */
+#define LANGUAGE_ENGLISH_ARAB_EMIRATES 0x4C09
#define LANGUAGE_ENGLISH_AUS 0x0C09
+#define LANGUAGE_ENGLISH_BAHRAIN 0x5009
#define LANGUAGE_ENGLISH_BELIZE 0x2809
#define LANGUAGE_ENGLISH_CAN 0x1009
#define LANGUAGE_ENGLISH_CARRIBEAN 0x2409
+#define LANGUAGE_ENGLISH_EGYPT 0x5409
#define LANGUAGE_ENGLISH_EIRE 0x1809
#define LANGUAGE_ENGLISH_HONG_KONG_SAR 0x3C09
#define LANGUAGE_ENGLISH_INDIA 0x4009
#define LANGUAGE_ENGLISH_INDONESIA 0x3809
#define LANGUAGE_ENGLISH_JAMAICA 0x2009
+#define LANGUAGE_ENGLISH_JORDAN 0x5809
+#define LANGUAGE_ENGLISH_KUWAIT 0x5C09
#define LANGUAGE_ENGLISH_MALAYSIA 0x4409
#define LANGUAGE_ENGLISH_NZ 0x1409
#define LANGUAGE_ENGLISH_PHILIPPINES 0x3409
#define LANGUAGE_ENGLISH_SAFRICA 0x1C09
#define LANGUAGE_ENGLISH_SINGAPORE 0x4809
#define LANGUAGE_ENGLISH_TRINIDAD 0x2C09
+#define LANGUAGE_ENGLISH_TURKEY 0x6009
#define LANGUAGE_ENGLISH_UK 0x0809
#define LANGUAGE_ENGLISH_US 0x0409
+#define LANGUAGE_ENGLISH_YEMEN 0x6409
#define LANGUAGE_ENGLISH_ZIMBABWE 0x3009
#define LANGUAGE_ESTONIAN 0x0425
#define LANGUAGE_FAEROESE 0x0438
@@ -203,12 +228,15 @@ typedef unsigned short LanguageType;
#define LANGUAGE_FRENCH_REUNION 0x200C
#define LANGUAGE_FRENCH_SENEGAL 0x280C
#define LANGUAGE_FRENCH_SWISS 0x100C
-#define LANGUAGE_FRENCH_WEST_INDIES 0x1C0C
+#define LANGUAGE_FRENCH_WEST_INDIES 0x1C0C /* MS in its MS-LCID.pdf now says "Neither defined nor reserved" */
#define LANGUAGE_FRENCH_ZAIRE 0x240C
#define LANGUAGE_FRISIAN_NETHERLANDS 0x0462
+#define LANGUAGE_FULFULDE_LATIN_LSO 0x7C67
#define LANGUAGE_FULFULDE_NIGERIA 0x0467
+#define LANGUAGE_FULFULDE_SENEGAL 0x0867
#define LANGUAGE_GAELIC_IRELAND 0x083C
-#define LANGUAGE_GAELIC_SCOTLAND 0x043C
+#define LANGUAGE_GAELIC_SCOTLAND 0x0491 /* apparently it occurred to MS that those are different languages */
+#define LANGUAGE_GAELIC_SCOTLAND_LEGACY 0x043C
#define LANGUAGE_GALICIAN 0x0456
#define LANGUAGE_GEORGIAN 0x0437
#define LANGUAGE_GERMAN 0x0407
@@ -219,6 +247,7 @@ typedef unsigned short LanguageType;
#define LANGUAGE_GREEK 0x0408
#define LANGUAGE_GUARANI_PARAGUAY 0x0474
#define LANGUAGE_GUJARATI 0x0447
+#define LANGUAGE_HAUSA_LATIN_LSO 0x7C68
#define LANGUAGE_HAUSA_NIGERIA 0x0468
#define LANGUAGE_HAWAIIAN_UNITED_STATES 0x0475
#define LANGUAGE_HEBREW 0x040D
@@ -229,7 +258,9 @@ typedef unsigned short LanguageType;
#define LANGUAGE_IGBO_NIGERIA 0x0470
#define LANGUAGE_INDONESIAN 0x0421
#define LANGUAGE_INUKTITUT_SYLLABICS_CANADA 0x045D
+#define LANGUAGE_INUKTITUT_SYLLABICS_LSO 0x785D
#define LANGUAGE_INUKTITUT_LATIN_CANADA 0x085D
+#define LANGUAGE_INUKTITUT_LATIN_LSO 0x7C5D
#define LANGUAGE_ITALIAN 0x0410
#define LANGUAGE_ITALIAN_SWISS 0x0810
#define LANGUAGE_JAPANESE 0x0411
@@ -246,13 +277,17 @@ typedef unsigned short LanguageType;
#define LANGUAGE_KONKANI 0x0457
#define LANGUAGE_KOREAN 0x0412
#define LANGUAGE_KOREAN_JOHAB 0x0812
+#define LANGUAGE_KURDISH_ARABIC_IRAQ 0x0492 /* TODO: obsoletes LANGUAGE_USER_KURDISH_IRAQ */
+#define LANGUAGE_KURDISH_ARABIC_LSO 0x7C92
#define LANGUAGE_LAO 0x0454
#define LANGUAGE_LATIN 0x0476 /* obsoletes LANGUAGE_USER_LATIN 0x0610 */
#define LANGUAGE_LATVIAN 0x0426
#define LANGUAGE_LITHUANIAN 0x0427
-#define LANGUAGE_LITHUANIAN_CLASSIC 0x0827
+#define LANGUAGE_LITHUANIAN_CLASSIC 0x0827 /* MS in its MS-LCID.pdf now says "Neither defined nor reserved" */
+#define LANGUAGE_LU_CHINA 0x0490
#define LANGUAGE_LUXEMBOURGISH_LUXEMBOURG 0x046E /* obsoletes LANGUAGE_USER_LUXEMBOURGISH 0x0630 */
#define LANGUAGE_MACEDONIAN 0x042F
+#define LANGUAGE_MALAGASY_PLATEAU 0x048D /* obsoletes LANGUAGE_USER_MALAGASY_PLATEAU */
#define LANGUAGE_MALAYALAM 0x044C /* in India */
#define LANGUAGE_MALAY_BRUNEI_DARUSSALAM 0x083E
#define LANGUAGE_MALAY_MALAYSIA 0x043E
@@ -262,13 +297,18 @@ typedef unsigned short LanguageType;
#define LANGUAGE_MAPUDUNGUN_CHILE 0x047A /* AKA Araucanian */
#define LANGUAGE_MARATHI 0x044E
#define LANGUAGE_MOHAWK_CANADA 0x047C
-#define LANGUAGE_MONGOLIAN 0x0450 /* Cyrillic script */
-#define LANGUAGE_MONGOLIAN_MONGOLIAN 0x0850
+#define LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA 0x0450
+#define LANGUAGE_MONGOLIAN_CYRILLIC_LSO 0x7850
+#define LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA 0x0C50
+#define LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA 0x0850
+#define LANGUAGE_MONGOLIAN_MONGOLIAN_LSO 0x7C50
#define LANGUAGE_NEPALI 0x0461
#define LANGUAGE_NEPALI_INDIA 0x0861
#define LANGUAGE_NORWEGIAN 0x0014 /* primary only, not a locale! */
#define LANGUAGE_NORWEGIAN_BOKMAL 0x0414
+#define LANGUAGE_NORWEGIAN_BOKMAL_LSO 0x7C14
#define LANGUAGE_NORWEGIAN_NYNORSK 0x0814
+#define LANGUAGE_NORWEGIAN_NYNORSK_LSO 0x7814
#define LANGUAGE_OCCITAN_FRANCE 0x0482 /* obsoletes LANGUAGE_USER_OCCITAN 0x0625 */
#define LANGUAGE_ORIYA 0x0448
#define LANGUAGE_OROMO 0x0472
@@ -278,8 +318,10 @@ typedef unsigned short LanguageType;
#define LANGUAGE_PORTUGUESE 0x0816
#define LANGUAGE_PORTUGUESE_BRAZILIAN 0x0416
#define LANGUAGE_PUNJABI 0x0446
+#define LANGUAGE_PUNJABI_ARABIC_LSO 0x7C46
#define LANGUAGE_PUNJABI_PAKISTAN 0x0846
#define LANGUAGE_QUECHUA_BOLIVIA 0x046B
+#define LANGUAGE_QUECHUA_COLOMBIA 0x0493 /* different primary ID */
#define LANGUAGE_QUECHUA_ECUADOR 0x086B
#define LANGUAGE_QUECHUA_PERU 0x0C6B
#define LANGUAGE_RHAETO_ROMAN 0x0417
@@ -290,23 +332,34 @@ typedef unsigned short LanguageType;
#define LANGUAGE_SAMI_NORTHERN_NORWAY 0x043B
#define LANGUAGE_SAMI_LAPPISH LANGUAGE_SAMI_NORTHERN_NORWAY /* the old MS definition */
#define LANGUAGE_SAMI_INARI 0x243B
+#define LANGUAGE_SAMI_INARI_LSO 0x703B
+#define LANGUAGE_SAMI_LULE_LSO 0x7C3B
#define LANGUAGE_SAMI_LULE_NORWAY 0x103B
#define LANGUAGE_SAMI_LULE_SWEDEN 0x143B
#define LANGUAGE_SAMI_NORTHERN_FINLAND 0x0C3B
#define LANGUAGE_SAMI_NORTHERN_SWEDEN 0x083B
#define LANGUAGE_SAMI_SKOLT 0x203B
+#define LANGUAGE_SAMI_SKOLT_LSO 0x743B
+#define LANGUAGE_SAMI_SOUTHERN_LSO 0x783B
#define LANGUAGE_SAMI_SOUTHERN_NORWAY 0x183B
#define LANGUAGE_SAMI_SOUTHERN_SWEDEN 0x1C3B
#define LANGUAGE_SANSKRIT 0x044F
#define LANGUAGE_SEPEDI 0x046C
#define LANGUAGE_NORTHERNSOTHO LANGUAGE_SEPEDI /* just an alias for the already existing localization */
-#define LANGUAGE_SERBIAN_CYRILLIC 0x0C1A /* MS lists this as Serbian (Cyrillic, Serbia) 'sr-Cyrl-SP', but they use 'SP' since at least Windows2003 where it was Serbia and Montenegro! */
+#define LANGUAGE_SERBIAN_CYRILLIC_LSO 0x6C1A
+#define LANGUAGE_SERBIAN_CYRILLIC_SAM 0x0C1A /* Serbia and Montenegro (former) */
#define LANGUAGE_SERBIAN_CYRILLIC_BOSNIA_HERZEGOVINA 0x1C1A
-#define LANGUAGE_SERBIAN_LATIN 0x081A /* MS lists this as Serbian (Latin, Serbia) 'sr-Latn-SP', but they use 'SP' since at least Windows2003 where it was Serbia and Montenegro! */
+#define LANGUAGE_SERBIAN_CYRILLIC_MONTENEGRO 0x301A
+#define LANGUAGE_SERBIAN_CYRILLIC_SERBIA 0x281A
+#define LANGUAGE_SERBIAN_LATIN_LSO 0x701A
+#define LANGUAGE_SERBIAN_LATIN_SAM 0x081A
#define LANGUAGE_SERBIAN_LATIN_BOSNIA_HERZEGOVINA 0x181A
-#define LANGUAGE_SERBIAN_LATIN_NEUTRAL 0x7C1A /* MS lists this as 'sr' only. What a mess. */
+#define LANGUAGE_SERBIAN_LATIN_MONTENEGRO 0x2C1A
+#define LANGUAGE_SERBIAN_LATIN_NEUTRAL 0x7C1A
+#define LANGUAGE_SERBIAN_LATIN_SERBIA 0x241A
#define LANGUAGE_SESOTHO 0x0430 /* also called Sutu now by MS */
#define LANGUAGE_SINDHI 0x0459
+#define LANGUAGE_SINDHI_ARABIC_LSO 0x7C59
#define LANGUAGE_SINDHI_PAKISTAN 0x0859
#define LANGUAGE_SINHALESE_SRI_LANKA 0x045B
#define LANGUAGE_SLOVAK 0x041B
@@ -314,6 +367,7 @@ typedef unsigned short LanguageType;
#define LANGUAGE_SOMALI 0x0477
#define LANGUAGE_UPPER_SORBIAN_GERMANY 0x042E /* obsoletes LANGUAGE_USER_UPPER_SORBIAN 0x0623 */
#define LANGUAGE_LOWER_SORBIAN_GERMANY 0x082E /* obsoletes LANGUAGE_USER_LOWER_SORBIAN 0x0624. NOTE: the primary ID is identical to Upper Sorbian, which is not quite correct because they're distinct languages */
+#define LANGUAGE_LOWER_SORBIAN_LSO 0x7C2E
#define LANGUAGE_SORBIAN LANGUAGE_USER_UPPER_SORBIAN /* a strange MS definition */
#define LANGUAGE_SPANISH_DATED 0x040A /* old collation, not supported, see #i94435# */
#define LANGUAGE_SPANISH_ARGENTINA 0x2C0A
@@ -342,11 +396,17 @@ typedef unsigned short LanguageType;
#define LANGUAGE_SWEDISH 0x041D
#define LANGUAGE_SWEDISH_FINLAND 0x081D
#define LANGUAGE_SYRIAC 0x045A
+#define LANGUAGE_TAI_NUA_CHINA 0x048F
#define LANGUAGE_TAJIK 0x0428
-#define LANGUAGE_TAMAZIGHT_ARABIC 0x045F
-#define LANGUAGE_TAMAZIGHT_LATIN 0x085F
-#define LANGUAGE_TAMAZIGHT_TIFINAGH 0x0C5F
+#define LANGUAGE_TAJIK_LSO 0x7C28
+#define LANGUAGE_TAMAZIGHT_ARABIC_MOROCCO 0x045F
+#define LANGUAGE_TAMAZIGHT_LATIN_ALGERIA 0x085F
+#define LANGUAGE_TAMAZIGHT_LATIN_LSO 0x7C5F
+#define LANGUAGE_TAMAZIGHT_MOROCCO 0x0C5F
+#define LANGUAGE_TAMAZIGHT_TIFINAGH_MOROCCO 0x105F
+#define LANGUAGE_TAMAZIGHT_TIFINAGH_LSO 0x785F
#define LANGUAGE_TAMIL 0x0449
+#define LANGUAGE_TAMIL_SRI_LANKA 0x0849
#define LANGUAGE_TATAR 0x0444
#define LANGUAGE_TELUGU 0x044A
#define LANGUAGE_THAI 0x041E
@@ -357,6 +417,7 @@ typedef unsigned short LanguageType;
#define LANGUAGE_TIGRIGNA_ETHIOPIA 0x0473
#define LANGUAGE_TSONGA 0x0431
#define LANGUAGE_TSWANA 0x0432 /* AKA Setsuana, for South Africa */
+#define LANGUAGE_TSWANA_BOTSWANA 0x0832 /* obsoletes LANGUAGE_USER_TSWANA_BOTSWANA */
#define LANGUAGE_TURKISH 0x041F
#define LANGUAGE_TURKMEN 0x0442
#define LANGUAGE_UIGHUR_CHINA 0x0480
@@ -364,7 +425,9 @@ typedef unsigned short LanguageType;
#define LANGUAGE_URDU_INDIA 0x0820
#define LANGUAGE_URDU_PAKISTAN 0x0420
#define LANGUAGE_UZBEK_CYRILLIC 0x0843
+#define LANGUAGE_UZBEK_CYRILLIC_LSO 0x7843
#define LANGUAGE_UZBEK_LATIN 0x0443
+#define LANGUAGE_UZBEK_LATIN_LSO 0x7C43
#define LANGUAGE_VENDA 0x0433
#define LANGUAGE_VIETNAMESE 0x042A
#define LANGUAGE_WELSH 0x0452
@@ -374,8 +437,20 @@ typedef unsigned short LanguageType;
#define LANGUAGE_YI 0x0478 /* Sichuan Yi */
#define LANGUAGE_YIDDISH 0x043D
#define LANGUAGE_YORUBA 0x046A
+#define LANGUAGE_YUE_CHINESE_HONGKONG 0x048E
#define LANGUAGE_ZULU 0x0435
+#define LANGUAGE_qps_ploc 0x0501 /* 'qps-ploc', qps is a reserved for local use code */
+#define LANGUAGE_qps_ploca 0x05FE /* 'qps-ploca', qps is a reserved for local use code */
+#define LANGUAGE_qps_plocm 0x09FF /* 'qps-plocm', qps is a reserved for local use code */
+
+#define LANGUAGE_ar_Ploc_SA__reserved 0x4401 /* 'ar-Ploc-SA', 'Ploc'?? */
+#define LANGUAGE_ja_Ploc_JP__reserved 0x0811 /* 'ja-Ploc-JP', 'Ploc'?? */
+#define LANGUAGE_pap_029__reserved 0x0479 /* 'pap-029' */
+#define LANGUAGE_ar_145__reserved 0x4801 /* 'ar-145' */
+#define LANGUAGE_es_419 0x580A /* 'es-419', not reserved, used? */
+#define LANGUAGE_Neither_defined_nor_reserved_0x2008 0x2008
+
/*! use only for import/export of MS documents, number formatter maps it to
*! LANGUAGE_SYSTEM and then to effective system language */
#define LANGUAGE_SYSTEM_DEFAULT 0x0800
@@ -454,7 +529,8 @@ typedef unsigned short LanguageType;
#define LANGUAGE_USER_KALAALLISUT LANGUAGE_KALAALLISUT_GREENLAND
#define LANGUAGE_USER_SWAZI 0x062B
#define LANGUAGE_USER_NDEBELE_SOUTH 0x062C
-#define LANGUAGE_USER_TSWANA_BOTSWANA 0x8032 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_TSWANA)) */
+#define LANGUAGE_OBSOLETE_USER_TSWANA_BOTSWANA 0x8032 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_TSWANA)) */
+#define LANGUAGE_USER_TSWANA_BOTSWANA LANGUAGE_TSWANA_BOTSWANA
#define LANGUAGE_USER_MOORE 0x062D
#define LANGUAGE_USER_BAMBARA 0x062E
#define LANGUAGE_USER_AKAN 0x062F
@@ -486,10 +562,14 @@ typedef unsigned short LanguageType;
#define LANGUAGE_USER_TETUN 0x0640
#define LANGUAGE_USER_QUECHUA_NORTH_BOLIVIA 0x0641
#define LANGUAGE_USER_QUECHUA_SOUTH_BOLIVIA 0x0642
-#define LANGUAGE_USER_SERBIAN_CYRILLIC_SERBIA 0x8C1A /* makeLangID( 0x20+0x03, getPrimaryLanguage( LANGUAGE_SERBIAN_CYRILLIC)) */
-#define LANGUAGE_USER_SERBIAN_LATIN_SERBIA 0x881A /* makeLangID( 0x20+0x02, getPrimaryLanguage( LANGUAGE_SERBIAN_LATIN)) */
-#define LANGUAGE_USER_SERBIAN_CYRILLIC_MONTENEGRO 0xCC1A /* makeLangID( 0x20+0x13, getPrimaryLanguage( LANGUAGE_SERBIAN_CYRILLIC)) */
-#define LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO 0xC81A /* makeLangID( 0x20+0x12, getPrimaryLanguage( LANGUAGE_SERBIAN_LATIN)) */
+#define LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_SERBIA 0x8C1A /* makeLangID( 0x20+0x03, getPrimaryLanguage( LANGUAGE_SERBIAN_CYRILLIC_LSO)) */
+#define LANGUAGE_USER_SERBIAN_CYRILLIC_SERBIA LANGUAGE_SERBIAN_CYRILLIC_SERBIA
+#define LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_SERBIA 0x881A /* makeLangID( 0x20+0x02, getPrimaryLanguage( LANGUAGE_SERBIAN_LATIN_LSO)) */
+#define LANGUAGE_USER_SERBIAN_LATIN_SERBIA LANGUAGE_SERBIAN_LATIN_SERBIA
+#define LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_MONTENEGRO 0xCC1A /* makeLangID( 0x20+0x13, getPrimaryLanguage( LANGUAGE_SERBIAN_CYRILLIC_LSO)) */
+#define LANGUAGE_USER_SERBIAN_CYRILLIC_MONTENEGRO LANGUAGE_SERBIAN_CYRILLIC_MONTENEGRO
+#define LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_MONTENEGRO 0xC81A /* makeLangID( 0x20+0x12, getPrimaryLanguage( LANGUAGE_SERBIAN_LATIN_LSO)) */
+#define LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO LANGUAGE_SERBIAN_LATIN_MONTENEGRO
#define LANGUAGE_USER_SAMI_KILDIN_RUSSIA 0x803B /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_SAMI_NORTHERN_NORWAY)) */
#define LANGUAGE_USER_BODO_INDIA 0x0643
#define LANGUAGE_USER_DOGRI_INDIA 0x0644
@@ -513,7 +593,8 @@ typedef unsigned short LanguageType;
#define LANGUAGE_USER_MAORE 0x064C
#define LANGUAGE_USER_BUSHI 0x064D
#define LANGUAGE_USER_TAHITIAN 0x064E
-#define LANGUAGE_USER_MALAGASY_PLATEAU 0x064F
+#define LANGUAGE_OBSOLETE_USER_MALAGASY_PLATEAU 0x064F
+#define LANGUAGE_USER_MALAGASY_PLATEAU LANGUAGE_MALAGASY_PLATEAU
#define LANGUAGE_USER_PAPIAMENTU_ARUBA 0x8079 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_PAPIAMENTU)) */
#define LANGUAGE_USER_SARDINIAN_CAMPIDANESE 0x0650
#define LANGUAGE_USER_SARDINIAN_GALLURESE 0x0651
@@ -527,7 +608,7 @@ typedef unsigned short LanguageType;
#define LANGUAGE_USER_LIMBU 0x0657
#define LANGUAGE_USER_LOJBAN 0x0658 /* no locale */
#define LANGUAGE_OBSOLETE_USER_KABYLE 0x0659
-#define LANGUAGE_USER_KABYLE LANGUAGE_TAMAZIGHT_LATIN
+#define LANGUAGE_USER_KABYLE LANGUAGE_TAMAZIGHT_LATIN_ALGERIA
#define LANGUAGE_USER_HAITIAN 0x065A
#define LANGUAGE_USER_BEEMBE 0x065B
#define LANGUAGE_USER_BEKWEL 0x065C
@@ -592,6 +673,7 @@ typedef unsigned short LanguageType;
#define LANGUAGE_USER_NOGAI 0x068B
#define LANGUAGE_USER_KARAKALPAK_LATIN 0x068C
#define LANGUAGE_USER_LADIN 0x068D
+#define LANGUAGE_USER_TIBETAN_BHUTAN 0x8451 /* makeLangID( 0x21, getPrimaryLanguage( LANGUAGE_TIBETAN)) */
/* Primary language ID range for on-the-fly assignment. */
diff --git a/svtools/source/misc/langtab.src b/svtools/source/misc/langtab.src
index d782bce..42f93e8 100644
--- a/svtools/source/misc/langtab.src
+++ b/svtools/source/misc/langtab.src
@@ -152,8 +152,8 @@ StringArray STR_ARR_SVT_LANGUAGE_TABLE
< "Romanian (Moldova)" ; LANGUAGE_ROMANIAN_MOLDOVA ; > ;
< "Russian" ; LANGUAGE_RUSSIAN ; > ;
< "Sanskrit" ; LANGUAGE_SANSKRIT ; > ;
- < "Serbian Cyrillic (Serbia and Montenegro)" ; LANGUAGE_SERBIAN_CYRILLIC ; > ;
- < "Serbian Latin (Serbia and Montenegro)" ; LANGUAGE_SERBIAN_LATIN ; > ;
+ < "Serbian Cyrillic (Serbia and Montenegro)" ; LANGUAGE_SERBIAN_CYRILLIC_SAM ; > ;
+ < "Serbian Latin (Serbia and Montenegro)" ; LANGUAGE_SERBIAN_LATIN_SAM ; > ;
< "Serbian Cyrillic (Serbia)" ; LANGUAGE_USER_SERBIAN_CYRILLIC_SERBIA ; > ;
< "Serbian Latin (Serbia)" ; LANGUAGE_USER_SERBIAN_LATIN_SERBIA ; > ;
< "Serbian Cyrillic (Montenegro)" ; LANGUAGE_USER_SERBIAN_CYRILLIC_MONTENEGRO ; > ;
@@ -205,8 +205,8 @@ StringArray STR_ARR_SVT_LANGUAGE_TABLE
< "Dhivehi" ; LANGUAGE_DHIVEHI ; > ;
< "Northern Sotho" ; LANGUAGE_SEPEDI ; > ;
< "Gaelic (Scotland)" ; LANGUAGE_GAELIC_SCOTLAND ; > ;
- < "Mongolian Cyrillic" ; LANGUAGE_MONGOLIAN ; > ;
- < "Mongolian Mongolian" ; LANGUAGE_MONGOLIAN_MONGOLIAN ; > ;
+ < "Mongolian Cyrillic" ; LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA ; > ;
+ < "Mongolian Mongolian" ; LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA ; > ;
< "Interlingua" ; LANGUAGE_USER_INTERLINGUA ; > ;
< "Bosnian" ; LANGUAGE_BOSNIAN_BOSNIA_HERZEGOVINA ; > ;
< "Bengali (Bangladesh)" ; LANGUAGE_BENGALI_BANGLADESH ; > ;
@@ -315,7 +315,7 @@ StringArray STR_ARR_SVT_LANGUAGE_TABLE
< "Yoruba" ; LANGUAGE_YORUBA ; > ;
< "Rusyn (Ukraine)" ; LANGUAGE_USER_RUSYN_UKRAINE ; > ;
< "Rusyn (Slovakia)" ; LANGUAGE_USER_RUSYN_SLOVAKIA ; > ;
- < "Kabyle Latin" ; LANGUAGE_TAMAZIGHT_LATIN ; > ;
+ < "Kabyle Latin" ; LANGUAGE_TAMAZIGHT_LATIN_ALGERIA ; > ;
< "Yiddish (USA)" ; LANGUAGE_USER_YIDDISH_US ; > ;
< "Hawaiian" ; LANGUAGE_HAWAIIAN_UNITED_STATES ; > ;
< "Limbu" ; LANGUAGE_USER_LIMBU ; > ;
diff --git a/svtools/source/misc/sampletext.cxx b/svtools/source/misc/sampletext.cxx
index c8ba114..3523383 100644
--- a/svtools/source/misc/sampletext.cxx
+++ b/svtools/source/misc/sampletext.cxx
@@ -586,9 +586,17 @@ OUString makeRepresentativeTextForLanguage(LanguageType eLang)
case LANGUAGE_KHMER & LANGUAGE_MASK_PRIMARY:
sRet = makeRepresentativeTextForScript(USCRIPT_KHMER);
break;
- case LANGUAGE_MONGOLIAN & LANGUAGE_MASK_PRIMARY:
- if (eLang == LANGUAGE_MONGOLIAN_MONGOLIAN)
- sRet = makeRepresentativeTextForScript(USCRIPT_MONGOLIAN);
+ case LANGUAGE_MONGOLIAN_MONGOLIAN_LSO & LANGUAGE_MASK_PRIMARY:
+ switch (eLang)
+ {
+ case LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA:
+ case LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA:
+ case LANGUAGE_MONGOLIAN_MONGOLIAN_LSO:
+ sRet = makeRepresentativeTextForScript(USCRIPT_MONGOLIAN);
+ break;
+ default:
+ break;
+ }
break;
case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY:
sRet = makeRepresentativeTextForScript(USCRIPT_JAPANESE);
diff --git a/vcl/source/gdi/sallayout.cxx b/vcl/source/gdi/sallayout.cxx
index fe01037..f395936 100644
--- a/vcl/source/gdi/sallayout.cxx
+++ b/vcl/source/gdi/sallayout.cxx
@@ -287,11 +287,18 @@ VCL_DLLPUBLIC sal_UCS4 GetLocalizedChar( sal_UCS4 nChar, LanguageType eLang )
case LANGUAGE_MALAYALAM & LANGUAGE_MASK_PRIMARY:
nOffset = 0x0D66 - '0'; // malayalam
break;
- case LANGUAGE_MONGOLIAN & LANGUAGE_MASK_PRIMARY:
- if (eLang == LANGUAGE_MONGOLIAN_MONGOLIAN)
- nOffset = 0x1810 - '0'; // mongolian
- else
- nOffset = 0; // mongolian cyrillic
+ case LANGUAGE_MONGOLIAN_MONGOLIAN_LSO & LANGUAGE_MASK_PRIMARY:
+ switch (eLang)
+ {
+ case LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA:
+ case LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA:
+ case LANGUAGE_MONGOLIAN_MONGOLIAN_LSO:
+ nOffset = 0x1810 - '0'; // mongolian
+ break;
+ default:
+ nOffset = 0; // mongolian cyrillic
+ break;
+ }
break;
case LANGUAGE_BURMESE & LANGUAGE_MASK_PRIMARY:
nOffset = 0x1040 - '0'; // myanmar
commit 315a0e277ab929ab77bb6ecd761abb24af0beb8b
Author: Eike Rathke <erack at redhat.com>
Date: Tue Oct 22 00:55:05 2013 +0200
adapt to new IsoLanguageCountryEntry struct
IsoLanguageScriptCountryEntry and Bcp47CountryEntry are still missing.
Change-Id: Ie22c6031a6bf8598ee71f4720b4d4b9d5bac97d4
diff --git a/i18nlangtag/source/isolang/langid.pl b/i18nlangtag/source/isolang/langid.pl
index 16f1b07..838ef2c 100755
--- a/i18nlangtag/source/isolang/langid.pl
+++ b/i18nlangtag/source/isolang/langid.pl
@@ -267,13 +267,13 @@ sub main()
my $coun = $2;
$lang = lc($lang);
$coun = uc($coun);
- # { LANGUAGE_AFRIKAANS, "af", "ZA" },
+ # { LANGUAGE_AFRIKAANS, "af", "ZA", false },
@resultlist = grepFile(
- '^\s*\{\s*\w+\s*,\s*\"' . $lang . '\"\s*,\s*\"' . $coun . '\"\s*\}\s*,',
+ '^\s*\{\s*\w+\s*,\s*\"' . $lang . '\"\s*,\s*\"' . $coun . '\"\s*,\s*\w+\s*\}\s*,',
"$SRC_ROOT", "i18nlangtag", "source/isolang/isolang.cxx", ());
for $result (@resultlist)
{
- if ($result =~ /^\s*\{\s*(\w+)\s*,\s*\"\w+\"\s*,\s*\"(\w+)?\"\s*\}\s*,/)
+ if ($result =~ /^\s*\{\s*(\w+)\s*,\s*\"\w+\"\s*,\s*\"(\w+)?\"\s*,\s*\w+\s*\}\s*,/)
{
push( @greplist, '\b' . $1 . '\b');
$modifier = ""; # complete identifier now case sensitive
@@ -305,15 +305,15 @@ sub main()
}
}
- # { LANGUAGE_AFRIKAANS, "af", "ZA" },
+ # { LANGUAGE_AFRIKAANS, "af", "ZA", false },
@resultlist = grepFile(
- $modifier . '^\s*\{\s*.*' . $grepdef . '.*\s*,\s*\".*\"\s*,\s*\".*\"\s*\}\s*,',
+ $modifier . '^\s*\{\s*.*' . $grepdef . '.*\s*,\s*\".*\"\s*,\s*\".*\"\s*,\s*\w+\s*\}\s*,',
"$SRC_ROOT", "i18nlangtag", "source/isolang/isolang.cxx", ());
my @langcoungreplist;
for $result (@resultlist)
{
- if ($result =~ /^\s*\{\s*\w+\s*,\s*\"(\w+)\"\s*,\s*\"(\w+)?\"\s*\}\s*,/)
+ if ($result =~ /^\s*\{\s*\w+\s*,\s*\"(\w+)\"\s*,\s*\"(\w+)?\"\s*,\s*\w+\s*\}\s*,/)
{
my $lang = $1;
my $coun = $2;
commit 9bffd1170cf2a1642ee956d3b9b27130cac8251d
Author: Eike Rathke <erack at redhat.com>
Date: Mon Oct 21 23:32:24 2013 +0200
LANGUAGE_USER_YIDDISH_US ought to be 0x803D instead of 0x083D
Change-Id: Idbb6837802ee6f12e95914283a5eb5d61982f4f8
diff --git a/include/i18nlangtag/lang.h b/include/i18nlangtag/lang.h
index dfbf7b2..185f7bc 100644
--- a/include/i18nlangtag/lang.h
+++ b/include/i18nlangtag/lang.h
@@ -523,7 +523,7 @@ typedef unsigned short LanguageType;
#define LANGUAGE_USER_GIKUYU 0x0655
#define LANGUAGE_USER_RUSYN_UKRAINE 0x0656
#define LANGUAGE_USER_RUSYN_SLOVAKIA 0x8256 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_USER_RUSYN_UKRAINE)) */
-#define LANGUAGE_USER_YIDDISH_US 0x083D /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_YIDDISH)) */
+#define LANGUAGE_USER_YIDDISH_US 0x803D /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_YIDDISH)) */
#define LANGUAGE_USER_LIMBU 0x0657
#define LANGUAGE_USER_LOJBAN 0x0658 /* no locale */
#define LANGUAGE_OBSOLETE_USER_KABYLE 0x0659
More information about the Libreoffice-commits
mailing list