[Libreoffice-commits] core.git: 2 commits - i18nlangtag/source

Eike Rathke erack at redhat.com
Thu Oct 24 02:48:26 PDT 2013


 i18nlangtag/source/isolang/MS-LCID-to-list.sh |   88 ++++
 i18nlangtag/source/isolang/MS-LCID.lst        |  460 ++++++++++++++++++++++++++
 2 files changed, 548 insertions(+)

New commits:
commit 5dcee8893a41ace34aad77530cb8781ecde8e2cd
Author: Eike Rathke <erack at redhat.com>
Date:   Thu Oct 24 11:44:49 2013 +0200

    raw LangID list for reference
    
    ... to hopefully be able to generate a diff when the next release of
    MS-LCID.pdf becomes available at
    http://msdn.microsoft.com/library/cc233965.aspx
    
    Change-Id: I10877cdc8d4b90c6b971dfa5e05ad796fd2a2d00

diff --git a/i18nlangtag/source/isolang/MS-LCID.lst b/i18nlangtag/source/isolang/MS-LCID.lst
new file mode 100644
index 0000000..219157f
--- /dev/null
+++ b/i18nlangtag/source/isolang/MS-LCID.lst
@@ -0,0 +1,460 @@
+        0x0001              ar
+        0x0002              bg
+        0x0003              ca
+        0x0004              zh-Hans
+        0x0005              cs
+        0x0006              da
+        0x0007              de
+        0x0008              el
+        0x0009              en
+        0x000a              es
+        0x000b              fi
+        0x000c              fr
+        0x000d              he
+        0x000e              hu
+        0x000f              is
+        0x0010              it
+        0x0011              ja
+        0x0012              ko
+        0x0013              nl
+        0x0014              no
+        0x0015              pl
+        0x0016              pt
+        0x0017              rm
+        0x0018              ro
+        0x0019              ru
+        0x001a              bs, hr, or sr
+        0x001b              sk
+        0x001c              sq
+        0x001d              sv
+        0x001e              th
+        0x001f              tr
+        0x0020              ur
+        0x0021              id
+        0x0022              uk
+        0x0023              be
+        0x0024              sl
+        0x0025              et
+        0x0026              lv
+        0x0027              lt
+        0x0028              tg
+        0x0029              fa
+        0x002a              vi
+        0x002b              hy
+        0x002c              az
+        0x002d              eu
+        0x002e              dsb or hsb
+        0x002f              mk
+        0x0030              st
+        0x0031              ts
+        0x0032              tn
+        0x0033              ve, reserved
+        0x0034              xh
+        0x0035              zu
+        0x0036              af
+        0x0037              ka
+        0x0038              fo
+        0x0039              hi
+        0x003a              mt
+        0x003b              se
+        0x003c              ga
+        0x003d              yi, reserved
+        0x003e              ms
+        0x003f              kk
+        0x0040              ky
+        0x0041              sw
+        0x0042              tk
+        0x0043              uz
+        0x0044              tt
+        0x0045              bn
+        0x0046              pa
+        0x0047              gu
+        0x0048              or
+        0x0049              ta
+        0x004a              te
+        0x004b              kn
+        0x004c              ml
+        0x004d              as
+        0x004e              mr
+        0x004f              sa
+        0x0050              mn
+        0x0051              bo
+        0x0052              cy
+        0x0053              km
+        0x0054              lo
+        0x0055              my
+        0x0056              gl
+        0x0057              kok
+        0x0058              mni, reserved
+        0x0059              sd
+        0x005a              syr
+        0x005b              si
+        0x005c              chr
+        0x005d              iu
+        0x005e              am
+        0x005f              tzm
+        0x0060              ks, reserved
+        0x0061              ne
+        0x0062              fy
+        0x0063              ps
+        0x0064              fil
+        0x0065              dv
+        0x0066              bin, reserved
+        0x0067              ff
+        0x0068              ha
+        0x0069              ibb, reserved
+        0x006a              yo
+        0x006b              quz
+        0x006c              nso
+        0x006d              ba
+        0x006e              lb
+        0x006f              kl
+        0x0070              ig
+        0x0071              kr, reserved
+        0x0072              om
+        0x0073              ti
+        0x0074              gn
+        0x0075              haw
+        0x0076              la, reserved
+        0x0077              so, reserved
+        0x0078              ii
+        0x0079              pap, reserved
+        0x007a              arn
+        0x007b              Neither defined nor reserved
+        0x007c              moh
+        0x007d              Neither defined nor reserved
+        0x007e              br
+        0x007f              Reserved for invariant locale behavior
+        0x0080              ug
+        0x0081              mi
+        0x0082              oc
+        0x0083              co
+        0x0084              gsw
+        0x0085              sah
+        0x0086              qut
+        0x0087              rw
+        0x0088              wo
+        0x0089              Neither defined nor reserved
+        0x008a              Neither defined nor reserved
+        0x008b              Neither defined nor reserved
+        0x008c              prs
+        0x008d              Neither defined nor reserved
+        0x008e              Neither defined nor reserved
+        0x008f              Neither defined nor reserved
+        0x0090              Neither defined nor reserved
+        0x0091              gd
+        0x0092              ku
+        0x0093              quc, reserved
+        0x0401              ar-SA
+        0x0402              bg-BG
+        0x0403              ca-ES
+        0x0404              zh-TW
+        0x0405              cs-CZ
+        0x0406              da-DK
+        0x0407              de-DE
+        0x0408              el-GR
+        0x0409              en-US
+        0x040a              es-ES_tradnl
+        0x040b              fi-FI
+        0x040c              fr-FR
+        0x040d              he-IL
+        0x040e              hu-HU
+        0x040f              is-IS
+        0x0410              it-IT
+        0x0411              ja-JP
+        0x0412              ko-KR
+        0x0413              nl-NL
+        0x0414              nb-NO
+        0x0415              pl-PL
+        0x0416              pt-BR
+        0x0417              rm-CH
+        0x0418              ro-RO
+        0x0419              ru-RU
+        0x041a              hr-HR
+        0x041b              sk-SK
+        0x041c              sq-AL
+        0x041d              sv-SE
+        0x041e              th-TH
+        0x041f              tr-TR
+        0x0420              ur-PK
+        0x0421              id-ID
+        0x0422              uk-UA
+        0x0423              be-BY
+        0x0424              sl-SI
+        0x0425              et-EE
+        0x0426              lv-LV
+        0x0427              lt-LT
+        0x0428              tg-Cyrl-TJ
+        0x0429              fa-IR
+        0x042a              vi-VN
+        0x042b              hy-AM
+        0x042c              az-Latn-AZ
+        0x042d              eu-ES
+        0x042e              hsb-DE
+        0x042f              mk-MK
+        0x0430              st-ZA
+        0x0431              ts-ZA
+        0x0432              tn-ZA
+        0x0433              ve-ZA, reserved
+        0x0434              xh-ZA
+        0x0435              zu-ZA
+        0x0436              af-ZA
+        0x0437              ka-GE
+        0x0438              fo-FO
+        0x0439              hi-IN
+        0x043a              mt-MT
+        0x043b              se-NO
+        0x043d              yi-Hebr, reserved
+        0x043e              ms-MY
+        0x043f              kk-KZ
+        0x0440              ky-KG
+        0x0441              sw-KE
+        0x0442              tk-TM
+        0x0443              uz-Latn-UZ
+        0x0444              tt-RU
+        0x0445              bn-IN
+        0x0446              pa-IN
+        0x0447              gu-IN
+        0x0448              or-IN
+        0x0449              ta-IN
+        0x044a              te-IN
+        0x044b              kn-IN
+        0x044c              ml-IN
+        0x044d              as-IN
+        0x044e              mr-IN
+        0x044f              sa-IN
+        0x0450              mn-MN
+        0x0451              bo-CN
+        0x0452              cy-GB
+        0x0453              km-KH
+        0x0454              lo-LA
+        0x0455              my-MM
+        0x0456              gl-ES
+        0x0457              kok-IN
+        0x0458              mni-IN, reserved
+        0x0459              sd-Deva-IN, reserved
+        0x045a              syr-SY
+        0x045b              si-LK
+        0x045c              chr-Cher-US
+        0x045d              iu-Cans-CA
+        0x045e              am-ET
+        0x045f              tzm-Arab-MA, reserved
+        0x0460              ks-Arab, reserved
+        0x0461              ne-NP
+        0x0462              fy-NL
+        0x0463              ps-AF
+        0x0464              fil-PH
+        0x0465              dv-MV
+        0x0466              bin-NG, reserved
+        0x0467              fuv-NG, reserved
+        0x0468              ha-Latn-NG
+        0x0469              ibb-NG, reserved
+        0x046a              yo-NG
+        0x046b              quz-BO
+        0x046c              nso-ZA
+        0x046d              ba-RU
+        0x046e              lb-LU
+        0x046f              kl-GL
+        0x0470              ig-NG
+        0x0471              kr-NG, reserved
+        0x0472              om-ET
+        0x0473              ti-ET
+        0x0474              gn-PY
+        0x0475              haw-US
+        0x0476              la-Latn, reserved
+        0x0477              so-SO
+        0x0478              ii-CN
+        0x0479              pap-029, reserved
+        0x047a              arn-CL
+        0x047c              moh-CA
+        0x047e              br-FR
+        0x0480              ug-CN
+        0x0481              mi-NZ
+        0x0482              oc-FR
+        0x0483              co-FR
+        0x0484              gsw-FR
+        0x0485              sah-RU
+        0x0486              qut-GT
+        0x0487              rw-RW
+        0x0488              wo-SN
+        0x048c              prs-AF
+        0x048d              plt-MG, reserved
+        0x048e              zh-yue-HK, reserved
+        0x048f              tdd-Tale-CN, reserved
+        0x0490              khb-Talu-CN, reserved
+        0x0491              gd-GB
+        0x0492              ku-Arab-IQ
+        0x0493              quc-CO, reserved
+        0x0501              qps-ploc
+        0x05fe              qps-ploca
+        0x0801              ar-IQ
+        0x0803              ca-ES-valencia
+        0x0804              zh-CN
+        0x0807              de-CH
+        0x0809              en-GB
+        0x080a              es-MX
+        0x080c              fr-BE
+        0x0810              it-CH
+        0x0811              ja-Ploc-JP, reserved
+        0x0813              nl-BE
+        0x0814              nn-NO
+        0x0816              pt-PT
+        0x0818              ro-MD
+        0x0819              ru-MD, reserved
+        0x081a              sr-Latn-CS
+        0x081d              sv-FI
+        0x0820              ur-IN
+        0x0827              Neither defined nor reserved
+        0x082c              az-Cyrl-AZ
+        0x082e              dsb-DE
+        0x0832              tn-BW
+        0x083b              se-SE
+        0x083c              ga-IE
+        0x083e              ms-BN
+        0x0843              uz-Cyrl-UZ
+        0x0845              bn-BD
+        0x0846              pa-Arab-PK
+        0x0849              ta-LK
+        0x0850              mn-Mong-CN
+        0x0851              bo-BT, reserved
+        0x0859              sd-Arab-PK
+        0x085d              iu-Latn-CA
+        0x085f              tzm-Latn-DZ
+        0x0860              ks-Deva, reserved
+        0x0861              ne-IN
+        0x0867              ff-Latn-SN
+        0x086b              quz-EC
+        0x0873              ti-ER
+        0x09ff              qps-plocm
+        0x0c01              ar-EG
+        0x0c04              zh-HK
+        0x0c07              de-AT
+        0x0c09              en-AU
+        0x0c0a              es-ES
+        0x0c0c              fr-CA
+        0x0c1a              sr-Cyrl-CS
+        0x0c3b              se-FI
+        0x0c50              mn-Mong-MN
+        0x0c5f              tmz-MA, reserved
+        0x0c6b              quz-PE
+        0x1001              ar-LY
+        0x1004              zh-SG
+        0x1007              de-LU
+        0x1009              en-CA
+        0x100a              es-GT
+        0x100c              fr-CH
+        0x101a              hr-BA
+        0x103b              smj-NO
+        0x105f              tzm-Tfng-MA
+        0x1401              ar-DZ
+        0x1404              zh-MO
+        0x1407              de-LI
+        0x1409              en-NZ
+        0x140a              es-CR
+        0x140c              fr-LU
+        0x141a              bs-Latn-BA
+        0x143b              smj-SE
+        0x1801              ar-MA
+        0x1809              en-IE
+        0x180a              es-PA
+        0x180c              fr-MC
+        0x181a              sr-Latn-BA
+        0x183b              sma-NO
+        0x1c01              ar-TN
+        0x1c09              en-ZA
+        0x1c0a              es-DO
+        0x1c0c              Neither defined nor reserved
+        0x1c1a              sr-Cyrl-BA
+        0x1c3b              sma-SE
+        0x2001              ar-OM
+        0x2008              Neither defined nor reserved
+        0x2009              en-JM
+        0x200a              es-VE
+        0x200c              fr-RE
+        0x201a              bs-Cyrl-BA
+        0x203b              sms-FI
+        0x2401              ar-YE
+        0x2409              en-029
+        0x240a              es-CO
+        0x240c              fr-CD
+        0x241a              sr-Latn-RS
+        0x243b              smn-FI
+        0x2801              ar-SY
+        0x2809              en-BZ
+        0x280a              es-PE
+        0x280c              fr-SN
+        0x281a              sr-Cyrl-RS
+        0x2c01              ar-JO
+        0x2c09              en-TT
+        0x2c0a              es-AR
+        0x2c0c              fr-CM
+        0x2c1a              sr-Latn-ME
+        0x3001              ar-LB
+        0x3009              en-ZW
+        0x300a              es-EC
+        0x300c              fr-CI
+        0x301a              sr-Cyrl-ME
+        0x3401              ar-KW
+        0x3409              en-PH
+        0x340a              es-CL
+        0x340c              fr-ML
+        0x3801              ar-AE
+        0x3809              en-ID, reserved
+        0x380a              es-UY
+        0x380c              fr-MA
+        0x3c01              ar-BH
+        0x3c09              en-HK
+        0x3c0a              es-PY
+        0x3c0c              fr-HT
+        0x4001              ar-QA
+        0x4009              en-IN
+        0x400a              es-BO
+        0x4401              ar-Ploc-SA, reserved
+        0x4409              en-MY
+        0x440a              es-SV
+        0x4801              ar-145, reserved
+        0x4809              en-SG
+        0x480a              es-HN
+        0x4c09              en-AE, reserved
+        0x4c0a              es-NI
+        0x5009              en-BH, reserved
+        0x500a              es-PR
+        0x5409              en-EG, reserved
+        0x540a              es-US
+        0x5809              en-JO, reserved
+        0x580a              es-419
+        0x5c09              en-KW, reserved
+        0x6009              en-TR, reserved
+        0x6409              en-YE, reserved
+        0x641a              bs-Cyrl
+        0x681a              bs-Latn
+        0x6c1a              sr-Cyrl
+        0x701a              sr-Latn
+        0x703b              smn
+        0x742c              az-Cyrl
+        0x743b              sms
+        0x7804              zh
+        0x7814              nn
+        0x781a              bs
+        0x782c              az-Latn
+        0x783b              sma
+        0x7843              uz-Cyrl
+        0x7850              mn-Cyrl
+        0x785d              iu-Cans
+        0x785f              tzm-Tfng
+        0x7c04              zh-Hant
+         0x7c14             nb
+         0x7c1a             sr
+         0x7c28             tg-Cyrl
+         0x7c2e             dsb
+         0x7c3b             smj
+         0x7c43             uz-Latn
+         0x7c46             pa-Arab
+         0x7c50             mn-Mong
+         0x7c59             sd-Arab
+         0x7c5c             chr-Cher
+         0x7c5d             iu-Latn
+         0x7c5f             tzm-Latn
+         0x7c67             ff-Latn
+         0x7c68             ha-Latn
+         0x7c92             ku-Arab
commit d4c86d0d1d36c0e972c5772bfa62932651d75363
Author: Eike Rathke <erack at redhat.com>
Date:   Thu Oct 24 11:40:08 2013 +0200

    shell script to extract LangIDs and mappings from MS-LCID.pdf
    
    ... as downloaded from http://msdn.microsoft.com/library/cc233965.aspx
    
    Change-Id: I07f81ca0d6230c38f1f80f93f262debdf939ca87

diff --git a/i18nlangtag/source/isolang/MS-LCID-to-list.sh b/i18nlangtag/source/isolang/MS-LCID-to-list.sh
new file mode 100755
index 0000000..34fd709
--- /dev/null
+++ b/i18nlangtag/source/isolang/MS-LCID-to-list.sh
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Generates a language ID table, plus defines and mappings, from the contents of
+# http://download.microsoft.com/download/9/5/E/95EF66AF-9026-4BB0-A41D-A4F81802D92C/%5BMS-LCID%5D.pdf
+# downloaded from http://msdn.microsoft.com/library/cc233965.aspx
+# At least this worked for Release: Monday, July 22, 2013; 08/08/2013 Revision 6.0
+# downloaded on 2013-10-17
+#
+# Uses pdftotext (from poppler-utils), grep and gawk.
+# Files created/overwritten: MS-LCID.txt, MS-LCID.lst, MS-LCID.lst.h
+
+pdftotext -layout MS-LCID.pdf    # creates MS-LCID.txt
+grep '^ *0x[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] ' MS-LCID.txt > MS-LCID.lst    # keep only rows starting with a 4-digit hex LangID
+gawk -e '
+{
+    val = "0x" toupper( substr( $1, 3));    # LangID with upper-case hex digits
+    tag = $2;
+    tag = gensub( /,.*/, "", 1, tag);       # strip "," and anything after it in the tag field
+    def = $2;
+    for (i=3; i<=NF; ++i)                   # append remaining fields (remarks) to the define name
+    {
+        def = def "_" $i;
+    }
+    def = gensub( /[^a-zA-Z0-9_]/, "_", "g", def);    # replace everything not valid in an identifier
+    def = "LANGUAGE_" def
+    if (def == "LANGUAGE_Neither_defined_nor_reserved")
+    {
+        def = def "_" val                   # these rows share one text, make the name unique per LangID
+    }
+    usedef = def ","
+    n = split( tag, arr, /-/);              # n = number of subtags, arr[1..n] = subtags
+    switch (n)
+    {
+        case 1:
+            # lll
+            mapping = sprintf( "    { %-36s %5s, \"\"  , false },", usedef, "\"" arr[1] "\"");
+            break;
+        case 2:
+            if (length(arr[2]) == 2)
+            {
+                # lll-CC
+                mapping = sprintf( "    { %-36s %5s, \"%s\", false },", usedef, "\"" arr[1] "\"", arr[2]);
+            }
+            else if (length(arr[2]) == 4)
+            {
+                # lll-Ssss
+                mapping = sprintf( "    { %-44s %10s, \"\"   },", usedef, "\"" tag "\"");
+            }
+            else
+            {
+                # lll-### or lll-vvvvvvvv
+                mapping = sprintf( "    { %-33s %16s,   \"\", \"\" },", usedef, "\"" tag "\"");
+            }
+            break;
+        default:
+            if (length(arr[2]) == 2)
+            {
+                # lll-CC-vvvvvvvv
+                mapping = sprintf( "    { %-33s %16s, \"%s\", \"%s\" },", usedef, "\"" tag "\"", arr[2], arr[1] "-" arr[3]);
+            }
+            else if (length(arr[2]) == 4)
+            {
+                # lll-Ssss-CC
+                mapping = sprintf( "    { %-44s %10s, \"%s\" },", usedef, "\"" arr[1] "-" arr[2] "\"", arr[3]);
+            }
+            else
+            {
+                # grandfathered or stuff; a third subtag of length 2 is emitted as the country field
+                if (length(arr[3]) == 2)
+                    mapping = sprintf( "    { %-33s %16s, \"%s\", \"\" },", usedef, "\"" tag "\"", arr[3]);
+                else
+                    mapping = sprintf( "    { %-33s %16s, \"\", \"\" },", usedef, "\"" tag "\"");
+            }
+            break;
+    }
+    printf "#define %-35s %s\n", def, val;  # one define, one mapping entry and a blank line per LangID
+    print mapping;
+    print ""
+}
+' MS-LCID.lst > MS-LCID.lst.h
+
+# vim: set noet sw=4 ts=4:


More information about the Libreoffice-commits mailing list