[Libreoffice-commits] core.git: 2 commits - i18nlangtag/source
Eike Rathke
erack at redhat.com
Thu Oct 24 02:48:26 PDT 2013
i18nlangtag/source/isolang/MS-LCID-to-list.sh | 88 ++++
i18nlangtag/source/isolang/MS-LCID.lst | 460 ++++++++++++++++++++++++++
2 files changed, 548 insertions(+)
New commits:
commit 5dcee8893a41ace34aad77530cb8781ecde8e2cd
Author: Eike Rathke <erack at redhat.com>
Date: Thu Oct 24 11:44:49 2013 +0200
raw LangID list for reference
... to hopefully be able to generate a diff when the next release of
MS-LCID.pdf will be available at
http://msdn.microsoft.com/library/cc233965.aspx
Change-Id: I10877cdc8d4b90c6b971dfa5e05ad796fd2a2d00
diff --git a/i18nlangtag/source/isolang/MS-LCID.lst b/i18nlangtag/source/isolang/MS-LCID.lst
new file mode 100644
index 0000000..219157f
--- /dev/null
+++ b/i18nlangtag/source/isolang/MS-LCID.lst
@@ -0,0 +1,460 @@
+ 0x0001 ar
+ 0x0002 bg
+ 0x0003 ca
+ 0x0004 zh-Hans
+ 0x0005 cs
+ 0x0006 da
+ 0x0007 de
+ 0x0008 el
+ 0x0009 en
+ 0x000a es
+ 0x000b fi
+ 0x000c fr
+ 0x000d he
+ 0x000e hu
+ 0x000f is
+ 0x0010 it
+ 0x0011 ja
+ 0x0012 ko
+ 0x0013 nl
+ 0x0014 no
+ 0x0015 pl
+ 0x0016 pt
+ 0x0017 rm
+ 0x0018 ro
+ 0x0019 ru
+ 0x001a bs, hr, or sr
+ 0x001b sk
+ 0x001c sq
+ 0x001d sv
+ 0x001e th
+ 0x001f tr
+ 0x0020 ur
+ 0x0021 id
+ 0x0022 uk
+ 0x0023 be
+ 0x0024 sl
+ 0x0025 et
+ 0x0026 lv
+ 0x0027 lt
+ 0x0028 tg
+ 0x0029 fa
+ 0x002a vi
+ 0x002b hy
+ 0x002c az
+ 0x002d eu
+ 0x002e dsb or hsb
+ 0x002f mk
+ 0x0030 st
+ 0x0031 ts
+ 0x0032 tn
+ 0x0033 ve, reserved
+ 0x0034 xh
+ 0x0035 zu
+ 0x0036 af
+ 0x0037 ka
+ 0x0038 fo
+ 0x0039 hi
+ 0x003a mt
+ 0x003b se
+ 0x003c ga
+ 0x003d yi, reserved
+ 0x003e ms
+ 0x003f kk
+ 0x0040 ky
+ 0x0041 sw
+ 0x0042 tk
+ 0x0043 uz
+ 0x0044 tt
+ 0x0045 bn
+ 0x0046 pa
+ 0x0047 gu
+ 0x0048 or
+ 0x0049 ta
+ 0x004a te
+ 0x004b kn
+ 0x004c ml
+ 0x004d as
+ 0x004e mr
+ 0x004f sa
+ 0x0050 mn
+ 0x0051 bo
+ 0x0052 cy
+ 0x0053 km
+ 0x0054 lo
+ 0x0055 my
+ 0x0056 gl
+ 0x0057 kok
+ 0x0058 mni, reserved
+ 0x0059 sd
+ 0x005a syr
+ 0x005b si
+ 0x005c chr
+ 0x005d iu
+ 0x005e am
+ 0x005f tzm
+ 0x0060 ks, reserved
+ 0x0061 ne
+ 0x0062 fy
+ 0x0063 ps
+ 0x0064 fil
+ 0x0065 dv
+ 0x0066 bin, reserved
+ 0x0067 ff
+ 0x0068 ha
+ 0x0069 ibb, reserved
+ 0x006a yo
+ 0x006b quz
+ 0x006c nso
+ 0x006d ba
+ 0x006e lb
+ 0x006f kl
+ 0x0070 ig
+ 0x0071 kr, reserved
+ 0x0072 om
+ 0x0073 ti
+ 0x0074 gn
+ 0x0075 haw
+ 0x0076 la, reserved
+ 0x0077 so, reserved
+ 0x0078 ii
+ 0x0079 pap, reserved
+ 0x007a arn
+ 0x007b Neither defined nor reserved
+ 0x007c moh
+ 0x007d Neither defined nor reserved
+ 0x007e br
+ 0x007f Reserved for invariant locale behavior
+ 0x0080 ug
+ 0x0081 mi
+ 0x0082 oc
+ 0x0083 co
+ 0x0084 gsw
+ 0x0085 sah
+ 0x0086 qut
+ 0x0087 rw
+ 0x0088 wo
+ 0x0089 Neither defined nor reserved
+ 0x008a Neither defined nor reserved
+ 0x008b Neither defined nor reserved
+ 0x008c prs
+ 0x008d Neither defined nor reserved
+ 0x008e Neither defined nor reserved
+ 0x008f Neither defined nor reserved
+ 0x0090 Neither defined nor reserved
+ 0x0091 gd
+ 0x0092 ku
+ 0x0093 quc, reserved
+ 0x0401 ar-SA
+ 0x0402 bg-BG
+ 0x0403 ca-ES
+ 0x0404 zh-TW
+ 0x0405 cs-CZ
+ 0x0406 da-DK
+ 0x0407 de-DE
+ 0x0408 el-GR
+ 0x0409 en-US
+ 0x040a es-ES_tradnl
+ 0x040b fi-FI
+ 0x040c fr-FR
+ 0x040d he-IL
+ 0x040e hu-HU
+ 0x040f is-IS
+ 0x0410 it-IT
+ 0x0411 ja-JP
+ 0x0412 ko-KR
+ 0x0413 nl-NL
+ 0x0414 nb-NO
+ 0x0415 pl-PL
+ 0x0416 pt-BR
+ 0x0417 rm-CH
+ 0x0418 ro-RO
+ 0x0419 ru-RU
+ 0x041a hr-HR
+ 0x041b sk-SK
+ 0x041c sq-AL
+ 0x041d sv-SE
+ 0x041e th-TH
+ 0x041f tr-TR
+ 0x0420 ur-PK
+ 0x0421 id-ID
+ 0x0422 uk-UA
+ 0x0423 be-BY
+ 0x0424 sl-SI
+ 0x0425 et-EE
+ 0x0426 lv-LV
+ 0x0427 lt-LT
+ 0x0428 tg-Cyrl-TJ
+ 0x0429 fa-IR
+ 0x042a vi-VN
+ 0x042b hy-AM
+ 0x042c az-Latn-AZ
+ 0x042d eu-ES
+ 0x042e hsb-DE
+ 0x042f mk-MK
+ 0x0430 st-ZA
+ 0x0431 ts-ZA
+ 0x0432 tn-ZA
+ 0x0433 ve-ZA, reserved
+ 0x0434 xh-ZA
+ 0x0435 zu-ZA
+ 0x0436 af-ZA
+ 0x0437 ka-GE
+ 0x0438 fo-FO
+ 0x0439 hi-IN
+ 0x043a mt-MT
+ 0x043b se-NO
+ 0x043d yi-Hebr, reserved
+ 0x043e ms-MY
+ 0x043f kk-KZ
+ 0x0440 ky-KG
+ 0x0441 sw-KE
+ 0x0442 tk-TM
+ 0x0443 uz-Latn-UZ
+ 0x0444 tt-RU
+ 0x0445 bn-IN
+ 0x0446 pa-IN
+ 0x0447 gu-IN
+ 0x0448 or-IN
+ 0x0449 ta-IN
+ 0x044a te-IN
+ 0x044b kn-IN
+ 0x044c ml-IN
+ 0x044d as-IN
+ 0x044e mr-IN
+ 0x044f sa-IN
+ 0x0450 mn-MN
+ 0x0451 bo-CN
+ 0x0452 cy-GB
+ 0x0453 km-KH
+ 0x0454 lo-LA
+ 0x0455 my-MM
+ 0x0456 gl-ES
+ 0x0457 kok-IN
+ 0x0458 mni-IN, reserved
+ 0x0459 sd-Deva-IN, reserved
+ 0x045a syr-SY
+ 0x045b si-LK
+ 0x045c chr-Cher-US
+ 0x045d iu-Cans-CA
+ 0x045e am-ET
+ 0x045f tzm-Arab-MA, reserved
+ 0x0460 ks-Arab, reserved
+ 0x0461 ne-NP
+ 0x0462 fy-NL
+ 0x0463 ps-AF
+ 0x0464 fil-PH
+ 0x0465 dv-MV
+ 0x0466 bin-NG, reserved
+ 0x0467 fuv-NG, reserved
+ 0x0468 ha-Latn-NG
+ 0x0469 ibb-NG, reserved
+ 0x046a yo-NG
+ 0x046b quz-BO
+ 0x046c nso-ZA
+ 0x046d ba-RU
+ 0x046e lb-LU
+ 0x046f kl-GL
+ 0x0470 ig-NG
+ 0x0471 kr-NG, reserved
+ 0x0472 om-ET
+ 0x0473 ti-ET
+ 0x0474 gn-PY
+ 0x0475 haw-US
+ 0x0476 la-Latn, reserved
+ 0x0477 so-SO
+ 0x0478 ii-CN
+ 0x0479 pap-029, reserved
+ 0x047a arn-CL
+ 0x047c moh-CA
+ 0x047e br-FR
+ 0x0480 ug-CN
+ 0x0481 mi-NZ
+ 0x0482 oc-FR
+ 0x0483 co-FR
+ 0x0484 gsw-FR
+ 0x0485 sah-RU
+ 0x0486 qut-GT
+ 0x0487 rw-RW
+ 0x0488 wo-SN
+ 0x048c prs-AF
+ 0x048d plt-MG, reserved
+ 0x048e zh-yue-HK, reserved
+ 0x048f tdd-Tale-CN, reserved
+ 0x0490 khb-Talu-CN, reserved
+ 0x0491 gd-GB
+ 0x0492 ku-Arab-IQ
+ 0x0493 quc-CO, reserved
+ 0x0501 qps-ploc
+ 0x05fe qps-ploca
+ 0x0801 ar-IQ
+ 0x0803 ca-ES-valencia
+ 0x0804 zh-CN
+ 0x0807 de-CH
+ 0x0809 en-GB
+ 0x080a es-MX
+ 0x080c fr-BE
+ 0x0810 it-CH
+ 0x0811 ja-Ploc-JP, reserved
+ 0x0813 nl-BE
+ 0x0814 nn-NO
+ 0x0816 pt-PT
+ 0x0818 ro-MD
+ 0x0819 ru-MD, reserved
+ 0x081a sr-Latn-CS
+ 0x081d sv-FI
+ 0x0820 ur-IN
+ 0x0827 Neither defined nor reserved
+ 0x082c az-Cyrl-AZ
+ 0x082e dsb-DE
+ 0x0832 tn-BW
+ 0x083b se-SE
+ 0x083c ga-IE
+ 0x083e ms-BN
+ 0x0843 uz-Cyrl-UZ
+ 0x0845 bn-BD
+ 0x0846 pa-Arab-PK
+ 0x0849 ta-LK
+ 0x0850 mn-Mong-CN
+ 0x0851 bo-BT, reserved
+ 0x0859 sd-Arab-PK
+ 0x085d iu-Latn-CA
+ 0x085f tzm-Latn-DZ
+ 0x0860 ks-Deva, reserved
+ 0x0861 ne-IN
+ 0x0867 ff-Latn-SN
+ 0x086b quz-EC
+ 0x0873 ti-ER
+ 0x09ff qps-plocm
+ 0x0c01 ar-EG
+ 0x0c04 zh-HK
+ 0x0c07 de-AT
+ 0x0c09 en-AU
+ 0x0c0a es-ES
+ 0x0c0c fr-CA
+ 0x0c1a sr-Cyrl-CS
+ 0x0c3b se-FI
+ 0x0c50 mn-Mong-MN
+ 0x0c5f tmz-MA, reserved
+ 0x0c6b quz-PE
+ 0x1001 ar-LY
+ 0x1004 zh-SG
+ 0x1007 de-LU
+ 0x1009 en-CA
+ 0x100a es-GT
+ 0x100c fr-CH
+ 0x101a hr-BA
+ 0x103b smj-NO
+ 0x105f tzm-Tfng-MA
+ 0x1401 ar-DZ
+ 0x1404 zh-MO
+ 0x1407 de-LI
+ 0x1409 en-NZ
+ 0x140a es-CR
+ 0x140c fr-LU
+ 0x141a bs-Latn-BA
+ 0x143b smj-SE
+ 0x1801 ar-MA
+ 0x1809 en-IE
+ 0x180a es-PA
+ 0x180c fr-MC
+ 0x181a sr-Latn-BA
+ 0x183b sma-NO
+ 0x1c01 ar-TN
+ 0x1c09 en-ZA
+ 0x1c0a es-DO
+ 0x1c0c Neither defined nor reserved
+ 0x1c1a sr-Cyrl-BA
+ 0x1c3b sma-SE
+ 0x2001 ar-OM
+ 0x2008 Neither defined nor reserved
+ 0x2009 en-JM
+ 0x200a es-VE
+ 0x200c fr-RE
+ 0x201a bs-Cyrl-BA
+ 0x203b sms-FI
+ 0x2401 ar-YE
+ 0x2409 en-029
+ 0x240a es-CO
+ 0x240c fr-CD
+ 0x241a sr-Latn-RS
+ 0x243b smn-FI
+ 0x2801 ar-SY
+ 0x2809 en-BZ
+ 0x280a es-PE
+ 0x280c fr-SN
+ 0x281a sr-Cyrl-RS
+ 0x2c01 ar-JO
+ 0x2c09 en-TT
+ 0x2c0a es-AR
+ 0x2c0c fr-CM
+ 0x2c1a sr-Latn-ME
+ 0x3001 ar-LB
+ 0x3009 en-ZW
+ 0x300a es-EC
+ 0x300c fr-CI
+ 0x301a sr-Cyrl-ME
+ 0x3401 ar-KW
+ 0x3409 en-PH
+ 0x340a es-CL
+ 0x340c fr-ML
+ 0x3801 ar-AE
+ 0x3809 en-ID, reserved
+ 0x380a es-UY
+ 0x380c fr-MA
+ 0x3c01 ar-BH
+ 0x3c09 en-HK
+ 0x3c0a es-PY
+ 0x3c0c fr-HT
+ 0x4001 ar-QA
+ 0x4009 en-IN
+ 0x400a es-BO
+ 0x4401 ar-Ploc-SA, reserved
+ 0x4409 en-MY
+ 0x440a es-SV
+ 0x4801 ar-145, reserved
+ 0x4809 en-SG
+ 0x480a es-HN
+ 0x4c09 en-AE, reserved
+ 0x4c0a es-NI
+ 0x5009 en-BH, reserved
+ 0x500a es-PR
+ 0x5409 en-EG, reserved
+ 0x540a es-US
+ 0x5809 en-JO, reserved
+ 0x580a es-419
+ 0x5c09 en-KW, reserved
+ 0x6009 en-TR, reserved
+ 0x6409 en-YE, reserved
+ 0x641a bs-Cyrl
+ 0x681a bs-Latn
+ 0x6c1a sr-Cyrl
+ 0x701a sr-Latn
+ 0x703b smn
+ 0x742c az-Cyrl
+ 0x743b sms
+ 0x7804 zh
+ 0x7814 nn
+ 0x781a bs
+ 0x782c az-Latn
+ 0x783b sma
+ 0x7843 uz-Cyrl
+ 0x7850 mn-Cyrl
+ 0x785d iu-Cans
+ 0x785f tzm-Tfng
+ 0x7c04 zh-Hant
+ 0x7c14 nb
+ 0x7c1a sr
+ 0x7c28 tg-Cyrl
+ 0x7c2e dsb
+ 0x7c3b smj
+ 0x7c43 uz-Latn
+ 0x7c46 pa-Arab
+ 0x7c50 mn-Mong
+ 0x7c59 sd-Arab
+ 0x7c5c chr-Cher
+ 0x7c5d iu-Latn
+ 0x7c5f tzm-Latn
+ 0x7c67 ff-Latn
+ 0x7c68 ha-Latn
+ 0x7c92 ku-Arab
commit d4c86d0d1d36c0e972c5772bfa62932651d75363
Author: Eike Rathke <erack at redhat.com>
Date: Thu Oct 24 11:40:08 2013 +0200
shell script to extract LangIDs and mappings from MS-LCID.pdf
... as downloaded from http://msdn.microsoft.com/library/cc233965.aspx
Change-Id: I07f81ca0d6230c38f1f80f93f262debdf939ca87
diff --git a/i18nlangtag/source/isolang/MS-LCID-to-list.sh b/i18nlangtag/source/isolang/MS-LCID-to-list.sh
new file mode 100755
index 0000000..34fd709
--- /dev/null
+++ b/i18nlangtag/source/isolang/MS-LCID-to-list.sh
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Generates language ID table and defines and mappings of
+# http://download.microsoft.com/download/9/5/E/95EF66AF-9026-4BB0-A41D-A4F81802D92C/%5BMS-LCID%5D.pdf
+# downloaded from http://msdn.microsoft.com/library/cc233965.aspx
+# At least this worked for Release: Monday, July 22, 2013; 08/08/2013 Revision 6.0
+# downloaded on 2013-10-17
+#
+# Uses pdftotext (from poppler-utils), grep and gawk.
+# Files created/overwritten: MS-LCID.txt, MS-LCID.lst, MS-LCID.lst.h
+
+pdftotext -layout MS-LCID.pdf || exit 1	# writes MS-LCID.txt; stop here on failure so the redirects below do not truncate MS-LCID.lst and MS-LCID.lst.h
+grep '^ *0x[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] ' MS-LCID.txt > MS-LCID.lst	# keep only lines starting with a 4-digit hex value followed by a blank
+gawk -e ' # For each "0xhhhh tag ..." input line: print a #define, one mapping initializer and an empty line.
+{
+ val = "0x" toupper( substr( $1, 3)); # hex digits upper-cased, the "0x" prefix stays lower-case
+ tag = $2;
+ tag = gensub( /,.*/, "", 1, tag); # strip from the first comma on, e.g. "ve," of "ve, reserved" becomes "ve"
+ def = $2;
+ for (i=3; i<=NF; ++i) # append all remaining words, joined with "_"
+ {
+ def = def "_" $i;
+ }
+ def = gensub( /[^a-zA-Z0-9_]/, "_", "g", def); # every character outside [a-zA-Z0-9_] becomes "_"
+ def = "LANGUAGE_" def
+ if (def == "LANGUAGE_Neither_defined_nor_reserved")
+ {
+ def = def "_" val # many values share this text; append the value so the define names differ
+ }
+ usedef = def "," # define name plus comma, padded as the first column of the mapping line
+ n = split( tag, arr, /-/); # n = number of "-" separated subtags, arr[1..n] = the subtags
+ switch (n)
+ {
+ case 1:
+ # lll: language only, empty country string
+ mapping = sprintf( " { %-36s %5s, \"\" , false },", usedef, "\"" arr[1] "\"");
+ break;
+ case 2:
+ if (length(arr[2]) == 2)
+ {
+ # lll-CC: language and two-character country
+ mapping = sprintf( " { %-36s %5s, \"%s\", false },", usedef, "\"" arr[1] "\"", arr[2]);
+ }
+ else if (length(arr[2]) == 4)
+ {
+ # lll-Ssss: language and four-character script, empty country string
+ mapping = sprintf( " { %-44s %10s, \"\" },", usedef, "\"" tag "\"");
+ }
+ else
+ {
+ # lll-### or lll-vvvvvvvv: whole tag is emitted, followed by two empty strings
+ mapping = sprintf( " { %-33s %16s, \"\", \"\" },", usedef, "\"" tag "\"");
+ }
+ break;
+ default:
+ if (length(arr[2]) == 2)
+ {
+ # lll-CC-vvvvvvvv: whole tag, country, then language-variant without the country
+ mapping = sprintf( " { %-33s %16s, \"%s\", \"%s\" },", usedef, "\"" tag "\"", arr[2], arr[1] "-" arr[3]);
+ }
+ else if (length(arr[2]) == 4)
+ {
+ # lll-Ssss-CC: language-script, then country
+ mapping = sprintf( " { %-44s %10s, \"%s\" },", usedef, "\"" arr[1] "-" arr[2] "\"", arr[3]);
+ }
+ else
+ {
+ # grandfathered or stuff; the third subtag is emitted as country only if it is two characters long
+ if (length(arr[3]) == 2)
+ mapping = sprintf( " { %-33s %16s, \"%s\", \"\" },", usedef, "\"" tag "\"", arr[3]);
+ else
+ mapping = sprintf( " { %-33s %16s, \"\", \"\" },", usedef, "\"" tag "\"");
+ }
+ break;
+ }
+ printf "#define %-35s %s\n", def, val;
+ print mapping;
+ print ""
+}
+' MS-LCID.lst > MS-LCID.lst.h
+
+# vim: set noet sw=4 ts=4:
More information about the Libreoffice-commits
mailing list