[HarfBuzz] harfbuzz: Branch 'master'
Behdad Esfahbod
behdad at kemper.freedesktop.org
Wed Jan 18 19:55:24 UTC 2017
src/hb-ot-tag.cc | 77 +++++++++++++++++++++++++++++++++++++++++++------
test/api/test-ot-tag.c | 58 ++++++++++++++++++++++++++++++++++++
2 files changed, 126 insertions(+), 9 deletions(-)
New commits:
commit 1337428e4f8a2a4c78312c581bf4e96cd49d783f
Author: Sascha Brawer <sascha at brawer.ch>
Date: Wed Jan 18 13:51:02 2017 +0100
Update language tags to OpenType 1.8.1 (#403)
Resolves https://github.com/behdad/harfbuzz/issues/324
diff --git a/src/hb-ot-tag.cc b/src/hb-ot-tag.cc
index 5f21ac0..5c348e8 100644
--- a/src/hb-ot-tag.cc
+++ b/src/hb-ot-tag.cc
@@ -201,6 +201,7 @@ static const LangTag ot_languages[] = {
{"alt", HB_TAG('A','L','T',' ')}, /* [Southern] Altai */
{"am", HB_TAG('A','M','H',' ')}, /* Amharic */
{"amf", HB_TAG('H','B','N',' ')}, /* Hammer-Banna */
+ {"amw", HB_TAG('S','Y','R',' ')}, /* Western Neo-Aramaic */
{"an", HB_TAG('A','R','G',' ')}, /* Aragonese */
{"ang", HB_TAG('A','N','G',' ')}, /* Old English (ca. 450-1100) */
{"ar", HB_TAG('A','R','A',' ')}, /* Arabic [macrolanguage] */
@@ -239,6 +240,7 @@ static const LangTag ot_languages[] = {
{"bg", HB_TAG('B','G','R',' ')}, /* Bulgarian */
{"bgc", HB_TAG('B','G','C',' ')}, /* Haryanvi */
{"bgq", HB_TAG('B','G','Q',' ')}, /* Bagri */
+ {"bgr", HB_TAG('Q','I','N',' ')}, /* Bawm Chin */
{"bhb", HB_TAG('B','H','I',' ')}, /* Bhili */
{"bhk", HB_TAG('B','I','K',' ')}, /* Albay Bicolano (retired code) */
{"bho", HB_TAG('B','H','O',' ')}, /* Bhojpuri */
@@ -270,8 +272,10 @@ static const LangTag ot_languages[] = {
{"ca", HB_TAG('C','A','T',' ')}, /* Catalan */
{"cak", HB_TAG('C','A','K',' ')}, /* Kaqchikel */
{"cbk", HB_TAG('C','B','K',' ')}, /* Chavacano */
+ {"cbl", HB_TAG('Q','I','N',' ')}, /* Bualkhaw Chin */
{"ce", HB_TAG('C','H','E',' ')}, /* Chechen */
{"ceb", HB_TAG('C','E','B',' ')}, /* Cebuano */
+ {"cfm", HB_TAG('H','A','L',' ')}, /* Halam/Falam Chin */
{"cgg", HB_TAG('C','G','G',' ')}, /* Chiga */
{"ch", HB_TAG('C','H','A',' ')}, /* Chamorro */
{"chk", HB_TAG('C','H','K','0')}, /* Chuukese */
@@ -279,8 +283,17 @@ static const LangTag ot_languages[] = {
{"chp", HB_TAG('C','H','P',' ')}, /* Chipewyan */
{"chr", HB_TAG('C','H','R',' ')}, /* Cherokee */
{"chy", HB_TAG('C','H','Y',' ')}, /* Cheyenne */
+ {"cja", HB_TAG('C','J','A',' ')}, /* Western Cham */
+ {"cjm", HB_TAG('C','J','M',' ')}, /* Eastern Cham */
+ {"cka", HB_TAG('Q','I','N',' ')}, /* Khumi Awa Chin */
{"ckb", HB_TAG('K','U','R',' ')}, /* Central Kurdish (Sorani) */
{"ckt", HB_TAG('C','H','K',' ')}, /* Chukchi */
+ {"cld", HB_TAG('S','Y','R',' ')}, /* Chaldean Neo-Aramaic */
+ {"cmr", HB_TAG('Q','I','N',' ')}, /* Mro-Khimi Chin */
+ {"cnb", HB_TAG('Q','I','N',' ')}, /* Chinbon Chin */
+ {"cnh", HB_TAG('Q','I','N',' ')}, /* Hakha Chin */
+ {"cnk", HB_TAG('Q','I','N',' ')}, /* Khumi Chin */
+ {"cnw", HB_TAG('Q','I','N',' ')}, /* Ngawn Chin */
{"cop", HB_TAG('C','O','P',' ')}, /* Coptic */
{"cpp", HB_TAG('C','P','P',' ')}, /* Creoles */
{"cr", HB_TAG('C','R','E',' ')}, /* Cree */
@@ -293,6 +306,9 @@ static const LangTag ot_languages[] = {
{"crx", HB_TAG('C','R','R',' ')}, /* Carrier */
{"cs", HB_TAG('C','S','Y',' ')}, /* Czech */
{"csb", HB_TAG('C','S','B',' ')}, /* Kashubian */
+ {"csh", HB_TAG('Q','I','N',' ')}, /* Asho Chin */
+ {"csy", HB_TAG('Q','I','N',' ')}, /* Siyin Chin */
+ {"ctd", HB_TAG('Q','I','N',' ')}, /* Tedim Chin */
{"ctg", HB_TAG('C','T','G',' ')}, /* Chittagonian */
{"cts", HB_TAG('B','I','K',' ')}, /* Northern Catanduanes Bikol */
{"cu", HB_TAG('C','S','L',' ')}, /* Church Slavic */
@@ -300,7 +316,9 @@ static const LangTag ot_languages[] = {
{"cv", HB_TAG('C','H','U',' ')}, /* Chuvash */
{"cwd", HB_TAG('D','C','R',' ')}, /* Woods Cree */
{"cy", HB_TAG('W','E','L',' ')}, /* Welsh */
+ {"czt", HB_TAG('Q','I','N',' ')}, /* Zotung Chin */
{"da", HB_TAG('D','A','N',' ')}, /* Danish */
+ {"dao", HB_TAG('Q','I','N',' ')}, /* Daai Chin */
{"dap", HB_TAG('N','I','S',' ')}, /* Nisi (India) */
{"dar", HB_TAG('D','A','R',' ')}, /* Dargwa */
{"dax", HB_TAG('D','A','X',' ')}, /* Dayi */
@@ -343,7 +361,7 @@ static const LangTag ot_languages[] = {
{"fi", HB_TAG('F','I','N',' ')}, /* Finnish */
{"fil", HB_TAG('P','I','L',' ')}, /* Filipino */
{"fj", HB_TAG('F','J','I',' ')}, /* Fijian */
- {"flm", HB_TAG('H','A','L',' ')}, /* Halam */
+ {"flm", HB_TAG('H','A','L',' ')}, /* Halam/Falam Chin [retired ISO639 code] */
{"fo", HB_TAG('F','O','S',' ')}, /* Faroese */
{"fon", HB_TAG('F','O','N',' ')}, /* Fon */
{"fr", HB_TAG('F','R','A',' ')}, /* French */
@@ -390,6 +408,7 @@ static const LangTag ot_languages[] = {
{"he", HB_TAG('I','W','R',' ')}, /* Hebrew */
{"hi", HB_TAG('H','I','N',' ')}, /* Hindi */
{"hil", HB_TAG('H','I','L',' ')}, /* Hiligaynon */
+ {"hlt", HB_TAG('Q','I','N',' ')}, /* Matu Chin */
{"hmn", HB_TAG('H','M','N',' ')}, /* Hmong */
{"hnd", HB_TAG('H','N','D',' ')}, /* [Southern] Hindko */
{"hne", HB_TAG('C','H','H',' ')}, /* Chattisgarhi */
@@ -553,6 +572,7 @@ static const LangTag ot_languages[] = {
{"mos", HB_TAG('M','O','S',' ')}, /* Mossi */
{"mpe", HB_TAG('M','A','J',' ')}, /* Majang */
{"mr", HB_TAG('M','A','R',' ')}, /* Marathi */
+ {"mrh", HB_TAG('Q','I','N',' ')}, /* Mara Chin */
{"mrj", HB_TAG('H','M','A',' ')}, /* High Mari */
{"ms", HB_TAG('M','L','Y',' ')}, /* Malay [macrolanguage] */
{"msc", HB_TAG('M','N','K',' ')}, /* Sankaran Maninka */
@@ -617,6 +637,7 @@ static const LangTag ot_languages[] = {
{"pcc", HB_TAG('P','C','C',' ')}, /* Bouyei */
{"pcd", HB_TAG('P','C','D',' ')}, /* Picard */
{"pce", HB_TAG('P','L','G',' ')}, /* [Ruching] Palaung */
+ {"pck", HB_TAG('Q','I','N',' ')}, /* Paite Chin */
{"pdc", HB_TAG('P','D','C',' ')}, /* Pennsylvania German */
{"pes", HB_TAG('F','A','R',' ')}, /* Iranian Persian */
{"phk", HB_TAG('P','H','K',' ')}, /* Phake */
@@ -674,6 +695,7 @@ static const LangTag ot_languages[] = {
{"se", HB_TAG('N','S','M',' ')}, /* Northern Sami */
{"seh", HB_TAG('S','N','A',' ')}, /* Sena */
{"sel", HB_TAG('S','E','L',' ')}, /* Selkup */
+ {"sez", HB_TAG('Q','I','N',' ')}, /* Senthang Chin */
{"sg", HB_TAG('S','G','O',' ')}, /* Sango */
{"sga", HB_TAG('S','G','A',' ')}, /* Old Irish (to 900) */
{"sgs", HB_TAG('S','G','S',' ')}, /* Samogitian */
@@ -713,12 +735,15 @@ static const LangTag ot_languages[] = {
{"swh", HB_TAG('S','W','K',' ')}, /* Kiswahili/Swahili */
{"swv", HB_TAG('M','A','W',' ')}, /* Shekhawati */
{"sxu", HB_TAG('S','X','U',' ')}, /* Upper Saxon */
+ {"syc", HB_TAG('S','Y','R',' ')}, /* Classical Syriac */
{"syl", HB_TAG('S','Y','L',' ')}, /* Sylheti */
{"syr", HB_TAG('S','Y','R',' ')}, /* Syriac [macrolanguage] */
{"szl", HB_TAG('S','Z','L',' ')}, /* Silesian */
{"ta", HB_TAG('T','A','M',' ')}, /* Tamil */
{"tab", HB_TAG('T','A','B',' ')}, /* Tabasaran */
+ {"tcp", HB_TAG('Q','I','N',' ')}, /* Tawr Chin */
{"tcy", HB_TAG('T','U','L',' ')}, /* Tulu */
+ {"tcz", HB_TAG('Q','I','N',' ')}, /* Thado Chin */
{"tdd", HB_TAG('T','D','D',' ')}, /* Tai Nüa */
{"te", HB_TAG('T','E','L',' ')}, /* Telugu */
{"tem", HB_TAG('T','M','N',' ')}, /* Temne */
@@ -786,11 +811,13 @@ static const LangTag ot_languages[] = {
{"yap", HB_TAG('Y','A','P',' ')}, /* Yapese */
{"yi", HB_TAG('J','I','I',' ')}, /* Yiddish [macrolanguage] */
{"yo", HB_TAG('Y','B','A',' ')}, /* Yoruba */
+ {"yos", HB_TAG('Q','I','N',' ')}, /* Yos, deprecated by IANA in favor of Zou [zom] */
{"yso", HB_TAG('N','I','S',' ')}, /* Nisi (China) */
{"za", HB_TAG('Z','H','A',' ')}, /* Chuang/Zhuang [macrolanguage] */
{"zea", HB_TAG('Z','E','A',' ')}, /* Zeeuws */
{"zgh", HB_TAG('Z','G','H',' ')}, /* Standard Moroccan Tamazight */
{"zne", HB_TAG('Z','N','D',' ')}, /* Zande */
+ {"zom", HB_TAG('Q','I','N',' ')}, /* Zou */
{"zu", HB_TAG('Z','U','L',' ')}, /* Zulu */
{"zum", HB_TAG('L','R','C',' ')}, /* Kumzari */
{"zza", HB_TAG('Z','Z','A',' ')}, /* Zazaki */
@@ -907,6 +934,30 @@ hb_ot_tag_from_language (hb_language_t language)
return HB_TAG('A','P','P','H'); /* Phonetic transcription—Americanist conventions */
}
+ /*
+ * "Syre" is a BCP-47 script tag, meaning the Estrangela variant of the Syriac script.
+ * It can be applied to any language.
+ */
+ if (strstr (lang_str, "-syre")) {
+ return HB_TAG('S','Y','R','E'); /* Estrangela Syriac */
+ }
+
+ /*
+ * "Syrj" is a BCP-47 script tag, meaning the Western variant of the Syriac script.
+ * It can be applied to any language.
+ */
+ if (strstr (lang_str, "-syrj")) {
+ return HB_TAG('S','Y','R','J'); /* Western Syriac */
+ }
+
+ /*
+ * "Syrn" is a BCP-47 script tag, meaning the Eastern variant of the Syriac script.
+ * It can be applied to any language.
+ */
+ if (strstr (lang_str, "-syrn")) {
+ return HB_TAG('S','Y','R','N'); /* Eastern Syriac */
+ }
+
/* Find a language matching in the first component */
{
const LangTag *lang_tag;
@@ -962,6 +1013,22 @@ hb_ot_tag_to_language (hb_tag_t tag)
if (tag == HB_OT_TAG_DEFAULT_LANGUAGE)
return NULL;
+ /* struct LangTag has only room for 3-letter language tags. */
+ switch (tag) {
+ case HB_TAG('A','P','P','H'): /* Phonetic transcription—Americanist conventions */
+ return hb_language_from_string ("und-fonnapa", -1);
+ case HB_TAG('I','P','P','H'): /* Phonetic transcription—IPA conventions */
+ return hb_language_from_string ("und-fonipa", -1);
+ case HB_TAG('S','Y','R',' '): /* Syriac [macrolanguage] */
+ return hb_language_from_string ("syr", -1);
+ case HB_TAG('S','Y','R','E'): /* Estrangela Syriac */
+ return hb_language_from_string ("und-Syre", -1);
+ case HB_TAG('S','Y','R','J'): /* Western Syriac */
+ return hb_language_from_string ("und-Syrj", -1);
+ case HB_TAG('S','Y','R','N'): /* Eastern Syriac */
+ return hb_language_from_string ("und-Syrn", -1);
+ }
+
for (i = 0; i < ARRAY_LENGTH (ot_languages); i++)
if (ot_languages[i].tag == tag)
return hb_language_from_string (ot_languages[i].language, -1);
@@ -976,14 +1043,6 @@ hb_ot_tag_to_language (hb_tag_t tag)
}
}
- /* struct LangTag has only room for 3-letter language tags. */
- switch (tag) {
- case HB_TAG('A','P','P','H'): /* Phonetic transcription—Americanist conventions */
- return hb_language_from_string ("und-fonnapa", -1);
- case HB_TAG('I','P','P','H'): /* Phonetic transcription—IPA conventions */
- return hb_language_from_string ("und-fonipa", -1);
- }
-
/* Else return a custom language in the form of "x-hbotABCD" */
{
unsigned char buf[11] = "x-hbot";
diff --git a/test/api/test-ot-tag.c b/test/api/test-ot-tag.c
index e54e552..f5cbd9d 100644
--- a/test/api/test-ot-tag.c
+++ b/test/api/test-ot-tag.c
@@ -188,11 +188,48 @@ test_ot_tag_language (void)
test_language_two_way ("ENG", "en");
test_tag_from_language ("ENG", "en_US");
+ test_language_two_way ("CJA", "cja"); /* Western Cham */
+ test_language_two_way ("CJM", "cjm"); /* Eastern Cham */
test_language_two_way ("EVN", "eve");
+ test_language_two_way ("HAL", "cfm"); /* BCP47 and current ISO639-3 code for Halam/Falam Chin */
+ test_tag_from_language ("HAL", "flm"); /* Retired ISO639-3 code for Halam/Falam Chin */
+
+ test_tag_from_language ("QIN", "bgr"); /* Bawm Chin */
+ test_tag_from_language ("QIN", "cbl"); /* Bualkhaw Chin */
+ test_tag_from_language ("QIN", "cka"); /* Khumi Awa Chin */
+ test_tag_from_language ("QIN", "cmr"); /* Mro-Khimi Chin */
+ test_tag_from_language ("QIN", "cnb"); /* Chinbon Chin */
+ test_tag_from_language ("QIN", "cnh"); /* Hakha Chin */
+ test_tag_from_language ("QIN", "cnk"); /* Khumi Chin */
+ test_tag_from_language ("QIN", "cnw"); /* Ngawn Chin */
+ test_tag_from_language ("QIN", "csh"); /* Asho Chin */
+ test_tag_from_language ("QIN", "csy"); /* Siyin Chin */
+ test_tag_from_language ("QIN", "ctd"); /* Tedim Chin */
+ test_tag_from_language ("QIN", "czt"); /* Zotung Chin */
+ test_tag_from_language ("QIN", "dao"); /* Daai Chin */
+ test_tag_from_language ("QIN", "hlt"); /* Matu Chin */
+ test_tag_from_language ("QIN", "mrh"); /* Mara Chin */
+ test_tag_from_language ("QIN", "pck"); /* Paite Chin */
+ test_tag_from_language ("QIN", "sez"); /* Senthang Chin */
+ test_tag_from_language ("QIN", "tcp"); /* Tawr Chin */
+ test_tag_from_language ("QIN", "tcz"); /* Thado Chin */
+ test_tag_from_language ("QIN", "yos"); /* Yos, deprecated by IANA in favor of Zou [zom] */
+ test_tag_from_language ("QIN", "zom"); /* Zou */
+ test_tag_to_language ("QIN", "bgr"); /* no single BCP47 tag for Chin; picking Bawm Chin */
+
test_language_two_way ("FAR", "fa");
test_tag_from_language ("FAR", "fa_IR");
+ test_language_two_way ("SWA", "aii"); /* Swadaya Aramaic */
+
+ test_language_two_way ("SYR", "syr"); /* Syriac [macrolanguage] */
+ test_tag_from_language ("SYR", "amw"); /* Western Neo-Aramaic */
+ test_tag_from_language ("SYR", "cld"); /* Chaldean Neo-Aramaic */
+ test_tag_from_language ("SYR", "syc"); /* Classical Syriac */
+
+ test_language_two_way ("TUA", "tru"); /* Turoyo Aramaic */
+
test_language_two_way ("ZHH", "zh-hk"); /* Chinese (Hong Kong) */
test_tag_from_language ("ZHS", "zh"); /* Chinese */
@@ -238,6 +275,27 @@ test_ot_tag_language (void)
test_tag_from_language ("APPH", "und-fonnapa");
test_tag_to_language ("APPH", "und-fonnapa");
+ /* Estrangela Syriac */
+ test_tag_from_language ("SYRE", "aii-Syre");
+ test_tag_from_language ("SYRE", "de-Syre");
+ test_tag_from_language ("SYRE", "syr-Syre");
+ test_tag_from_language ("SYRE", "und-Syre");
+ test_tag_to_language ("SYRE", "und-Syre");
+
+ /* Western Syriac */
+ test_tag_from_language ("SYRJ", "aii-Syrj");
+ test_tag_from_language ("SYRJ", "de-Syrj");
+ test_tag_from_language ("SYRJ", "syr-Syrj");
+ test_tag_from_language ("SYRJ", "und-Syrj");
+ test_tag_to_language ("SYRJ", "und-Syrj");
+
+ /* Eastern Syriac */
+ test_tag_from_language ("SYRN", "aii-Syrn");
+ test_tag_from_language ("SYRN", "de-Syrn");
+ test_tag_from_language ("SYRN", "syr-Syrn");
+ test_tag_from_language ("SYRN", "und-Syrn");
+ test_tag_to_language ("SYRN", "und-Syrn");
+
/* Test that x-hbot overrides the base language */
test_tag_from_language ("ABC", "fa-x-hbotabc-zxc");
test_tag_from_language ("ABC", "fa-ir-x-hbotabc-zxc");
More information about the HarfBuzz mailing list