[HarfBuzz] harfbuzz: Branch 'master'

Behdad Esfahbod behdad at kemper.freedesktop.org
Wed Jan 18 19:55:24 UTC 2017


 src/hb-ot-tag.cc       |   77 +++++++++++++++++++++++++++++++++++++++++++------
 test/api/test-ot-tag.c |   58 ++++++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+), 9 deletions(-)

New commits:
commit 1337428e4f8a2a4c78312c581bf4e96cd49d783f
Author: Sascha Brawer <sascha at brawer.ch>
Date:   Wed Jan 18 13:51:02 2017 +0100

    Update language tags to OpenType 1.8.1 (#403)
    
    Resolves https://github.com/behdad/harfbuzz/issues/324

diff --git a/src/hb-ot-tag.cc b/src/hb-ot-tag.cc
index 5f21ac0..5c348e8 100644
--- a/src/hb-ot-tag.cc
+++ b/src/hb-ot-tag.cc
@@ -201,6 +201,7 @@ static const LangTag ot_languages[] = {
   {"alt",	HB_TAG('A','L','T',' ')},	/* [Southern] Altai */
   {"am",	HB_TAG('A','M','H',' ')},	/* Amharic */
   {"amf",	HB_TAG('H','B','N',' ')},	/* Hammer-Banna */
+  {"amw",	HB_TAG('S','Y','R',' ')},	/* Western Neo-Aramaic */
   {"an",	HB_TAG('A','R','G',' ')},	/* Aragonese */
   {"ang",	HB_TAG('A','N','G',' ')},	/* Old English (ca. 450-1100) */
   {"ar",	HB_TAG('A','R','A',' ')},	/* Arabic [macrolanguage] */
@@ -239,6 +240,7 @@ static const LangTag ot_languages[] = {
   {"bg",	HB_TAG('B','G','R',' ')},	/* Bulgarian */
   {"bgc",	HB_TAG('B','G','C',' ')},	/* Haryanvi */
   {"bgq",	HB_TAG('B','G','Q',' ')},	/* Bagri */
+  {"bgr",	HB_TAG('Q','I','N',' ')},	/* Bawm Chin */
   {"bhb",	HB_TAG('B','H','I',' ')},	/* Bhili */
   {"bhk",	HB_TAG('B','I','K',' ')},	/* Albay Bicolano (retired code) */
   {"bho",	HB_TAG('B','H','O',' ')},	/* Bhojpuri */
@@ -270,8 +272,10 @@ static const LangTag ot_languages[] = {
   {"ca",	HB_TAG('C','A','T',' ')},	/* Catalan */
   {"cak",	HB_TAG('C','A','K',' ')},	/* Kaqchikel */
   {"cbk",	HB_TAG('C','B','K',' ')},	/* Chavacano */
+  {"cbl",	HB_TAG('Q','I','N',' ')},	/* Bualkhaw Chin */
   {"ce",	HB_TAG('C','H','E',' ')},	/* Chechen */
   {"ceb",	HB_TAG('C','E','B',' ')},	/* Cebuano */
+  {"cfm",	HB_TAG('H','A','L',' ')},	/* Halam/Falam Chin */
   {"cgg",	HB_TAG('C','G','G',' ')},	/* Chiga */
   {"ch",	HB_TAG('C','H','A',' ')},	/* Chamorro */
   {"chk",	HB_TAG('C','H','K','0')},	/* Chuukese */
@@ -279,8 +283,17 @@ static const LangTag ot_languages[] = {
   {"chp",	HB_TAG('C','H','P',' ')},	/* Chipewyan */
   {"chr",	HB_TAG('C','H','R',' ')},	/* Cherokee */
   {"chy",	HB_TAG('C','H','Y',' ')},	/* Cheyenne */
+  {"cja",	HB_TAG('C','J','A',' ')},	/* Western Cham */
+  {"cjm",	HB_TAG('C','J','M',' ')},	/* Eastern Cham */
+  {"cka",	HB_TAG('Q','I','N',' ')},	/* Khumi Awa Chin */
   {"ckb",	HB_TAG('K','U','R',' ')},	/* Central Kurdish (Sorani) */
   {"ckt",	HB_TAG('C','H','K',' ')},	/* Chukchi */
+  {"cld",	HB_TAG('S','Y','R',' ')},	/* Chaldean Neo-Aramaic */
+  {"cmr",	HB_TAG('Q','I','N',' ')},	/* Mro-Khimi Chin */
+  {"cnb",	HB_TAG('Q','I','N',' ')},	/* Chinbon Chin */
+  {"cnh",	HB_TAG('Q','I','N',' ')},	/* Hakha Chin */
+  {"cnk",	HB_TAG('Q','I','N',' ')},	/* Khumi Chin */
+  {"cnw",	HB_TAG('Q','I','N',' ')},	/* Ngawn Chin */
   {"cop",	HB_TAG('C','O','P',' ')},	/* Coptic */
   {"cpp",	HB_TAG('C','P','P',' ')},	/* Creoles */
   {"cr",	HB_TAG('C','R','E',' ')},	/* Cree */
@@ -293,6 +306,9 @@ static const LangTag ot_languages[] = {
   {"crx",	HB_TAG('C','R','R',' ')},	/* Carrier */
   {"cs",	HB_TAG('C','S','Y',' ')},	/* Czech */
   {"csb",	HB_TAG('C','S','B',' ')},	/* Kashubian */
+  {"csh",	HB_TAG('Q','I','N',' ')},	/* Asho Chin */
+  {"csy",	HB_TAG('Q','I','N',' ')},	/* Siyin Chin */
+  {"ctd",	HB_TAG('Q','I','N',' ')},	/* Tedim Chin */
   {"ctg",	HB_TAG('C','T','G',' ')},	/* Chittagonian */
   {"cts",	HB_TAG('B','I','K',' ')},	/* Northern Catanduanes Bikol */
   {"cu",	HB_TAG('C','S','L',' ')},	/* Church Slavic */
@@ -300,7 +316,9 @@ static const LangTag ot_languages[] = {
   {"cv",	HB_TAG('C','H','U',' ')},	/* Chuvash */
   {"cwd",	HB_TAG('D','C','R',' ')},	/* Woods Cree */
   {"cy",	HB_TAG('W','E','L',' ')},	/* Welsh */
+  {"czt",	HB_TAG('Q','I','N',' ')},	/* Zotung Chin */
   {"da",	HB_TAG('D','A','N',' ')},	/* Danish */
+  {"dao",	HB_TAG('Q','I','N',' ')},	/* Daai Chin */
   {"dap",	HB_TAG('N','I','S',' ')},	/* Nisi (India) */
   {"dar",	HB_TAG('D','A','R',' ')},	/* Dargwa */
   {"dax",	HB_TAG('D','A','X',' ')},	/* Dayi */
@@ -343,7 +361,7 @@ static const LangTag ot_languages[] = {
   {"fi",	HB_TAG('F','I','N',' ')},	/* Finnish */
   {"fil",	HB_TAG('P','I','L',' ')},	/* Filipino */
   {"fj",	HB_TAG('F','J','I',' ')},	/* Fijian */
-  {"flm",	HB_TAG('H','A','L',' ')},	/* Halam */
+  {"flm",	HB_TAG('H','A','L',' ')},	/* Halam/Falam Chin [retired ISO639 code] */
   {"fo",	HB_TAG('F','O','S',' ')},	/* Faroese */
   {"fon",	HB_TAG('F','O','N',' ')},	/* Fon */
   {"fr",	HB_TAG('F','R','A',' ')},	/* French */
@@ -390,6 +408,7 @@ static const LangTag ot_languages[] = {
   {"he",	HB_TAG('I','W','R',' ')},	/* Hebrew */
   {"hi",	HB_TAG('H','I','N',' ')},	/* Hindi */
   {"hil",	HB_TAG('H','I','L',' ')},	/* Hiligaynon */
+  {"hlt",	HB_TAG('Q','I','N',' ')},	/* Matu Chin */
   {"hmn",	HB_TAG('H','M','N',' ')},	/* Hmong */
   {"hnd",	HB_TAG('H','N','D',' ')},	/* [Southern] Hindko */
   {"hne",	HB_TAG('C','H','H',' ')},	/* Chattisgarhi */
@@ -553,6 +572,7 @@ static const LangTag ot_languages[] = {
   {"mos",	HB_TAG('M','O','S',' ')},	/* Mossi */
   {"mpe",	HB_TAG('M','A','J',' ')},	/* Majang */
   {"mr",	HB_TAG('M','A','R',' ')},	/* Marathi */
+  {"mrh",	HB_TAG('Q','I','N',' ')},	/* Mara Chin */
   {"mrj",	HB_TAG('H','M','A',' ')},	/* High Mari */
   {"ms",	HB_TAG('M','L','Y',' ')},	/* Malay [macrolanguage] */
   {"msc",	HB_TAG('M','N','K',' ')},	/* Sankaran Maninka */
@@ -617,6 +637,7 @@ static const LangTag ot_languages[] = {
   {"pcc",	HB_TAG('P','C','C',' ')},	/* Bouyei */
   {"pcd",	HB_TAG('P','C','D',' ')},	/* Picard */
   {"pce",	HB_TAG('P','L','G',' ')},	/* [Ruching] Palaung */
+  {"pck",	HB_TAG('Q','I','N',' ')},	/* Paite Chin */
   {"pdc",	HB_TAG('P','D','C',' ')},	/* Pennsylvania German */
   {"pes",	HB_TAG('F','A','R',' ')},	/* Iranian Persian */
   {"phk",	HB_TAG('P','H','K',' ')},	/* Phake */
@@ -674,6 +695,7 @@ static const LangTag ot_languages[] = {
   {"se",	HB_TAG('N','S','M',' ')},	/* Northern Sami */
   {"seh",	HB_TAG('S','N','A',' ')},	/* Sena */
   {"sel",	HB_TAG('S','E','L',' ')},	/* Selkup */
+  {"sez",	HB_TAG('Q','I','N',' ')},	/* Senthang Chin */
   {"sg",	HB_TAG('S','G','O',' ')},	/* Sango */
   {"sga",	HB_TAG('S','G','A',' ')},	/* Old Irish (to 900) */
   {"sgs",	HB_TAG('S','G','S',' ')},	/* Samogitian */
@@ -713,12 +735,15 @@ static const LangTag ot_languages[] = {
   {"swh",	HB_TAG('S','W','K',' ')},	/* Kiswahili/Swahili */
   {"swv",	HB_TAG('M','A','W',' ')},	/* Shekhawati */
   {"sxu",	HB_TAG('S','X','U',' ')},	/* Upper Saxon */
+  {"syc",	HB_TAG('S','Y','R',' ')},	/* Classical Syriac */
   {"syl",	HB_TAG('S','Y','L',' ')},	/* Sylheti */
   {"syr",	HB_TAG('S','Y','R',' ')},	/* Syriac [macrolanguage] */
   {"szl",	HB_TAG('S','Z','L',' ')},	/* Silesian */
   {"ta",	HB_TAG('T','A','M',' ')},	/* Tamil */
   {"tab",	HB_TAG('T','A','B',' ')},	/* Tabasaran */
+  {"tcp",	HB_TAG('Q','I','N',' ')},	/* Tawr Chin */
   {"tcy",	HB_TAG('T','U','L',' ')},	/* Tulu */
+  {"tcz",	HB_TAG('Q','I','N',' ')},	/* Thado Chin */
   {"tdd",	HB_TAG('T','D','D',' ')},	/* Tai Nüa */
   {"te",	HB_TAG('T','E','L',' ')},	/* Telugu */
   {"tem",	HB_TAG('T','M','N',' ')},	/* Temne */
@@ -786,11 +811,13 @@ static const LangTag ot_languages[] = {
   {"yap",	HB_TAG('Y','A','P',' ')},	/* Yapese */
   {"yi",	HB_TAG('J','I','I',' ')},	/* Yiddish [macrolanguage] */
   {"yo",	HB_TAG('Y','B','A',' ')},	/* Yoruba */
+  {"yos",	HB_TAG('Q','I','N',' ')},	/* Yos, deprecated by IANA in favor of Zou [zom] */
   {"yso",	HB_TAG('N','I','S',' ')},	/* Nisi (China) */
   {"za",	HB_TAG('Z','H','A',' ')},	/* Chuang/Zhuang [macrolanguage] */
   {"zea",	HB_TAG('Z','E','A',' ')},	/* Zeeuws */
   {"zgh",	HB_TAG('Z','G','H',' ')},	/* Standard Morrocan Tamazigh */
   {"zne",	HB_TAG('Z','N','D',' ')},	/* Zande */
+  {"zom",	HB_TAG('Q','I','N',' ')},	/* Zou */
   {"zu",	HB_TAG('Z','U','L',' ')}, 	/* Zulu */
   {"zum",	HB_TAG('L','R','C',' ')},	/* Kumzari */
   {"zza",	HB_TAG('Z','Z','A',' ')},	/* Zazaki */
@@ -907,6 +934,30 @@ hb_ot_tag_from_language (hb_language_t language)
     return HB_TAG('A','P','P','H');  /* Phonetic transcription—Americanist conventions */
   }
 
+  /*
+   * "Syre" is a BCP-47 script tag, meaning the Estrangela variant of the Syriac script.
+   * It can be applied to any language.
+   */
+  if (strstr (lang_str, "-syre")) {
+    return HB_TAG('S','Y','R','E');  /* Estrangela Syriac */
+  }
+
+  /*
+   * "Syrj" is a BCP-47 script tag, meaning the Western variant of the Syriac script.
+   * It can be applied to any language.
+   */
+  if (strstr (lang_str, "-syrj")) {
+    return HB_TAG('S','Y','R','J');  /* Western Syriac */
+  }
+
+  /*
+   * "Syrn" is a BCP-47 script tag, meaning the Eastern variant of the Syriac script.
+   * It can be applied to any language.
+   */
+  if (strstr (lang_str, "-syrn")) {
+    return HB_TAG('S','Y','R','N');  /* Eastern Syriac */
+  }
+
   /* Find a language matching in the first component */
   {
     const LangTag *lang_tag;
@@ -962,6 +1013,22 @@ hb_ot_tag_to_language (hb_tag_t tag)
   if (tag == HB_OT_TAG_DEFAULT_LANGUAGE)
     return NULL;
 
+  /* struct LangTag has only room for 3-letter language tags. */
+  switch (tag) {
+  case HB_TAG('A','P','P','H'):  /* Phonetic transcription—Americanist conventions */
+    return hb_language_from_string ("und-fonnapa", -1);
+  case HB_TAG('I','P','P','H'):  /* Phonetic transcription—IPA conventions */
+    return hb_language_from_string ("und-fonipa", -1);
+  case HB_TAG('S','Y','R',' '):  /* Syriac [macrolanguage] */
+    return hb_language_from_string ("syr", -1);
+  case HB_TAG('S','Y','R','E'):  /* Estrangela Syriac */
+    return hb_language_from_string ("und-Syre", -1);
+  case HB_TAG('S','Y','R','J'):  /* Western Syriac */
+    return hb_language_from_string ("und-Syrj", -1);
+  case HB_TAG('S','Y','R','N'):  /* Eastern Syriac */
+    return hb_language_from_string ("und-Syrn", -1);
+  }
+
   for (i = 0; i < ARRAY_LENGTH (ot_languages); i++)
     if (ot_languages[i].tag == tag)
       return hb_language_from_string (ot_languages[i].language, -1);
@@ -976,14 +1043,6 @@ hb_ot_tag_to_language (hb_tag_t tag)
     }
   }
 
-  /* struct LangTag has only room for 3-letter language tags. */
-  switch (tag) {
-  case HB_TAG('A','P','P','H'):  /* Phonetic transcription—Americanist conventions */
-    return hb_language_from_string ("und-fonnapa", -1);
-  case HB_TAG('I','P','P','H'):  /* Phonetic transcription—IPA conventions */
-    return hb_language_from_string ("und-fonipa", -1);
-  }
-
   /* Else return a custom language in the form of "x-hbotABCD" */
   {
     unsigned char buf[11] = "x-hbot";
diff --git a/test/api/test-ot-tag.c b/test/api/test-ot-tag.c
index e54e552..f5cbd9d 100644
--- a/test/api/test-ot-tag.c
+++ b/test/api/test-ot-tag.c
@@ -188,11 +188,48 @@ test_ot_tag_language (void)
   test_language_two_way ("ENG", "en");
   test_tag_from_language ("ENG", "en_US");
 
+  test_language_two_way ("CJA", "cja"); /* Western Cham */
+  test_language_two_way ("CJM", "cjm"); /* Eastern Cham */
   test_language_two_way ("EVN", "eve");
 
+  test_language_two_way ("HAL", "cfm"); /* BCP47 and current ISO639-3 code for Halam/Falam Chin */
+  test_tag_from_language ("HAL", "flm"); /* Retired ISO639-3 code for Halam/Falam Chin */
+
+  test_tag_from_language ("QIN", "bgr"); /* Bawm Chin */
+  test_tag_from_language ("QIN", "cbl"); /* Bualkhaw Chin */
+  test_tag_from_language ("QIN", "cka"); /* Khumi Awa Chin */
+  test_tag_from_language ("QIN", "cmr"); /* Mro-Khimi Chin */
+  test_tag_from_language ("QIN", "cnb"); /* Chinbon Chin */
+  test_tag_from_language ("QIN", "cnh"); /* Hakha Chin */
+  test_tag_from_language ("QIN", "cnk"); /* Khumi Chin */
+  test_tag_from_language ("QIN", "cnw"); /* Ngawn Chin */
+  test_tag_from_language ("QIN", "csh"); /* Asho Chin */
+  test_tag_from_language ("QIN", "csy"); /* Siyin Chin */
+  test_tag_from_language ("QIN", "ctd"); /* Tedim Chin */
+  test_tag_from_language ("QIN", "czt"); /* Zotung Chin */
+  test_tag_from_language ("QIN", "dao"); /* Daai Chin */
+  test_tag_from_language ("QIN", "hlt"); /* Matu Chin */
+  test_tag_from_language ("QIN", "mrh"); /* Mara Chin */
+  test_tag_from_language ("QIN", "pck"); /* Paite Chin */
+  test_tag_from_language ("QIN", "sez"); /* Senthang Chin */
+  test_tag_from_language ("QIN", "tcp"); /* Tawr Chin */
+  test_tag_from_language ("QIN", "tcz"); /* Thado Chin */
+  test_tag_from_language ("QIN", "yos"); /* Yos, deprecated by IANA in favor of Zou [zom] */
+  test_tag_from_language ("QIN", "zom"); /* Zou */
+  test_tag_to_language ("QIN", "bgr");   /* no single BCP47 tag for Chin; picking Bawm Chin */
+
   test_language_two_way ("FAR", "fa");
   test_tag_from_language ("FAR", "fa_IR");
 
+  test_language_two_way ("SWA", "aii"); /* Swadaya Aramaic */
+
+  test_language_two_way ("SYR", "syr"); /* Syriac [macrolanguage] */
+  test_tag_from_language ("SYR", "amw"); /* Western Neo-Aramaic */
+  test_tag_from_language ("SYR", "cld"); /* Chaldean Neo-Aramaic */
+  test_tag_from_language ("SYR", "syc"); /* Classical Syriac */
+
+  test_language_two_way ("TUA", "tru"); /* Turoyo Aramaic */
+
   test_language_two_way ("ZHH", "zh-hk"); /* Chinese (Hong Kong) */
 
   test_tag_from_language ("ZHS", "zh"); /* Chinese */
@@ -238,6 +275,27 @@ test_ot_tag_language (void)
   test_tag_from_language ("APPH", "und-fonnapa");
   test_tag_to_language ("APPH", "und-fonnapa");
 
+  /* Estrangela Syriac */
+  test_tag_from_language ("SYRE", "aii-Syre");
+  test_tag_from_language ("SYRE", "de-Syre");
+  test_tag_from_language ("SYRE", "syr-Syre");
+  test_tag_from_language ("SYRE", "und-Syre");
+  test_tag_to_language ("SYRE", "und-Syre");
+
+  /* Western Syriac */
+  test_tag_from_language ("SYRJ", "aii-Syrj");
+  test_tag_from_language ("SYRJ", "de-Syrj");
+  test_tag_from_language ("SYRJ", "syr-Syrj");
+  test_tag_from_language ("SYRJ", "und-Syrj");
+  test_tag_to_language ("SYRJ", "und-Syrj");
+
+  /* Eastern Syriac */
+  test_tag_from_language ("SYRN", "aii-Syrn");
+  test_tag_from_language ("SYRN", "de-Syrn");
+  test_tag_from_language ("SYRN", "syr-Syrn");
+  test_tag_from_language ("SYRN", "und-Syrn");
+  test_tag_to_language ("SYRN", "und-Syrn");
+
   /* Test that x-hbot overrides the base language */
   test_tag_from_language ("ABC", "fa-x-hbotabc-zxc");
   test_tag_from_language ("ABC", "fa-ir-x-hbotabc-zxc");


More information about the HarfBuzz mailing list