[HarfBuzz] harfbuzz: Branch 'master' - 8 commits

Behdad Esfahbod behdad at kemper.freedesktop.org
Thu May 22 16:42:09 PDT 2014


 src/hb-ot-shape-complex-indic-machine.rl |    2 +-
 src/hb-ot-shape-complex-indic.cc         |   31 ++++++++++++++++++++++++++-----
 2 files changed, 27 insertions(+), 6 deletions(-)

New commits:
commit a498565cedf0441ae723c5e5969f637d792a15e7
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Thu May 22 19:39:56 2014 -0400

    [indic] Support U+1CF2,U+1CF3

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 60e4583..c20ffd0 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -215,6 +215,12 @@ set_indic_properties (hb_glyph_info_t &info)
    * For now, just treat them like regular tone marks. */
   else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1CE2, 0x1CE8)))
     cat = OT_A;
+  /* The following are Visarga variants. */
+  else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1CF2, 0x1CF3)))
+  {
+    cat = OT_SM;
+    ASSERT_STATIC ((int) INDIC_SYLLABIC_CATEGORY_VISARGA == OT_SM);
+  }
 
   if (unlikely (u == 0x17D1))
     cat = OT_X;
commit ecb98babbaa065940b40ca8954a454f0e2cdcff0
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Thu May 22 19:36:21 2014 -0400

    [indic] Support U+1CE2..U+1CE8

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 023d089..60e4583 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -211,6 +211,10 @@ set_indic_properties (hb_glyph_info_t &info)
   /* The following act like consonants. */
   else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1CF5, 0x1CF6)))
     cat = OT_C;
+  /* TODO: The following should only be allowed after a Visarga.
+   * For now, just treat them like regular tone marks. */
+  else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1CE2, 0x1CE8)))
+    cat = OT_A;
 
   if (unlikely (u == 0x17D1))
     cat = OT_X;
commit 37bf2c9224e32fdc99c20158c6dc0a4602ec1292
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Thu May 22 19:35:17 2014 -0400

    Minor (whitespace realignment only)

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 80ac993..023d089 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -200,7 +200,7 @@ set_indic_properties (hb_glyph_info_t &info)
   if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0x0951, 0x0952,
 						 0x1CD0, 0x1CD2,
 						 0x1CD4, 0x1CE1) ||
-		u == 0x1CF4))
+					    u == 0x1CF4))
     cat = OT_A;
   /* The following act more like the Bindus. */
   else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0953, 0x0954)))
commit 131e17ff9ae792cafa7a500043acb373802ee872
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Thu May 22 19:32:51 2014 -0400

    [indic] Support U+1CF5,U+1CF6

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 5649c69..80ac993 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -208,6 +208,9 @@ set_indic_properties (hb_glyph_info_t &info)
   /* Cantillation marks. */
   else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0xA8E0, 0xA8F1)))
     cat = OT_VD;
+  /* The following act like consonants. */
+  else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1CF5, 0x1CF6)))
+    cat = OT_C;
 
   if (unlikely (u == 0x17D1))
     cat = OT_X;
commit 72ead0cc72dac4d1c985ead065bb820f93f14a1d
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Thu May 22 19:12:10 2014 -0400

    [indic] Treat U+1CE1 as a tone-mark too
    
    It's spacing, but otherwise the same as the other ones.

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index f11e7f3..5649c69 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -199,7 +199,7 @@ set_indic_properties (hb_glyph_info_t &info)
    */
   if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0x0951, 0x0952,
 						 0x1CD0, 0x1CD2,
-						 0x1CD4, 0x1CE0) ||
+						 0x1CD4, 0x1CE1) ||
 		u == 0x1CF4))
     cat = OT_A;
   /* The following act more like the Bindus. */
commit e848bfae7c975a6fae434daf8e3db4d69914df9f
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Thu May 22 18:50:34 2014 -0400

    [indic] Recategorize U+A8E0..U+A8F1 as OT_VD
    
    Up to two of them come after all OT_A characters.

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 05f62f2..5e28843 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -69,7 +69,7 @@ cn = c.ZWJ?.n?;
 forced_rakar = ZWJ H ZWJ Ra;
 avagraha = Avag.N?;
 matra_group = z{0,3}.M.N?.(H | forced_rakar)?;
-syllable_tail2 = (SM.SM?.ZWNJ?)? A{0,3}? VD?;
+syllable_tail2 = (SM.SM?.ZWNJ?)? A{0,3}? VD{0,2};
 syllable_tail =  (Coeng (cn|V))? avagraha? syllable_tail2;
 place_holder = NBSP | DOTTEDCIRCLE;
 halant_group = (z?.h.(ZWJ.N?)?);
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 01d6d96..f11e7f3 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -198,14 +198,16 @@ set_indic_properties (hb_glyph_info_t &info)
    * U+092E,U+0952,U+0951
    */
   if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0x0951, 0x0952,
-						 0xA8E0, 0xA8F1) &&
-		hb_in_ranges<hb_codepoint_t> (u, 0x1CD0, 0x1CD2,
-						 0x1CD4, 0x1CE0,
-						 0x1CF4, 0x1CF4)))
+						 0x1CD0, 0x1CD2,
+						 0x1CD4, 0x1CE0) ||
+		u == 0x1CF4))
     cat = OT_A;
   /* The following act more like the Bindus. */
   else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0953, 0x0954)))
     cat = OT_SM;
+  /* Cantillation marks. */
+  else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0xA8E0, 0xA8F1)))
+    cat = OT_VD;
 
   if (unlikely (u == 0x17D1))
     cat = OT_X;
commit c519536c34c842304da558dd4a9e3844fc261b20
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Thu May 22 18:43:14 2014 -0400

    [indic] Allow up to three tone marks
    
    According to Roozbeh, there are valid combinations in Unicode
    proposals for up to three.  Previously we were allowing up to two.

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 5c3e734..05f62f2 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -69,7 +69,7 @@ cn = c.ZWJ?.n?;
 forced_rakar = ZWJ H ZWJ Ra;
 avagraha = Avag.N?;
 matra_group = z{0,3}.M.N?.(H | forced_rakar)?;
-syllable_tail2 = (SM.SM?.ZWNJ?)? (A.A?)? VD?;
+syllable_tail2 = (SM.SM?.ZWNJ?)? A{0,3}? VD?;
 syllable_tail =  (Coeng (cn|V))? avagraha? syllable_tail2;
 place_holder = NBSP | DOTTEDCIRCLE;
 halant_group = (z?.h.(ZWJ.N?)?);
commit c11fc6833980fce6d70c5ae0c6623de97a3eb30a
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Thu May 22 18:41:49 2014 -0400

    [indic] Support more extended Devanagari tone marks
    
    Also adjust U+0953,U+0954 handling.

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 6fea164..01d6d96 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -188,18 +188,24 @@ set_indic_properties (hb_glyph_info_t &info)
 
 
   /* The spec says U+0952 is OT_A.  However, testing shows that Uniscribe
-   * treats U+0951..U+0954 all behave similarly.
-   * TESTS:
+   * treats a whole bunch of characters similarly.
+   * TESTS: For example, for U+0951:
    * U+092E,U+0947,U+0952
    * U+092E,U+0952,U+0947
    * U+092E,U+0947,U+0951
    * U+092E,U+0951,U+0947
+   * U+092E,U+0951,U+0952
+   * U+092E,U+0952,U+0951
    */
-  if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0951, 0x0954)))
-    cat = OT_A;
-  /* Same for the following... */
-  if (unlikely (hb_in_range<hb_codepoint_t> (u, 0xA8E0, 0xA8F1)))
+  if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0x0951, 0x0952,
+						 0xA8E0, 0xA8F1) &&
+		hb_in_ranges<hb_codepoint_t> (u, 0x1CD0, 0x1CD2,
+						 0x1CD4, 0x1CE0,
+						 0x1CF4, 0x1CF4)))
     cat = OT_A;
+  /* The following act more like the Bindus. */
+  else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0953, 0x0954)))
+    cat = OT_SM;
 
   if (unlikely (u == 0x17D1))
     cat = OT_X;


More information about the HarfBuzz mailing list