[HarfBuzz] harfbuzz: Branch 'master' - 8 commits

Wed Oct 16 11:02:40 PDT 2013

src/hb-ot-layout-private.hh                                                   |   29 +++-
 src/hb-ot-shape-complex-arabic.cc                                             |   18 ++
 src/hb-ot-shape-complex-indic-machine.rl                                      |   12 +
 src/hb-ot-shape-complex-indic-private.hh                                      |    2 
 src/hb-ot-shape-complex-indic.cc                                              |   62 +++++++---
 test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt |    1 
 6 files changed, 96 insertions(+), 28 deletions(-)

New commits:
commit 1a7de1ba9876b0554c758acbc6459366d9d98a5d
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Oct 16 19:55:06 2013 +0200

    [indic] Improve Avagraha support in machine

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 1c372e1..e964655 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -66,8 +66,10 @@ reph = (Ra H | Repha);		# possible reph
 
 cn = c.ZWJ?.n?;
 forced_rakar = ZWJ H ZWJ Ra;
+avagraha = Avag.N?;
 matra_group = z{0,3}.M.N?.(H | forced_rakar)?;
-syllable_tail =  (Coeng (cn|V))? (Avag.N?)? (SM.SM?.ZWNJ?)? (A.A?)? VD?;
+syllable_tail2 = (SM.SM?.ZWNJ?)? (A.A?)? VD?;
+syllable_tail =  (Coeng (cn|V))? avagraha? syllable_tail2;
 place_holder = NBSP | DOTTEDCIRCLE;
 halant_group = (z?.h.(ZWJ.N?)?);
 final_halant_group = halant_group | h.ZWNJ;
@@ -77,7 +79,7 @@ halant_or_matra_group = (CM.CM* | final_halant_group | (h.ZWJ)? matra_group{0,4}
 consonant_syllable =	Repha? (cn.halant_group){0,4} cn halant_or_matra_group? syllable_tail;
 vowel_syllable =	reph? V.n? (ZWJ | (halant_group.cn){0,4} halant_or_matra_group? syllable_tail);
 standalone_cluster =	reph? place_holder.n? (halant_group.cn){0,4} halant_or_matra_group? syllable_tail;
-avagraha_cluster = 	Avag.N? (SM.ZWNJ?)? (VD VD?)?;
+avagraha_cluster = 	avagraha syllable_tail2;
 broken_cluster =	reph? n? (halant_group.cn){0,4} halant_or_matra_group syllable_tail;
 other =			any;
 
commit 3756efaf4e14ec3b5b1def700a1b5985f162372b
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Oct 16 19:06:29 2013 +0200

    [indic] Misc harmless fixes!
    
    First, we were abusing OT_VD instead of OT_A.  Fix that
    by moving OT_A in the grammar to where it belongs (which
    is different from what the spec says).
    
    Also, only allow medial consonants after all other
    consonants.  This doesn't affect any current character.
    
    Finally, fix Halant attachment in presence of medial
    consonants.  Again, this currently doesn't affect any
    sequence.
    
    I lied.  There's Gurmukhi U+0A75 which is Consonant_Medial.
    Uniscribe allows one of those in each of these positions:
    before matras, after matras and before syllable modifiers,
    and after syllable modifiers!  We currently just allow
    unlimited numbers of it, before matras.

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index f0c66c4..1c372e1 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -58,7 +58,7 @@ Ra    = 16;
 CM    = 17;
 Avag  = 18;
 
-c = (C | Ra)CM*;		# is_consonant
+c = (C | Ra);			# is_consonant
 n = ((ZWNJ?.RS)? (N.N?)?);	# is_consonant_modifier
 z = ZWJ|ZWNJ;			# is_joiner
 h = H | Coeng;			# is_halant_or_coeng
@@ -67,14 +67,14 @@ reph = (Ra H | Repha);		# possible reph
 cn = c.ZWJ?.n?;
 forced_rakar = ZWJ H ZWJ Ra;
 matra_group = z{0,3}.M.N?.(H | forced_rakar)?;
-syllable_tail =  (Coeng (cn|V))? (Avag.N?)? (SM.SM?.ZWNJ?)? (VD.VD?)?;
+syllable_tail =  (Coeng (cn|V))? (Avag.N?)? (SM.SM?.ZWNJ?)? (A.A?)? VD?;
 place_holder = NBSP | DOTTEDCIRCLE;
 halant_group = (z?.h.(ZWJ.N?)?);
 final_halant_group = halant_group | h.ZWNJ;
-halant_or_matra_group = (final_halant_group | (h.ZWJ)? matra_group{0,4});
+halant_or_matra_group = (CM.CM* | final_halant_group | (h.ZWJ)? matra_group{0,4});
 
 
-consonant_syllable =	Repha? (cn.halant_group){0,4} cn A? halant_or_matra_group? syllable_tail;
+consonant_syllable =	Repha? (cn.halant_group){0,4} cn halant_or_matra_group? syllable_tail;
 vowel_syllable =	reph? V.n? (ZWJ | (halant_group.cn){0,4} halant_or_matra_group? syllable_tail);
 standalone_cluster =	reph? place_holder.n? (halant_group.cn){0,4} halant_or_matra_group? syllable_tail;
 avagraha_cluster = 	Avag.N? (SM.ZWNJ?)? (VD VD?)?;
diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh
index 552a121..cee1572 100644
--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh
@@ -102,7 +102,7 @@ enum indic_syllabic_category_t {
   INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER	= OT_C,
   INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL	= OT_CM,
   INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER	= OT_NBSP,
-  INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED	= OT_C,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED	= OT_CM,
   INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA	= OT_Repha,
   INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER	= OT_X,
   INDIC_SYLLABIC_CATEGORY_NUKTA			= OT_N,
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index c0ea562..a98c540 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -194,15 +194,15 @@ set_indic_properties (hb_glyph_info_t &info)
 
 
   /* The spec says U+0952 is OT_A.  However, testing shows that Uniscribe
-   * treats U+0951..U+0952 all as OT_VD.
+   * treats U+0951..U+0954 all behave similarly.
    * TESTS:
    * U+092E,U+0947,U+0952
    * U+092E,U+0952,U+0947
    * U+092E,U+0947,U+0951
    * U+092E,U+0951,U+0947
-   * */
+   */
   if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0951, 0x0954)))
-    cat = OT_VD;
+    cat = OT_A;
 
   if (unlikely (u == 0x17D1))
     cat = OT_X;
@@ -220,7 +220,7 @@ set_indic_properties (hb_glyph_info_t &info)
   else if (unlikely (u == 0x200C)) cat = OT_ZWNJ;
   else if (unlikely (u == 0x200D)) cat = OT_ZWJ;
   else if (unlikely (u == 0x25CC)) cat = OT_DOTTEDCIRCLE;
-  else if (unlikely (u == 0x0A71)) cat = OT_SM; /* GURMUKHI ADDAK.  More like consonant medial. like 0A75. */
+  else if (unlikely (u == 0x0A71)) cat = OT_SM; /* GURMUKHI ADDAK.  Move it to the end. */
 
   if (cat == OT_Repha) {
     /* There are two kinds of characters marked as Repha:
@@ -249,7 +249,7 @@ set_indic_properties (hb_glyph_info_t &info)
   {
     pos = matra_position (u, pos);
   }
-  else if (cat == OT_SM || cat == OT_VD || cat == OT_Avag)
+  else if ((FLAG (cat) & (FLAG (OT_SM) | FLAG (OT_VD) | FLAG (OT_A) | FLAG (OT_Avag))))
   {
     pos = POS_SMVD;
   }
@@ -933,6 +933,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
 	for (unsigned int j = last_halant; j < i; j++)
 	  if (info[j].indic_position() != POS_SMVD)
 	    info[j].indic_position() = info[i].indic_position();
+	last_halant = end;
       }
   }
 
commit c52ddab72e025d1bee8274c8f3416208b12f68f1
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Oct 16 13:42:38 2013 +0200

    [arabic] Make ZWJ prevent ligatures instead of facilitating it
    
    Unicode 6.2.0 Section 16.2 / Figure 16.3 says:
    
    "For backward compatibility, between Arabic characters a ZWJ acts just
    like the sequence <ZWJ, ZWNJ, ZWJ>, preventing a ligature from forming
    instead of requesting the use of a ligature that would not normally be
    used. As a result, there is no plain text mechanism for requesting the
    use of a ligature in Arabic text."
    
    As such, we flip internal zwj to zwnj flags for GSUB matching, which
    means it will block ligation in all features, unless the font
    explicitly matches U+200D glyph.  This doesn't affect joining behavior.

diff --git a/src/hb-ot-layout-private.hh b/src/hb-ot-layout-private.hh
index 4e20a8f..3f02c80 100644
--- a/src/hb-ot-layout-private.hh
+++ b/src/hb-ot-layout-private.hh
@@ -107,6 +107,12 @@ _hb_glyph_info_is_zwj (const hb_glyph_info_t *info)
   return !!(info->unicode_props0() & MASK0_ZWJ);
 }
 
+inline void
+_hb_glyph_info_flip_joiners (hb_glyph_info_t *info)
+{
+  info->unicode_props0() ^= MASK0_ZWNJ | MASK0_ZWJ;
+}
+
 
 #define hb_ot_layout_from_face(face) ((hb_ot_layout_t *) face->shaper_data.ot)
 
diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc
index a57e81a..d549945 100644
--- a/src/hb-ot-shape-complex-arabic.cc
+++ b/src/hb-ot-shape-complex-arabic.cc
@@ -157,6 +157,11 @@ static const struct arabic_state_table_entry {
 
 
 static void
+nuke_joiners (const hb_ot_shape_plan_t *plan,
+	      hb_font_t *font,
+	      hb_buffer_t *buffer);
+
+static void
 arabic_fallback_shape (const hb_ot_shape_plan_t *plan,
 		       hb_font_t *font,
 		       hb_buffer_t *buffer);
@@ -176,6 +181,8 @@ collect_features_arabic (hb_ot_shape_planner_t *plan)
    * TODO: Add test cases for these two.
    */
 
+  map->add_gsub_pause (nuke_joiners);
+
   map->add_global_bool_feature (HB_TAG('c','c','m','p'));
   map->add_global_bool_feature (HB_TAG('l','o','c','l'));
 
@@ -314,6 +321,17 @@ setup_masks_arabic (const hb_ot_shape_plan_t *plan,
 
 
 static void
+nuke_joiners (const hb_ot_shape_plan_t *plan HB_UNUSED,
+	      hb_font_t *font HB_UNUSED,
+	      hb_buffer_t *buffer)
+{
+  unsigned int count = buffer->len;
+  for (unsigned int i = 0; i < count; i++)
+    if (_hb_glyph_info_is_zwj (&buffer->info[i]))
+      _hb_glyph_info_flip_joiners (&buffer->info[i]);
+}
+
+static void
 arabic_fallback_shape (const hb_ot_shape_plan_t *plan,
 		       hb_font_t *font,
 		       hb_buffer_t *buffer)
commit 1a31f9f820c4538015ddaf4ca2b790649c5997ed
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Oct 16 13:42:18 2013 +0200

    [otlayout] Minor

diff --git a/src/hb-ot-layout-private.hh b/src/hb-ot-layout-private.hh
index c5ba8b4..4e20a8f 100644
--- a/src/hb-ot-layout-private.hh
+++ b/src/hb-ot-layout-private.hh
@@ -48,26 +48,33 @@
 #define unicode_props1()	var2.u8[1]
 
 
+enum {
+  MASK0_ZWJ  = 0x20,
+  MASK0_ZWNJ = 0x40,
+  MASK0_IGNORABLE = 0x80,
+  MASK0_GEN_CAT = 0x1F
+};
+
 inline void
 _hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode)
 {
   info->unicode_props0() = ((unsigned int) unicode->general_category (info->codepoint)) |
-			   (unicode->is_default_ignorable (info->codepoint) ? 0x80 : 0) |
-			   (info->codepoint == 0x200C ? 0x40 : 0) |
-			   (info->codepoint == 0x200D ? 0x20 : 0);
+			   (unicode->is_default_ignorable (info->codepoint) ? MASK0_IGNORABLE : 0) |
+			   (info->codepoint == 0x200C ? MASK0_ZWNJ : 0) |
+			   (info->codepoint == 0x200D ? MASK0_ZWJ : 0);
   info->unicode_props1() = unicode->modified_combining_class (info->codepoint);
 }
 
 inline void
 _hb_glyph_info_set_general_category (hb_glyph_info_t *info, hb_unicode_general_category_t gen_cat)
 {
-  info->unicode_props0() = (unsigned int) gen_cat | ((info->unicode_props0()) & ~0x1F);
+  info->unicode_props0() = (unsigned int) gen_cat | ((info->unicode_props0()) & ~MASK0_GEN_CAT);
 }
 
 inline hb_unicode_general_category_t
 _hb_glyph_info_get_general_category (const hb_glyph_info_t *info)
 {
-  return (hb_unicode_general_category_t) (info->unicode_props0() & 0x1F);
+  return (hb_unicode_general_category_t) (info->unicode_props0() & MASK0_GEN_CAT);
 }
 
 inline void
@@ -85,19 +92,19 @@ _hb_glyph_info_get_modified_combining_class (const hb_glyph_info_t *info)
 inline hb_bool_t
 _hb_glyph_info_is_default_ignorable (const hb_glyph_info_t *info)
 {
-  return !!(info->unicode_props0() & 0x80);
+  return !!(info->unicode_props0() & MASK0_IGNORABLE);
 }
 
 inline hb_bool_t
 _hb_glyph_info_is_zwnj (const hb_glyph_info_t *info)
 {
-  return !!(info->unicode_props0() & 0x40);
+  return !!(info->unicode_props0() & MASK0_ZWNJ);
 }
 
 inline hb_bool_t
 _hb_glyph_info_is_zwj (const hb_glyph_info_t *info)
 {
-  return !!(info->unicode_props0() & 0x20);
+  return !!(info->unicode_props0() & MASK0_ZWJ);
 }
 
 
commit 28d5daec948e1a24f13e492ce301aeb9abff37c8
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Oct 16 12:32:12 2013 +0200

    [indic] More granular post-base cluster merging!

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index f111826..c0ea562 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -936,20 +936,53 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
       }
   }
 
+
   {
-    /* Things are out-of-control for post base positions, they may shuffle
-     * around like crazy, so merge clusters.  For pre-base stuff, we handle
-     * cluster issues in final reordering. */
-    buffer->merge_clusters (base, end);
+    /* Use syllable() for sort accounting temporarily. */
+    unsigned int syllable = info[start].syllable();
+    for (unsigned int i = start; i < end; i++)
+      info[i].syllable() = i - start;
+
     /* Sit tight, rock 'n roll! */
     hb_bubble_sort (info + start, end - start, compare_indic_order);
     /* Find base again */
     base = end;
     for (unsigned int i = start; i < end; i++)
-      if (info[i].indic_position() == POS_BASE_C) {
-        base = i;
+      if (info[i].indic_position() == POS_BASE_C)
+      {
+	base = i;
 	break;
       }
+    /* Things are out-of-control for post base positions, they may shuffle
+     * around like crazy.  In old-spec mode, we move halants around, so in
+     * that case merge all clusters after base.  Otherwise, check the sort
+     * order and merge as needed.
+     * For pre-base stuff, we handle cluster issues in final reordering. */
+    if (indic_plan->is_old_spec || end - base > 127)
+      buffer->merge_clusters (base, end);
+    else
+    {
+      /* Note!  syllable() is a one-byte field. */
+      for (unsigned int i = base; i < end; i++)
+        if (info[i].syllable() != 255)
+	{
+	  unsigned int max = i;
+	  unsigned int j = start + info[i].syllable();
+	  while (j != i)
+	  {
+	    max = MAX (max, j);
+	    unsigned int next = start + info[j].syllable();
+	    info[j].syllable() = 255; /* So we don't process j later again. */
+	    j = next;
+	  }
+	  if (i != max)
+	    buffer->merge_clusters (i, max + 1);
+	}
+    }
+
+    /* Put syllable back in. */
+    for (unsigned int i = start; i < end; i++)
+      info[i].syllable() = syllable;
   }
 
   /* Setup masks now */
commit 9cb59d460e80d769087045535a8d54ec9ed7985c
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Oct 16 11:34:07 2013 +0200

    [indic] Fix cluster merging of left matras
    
    The merge_clusters there was totally broken.

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 516272c..f111826 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -1298,9 +1298,9 @@ final_reordering_syllable (const hb_ot_shape_plan_t *plan,
 	  info[new_pos] = tmp;
 	  if (old_pos < base && base <= new_pos) /* Shouldn't actually happen. */
 	    base--;
+	  buffer->merge_clusters (new_pos, MIN (end, base + 1));
 	  new_pos--;
 	}
-      buffer->merge_clusters (new_pos, MIN (end, base + 1));
     } else {
       for (unsigned int i = start; i < base; i++)
 	if (info[i].indic_position () == POS_PRE_M) {
commit 190c8f2b60af0851bf692f653c1604cfbf0561a5
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Oct 16 11:33:18 2013 +0200

    [indic] Adjust cluster merging under uniscribe mode for Tamil
    
    Apparently Uniscribe Tamil shaper doesn't ship chubby clusters
    for Tamil.  Adjust to that.

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index e3920d4..516272c 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -1548,9 +1548,9 @@ final_reordering_syllable (const hb_ot_shape_plan_t *plan,
   /*
    * Finish off the clusters and go home!
    */
-  if (hb_options ().uniscribe_bug_compatible)
+  if (hb_options ().uniscribe_bug_compatible && buffer->props.script != HB_SCRIPT_TAMIL)
   {
-    /* Uniscribe merges the entire cluster.
+    /* Uniscribe merges the entire cluster... Except for Tamil.
      * This means, half forms are submerged into the main consonants cluster.
      * This is unnecessary, and makes cursor positioning harder, but that's what
      * Uniscribe does. */
commit 5c558877da5db8c734ba072f01e5e4797876619c
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Oct 16 11:14:15 2013 +0200

    [indic] Allow up to two syllable modifiers
    
    Bug 70509 - Candrabindu+Visarga doesn't work in Devanagari
    https://bugs.freedesktop.org/show_bug.cgi?id=70509
    
    We categorize both bindus and visarga as syllable-modifiers.
    OT spec doesn't actually say what characters go in the syllable
    modifier category, and allows one.  We just allow up to two now.
    
    Test case: U+0930,U+0941,U+0901,U+0903
    
    Uniscribe currently doesn't support that and produces a
    dotted circle.

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index fa068c4..f0c66c4 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -67,7 +67,7 @@ reph = (Ra H | Repha);		# possible reph
 cn = c.ZWJ?.n?;
 forced_rakar = ZWJ H ZWJ Ra;
 matra_group = z{0,3}.M.N?.(H | forced_rakar)?;
-syllable_tail =  (Coeng (cn|V))? (Avag.N?)? (SM.ZWNJ?)? (VD VD?)?;
+syllable_tail =  (Coeng (cn|V))? (Avag.N?)? (SM.SM?.ZWNJ?)? (VD.VD?)?;
 place_holder = NBSP | DOTTEDCIRCLE;
 halant_group = (z?.h.(ZWJ.N?)?);
 final_halant_group = halant_group | h.ZWNJ;
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
index 80bc4a6..abf9760 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
@@ -31,3 +31,4 @@
 र्आ्र्
 क‌ि
 ऽं
+रुँः