[HarfBuzz] harfbuzz-ng: Branch 'master' - 21 commits

Behdad Esfahbod behdad at kemper.freedesktop.org
Fri Jul 20 17:39:59 PDT 2012


 src/hb-ot-layout-gpos-table.hh                                             |    4 
 src/hb-ot-shape-complex-indic-machine.rl                                   |    7 
 src/hb-ot-shape-complex-indic-private.hh                                   |    5 
 src/hb-ot-shape-complex-indic.cc                                           |  126 +++++-----
 src/hb-private.hh                                                          |    1 
 src/hb-unicode.cc                                                          |   10 
 src/hb-uniscribe.cc                                                        |   34 +-
 test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt |    1 
 test/shaping/texts/in-tree/shaper-indic/indic/script-oriya/misc/MANIFEST   |    1 
 test/shaping/texts/in-tree/shaper-indic/indic/script-oriya/misc/bindu.txt  |    2 
 10 files changed, 108 insertions(+), 83 deletions(-)

New commits:
commit 3d4c111b7a13700b2f7a0b087eb3992283295f21
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 19:34:39 2012 -0400

    Add a test case

diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt
index 843ee4f..35ce952 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt
@@ -49,3 +49,4 @@
 অৗ
 ন্ত্র
 ত্যু
+চ্য্র
commit 92a1ad7bef9efb456ab87bd63818cfbed7da3f6f
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 18:38:27 2012 -0400

    [Indic] Stop searching for base if a post form is found before below form
    
    Improves Bengali and Gurmukhi.  Malayalam regressed a bit.  We will deal
    with that later.

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index b5ad4ae..ad55f77 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -505,6 +505,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
     {
       /* -> starting from the end of the syllable, move backwards */
       unsigned int i = end;
+      bool seen_below = false;
       do {
 	i--;
 	/* -> until a consonant is found */
@@ -513,11 +514,13 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
 	  /* -> that does not have a below-base or post-base form
 	   * (post-base forms have to follow below-base forms), */
 	  if (info[i].indic_position() != POS_BELOW_C &&
-	      info[i].indic_position() != POS_POST_C)
+	      (info[i].indic_position() != POS_POST_C || seen_below))
 	  {
 	    base = i;
 	    break;
 	  }
+	  if (info[i].indic_position() == POS_BELOW_C)
+	    seen_below = true;
 
 	  /* -> or that is not a pre-base reordering Ra,
 	   *
commit 4c450c703f8e4618c587bcd7ef46dcc1f2c7947b
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 18:13:04 2012 -0400

    [Indic] Recompose Bengali Ya,Nukta
    
    This is a bunch of hacks for now.
    
    Improves Bengali a bit.

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 0316691..b5ad4ae 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -407,7 +407,8 @@ hb_ot_shape_normalization_mode_t
 _hb_ot_shape_complex_normalization_preference_indic (void)
 {
   /* We want split matras decomposed by the common shaping logic. */
-  return HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED;
+  /* XXX sort this out after adding per-shaper normalizers. */
+  return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
 }
 
 
@@ -549,7 +550,6 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
     if (base < start)
       base = start; /* Just in case... */
 
-
     /* -> If the syllable starts with Ra + Halant (in a script that has Reph)
      *    and has more than one consonant, Ra is excluded from candidates for
      *    base consonants. */
diff --git a/src/hb-unicode.cc b/src/hb-unicode.cc
index c527340..140f382 100644
--- a/src/hb-unicode.cc
+++ b/src/hb-unicode.cc
@@ -258,6 +258,14 @@ hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
 		    hb_codepoint_t     *ab)
 {
   *ab = 0;
+  /* XXX, this belongs to indic normalizer. */
+  if ((FLAG (hb_unicode_general_category (ufuncs, a)) &
+       (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
+        FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
+        FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))
+    return false;
+  /* XXX, add composition-exclusion exceptions to Indic shaper. */
+  if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; }
   return ufuncs->func.compose (ufuncs, a, b, ab, ufuncs->user_data.compose);
 }
 
commit e9c0f152a38cb2e76650a3e43f7fdcda266af696
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 17:05:46 2012 -0400

    [Uniscribe] Fix script fallback
    
    Gurmukhi failures halved now.  Others changed slightly.

diff --git a/src/hb-uniscribe.cc b/src/hb-uniscribe.cc
index e5b98a8..d41542b 100644
--- a/src/hb-uniscribe.cc
+++ b/src/hb-uniscribe.cc
@@ -331,10 +331,6 @@ retry:
   }
 
   OPENTYPE_TAG language_tag = hb_uint32_swap (hb_ot_tag_from_language (buffer->props.language));
-  hb_tag_t buffer_script_tags[2];
-  hb_ot_tags_from_script (buffer->props.script,
-			  &buffer_script_tags[0],
-			  &buffer_script_tags[1]);
 
   unsigned int glyphs_offset = 0;
   unsigned int glyphs_len;
@@ -345,20 +341,11 @@ retry:
     unsigned int chars_offset = items[i].iCharPos;
     unsigned int item_chars_len = items[i + 1].iCharPos - chars_offset;
 
-    OPENTYPE_TAG script_tag;
-    /* We ignore what script tag Uniscribe chose, except to differentiate
-     * between old/new tags.  Not sure if this picks DFLT up correctly...
-     * This also screws things up as the item.analysis also has an opaque
-     * script member. */
-    if (script_tags[i] == hb_uint32_swap (buffer_script_tags[1]))
-      script_tag = hb_uint32_swap (buffer_script_tags[1]);
-    else
-      script_tag = hb_uint32_swap (buffer_script_tags[0]);
-
+  retry_shape:
     hr = ScriptShapeOpenType (font_data->hdc,
 			      &font_data->script_cache,
 			      &items[i].a,
-			      script_tag,
+			      script_tags[i],
 			      language_tag,
 			      range_char_counts,
 			      range_properties,
@@ -373,9 +360,6 @@ retry:
 			      glyph_props + glyphs_offset,
 			      (int *) &glyphs_len);
 
-    for (unsigned int j = chars_offset; j < chars_offset + item_chars_len; j++)
-      log_clusters[j] += glyphs_offset;
-
     if (unlikely (items[i].a.fNoGlyphIndex))
       FAIL ("ScriptShapeOpenType() set fNoGlyphIndex");
     if (unlikely (hr == E_OUTOFMEMORY))
@@ -386,14 +370,24 @@ retry:
       goto retry;
     }
     if (unlikely (hr == USP_E_SCRIPT_NOT_IN_FONT))
-      FAIL ("ScriptShapeOpenType() failed: Font doesn't support script");
+    {
+      if (items[i].a.eScript == SCRIPT_UNDEFINED)
+	FAIL ("ScriptShapeOpenType() failed: Font doesn't support script");
+      items[i].a.eScript = SCRIPT_UNDEFINED;
+      goto retry_shape;
+    }
     if (unlikely (FAILED (hr)))
+    {
       FAIL ("ScriptShapeOpenType() failed: 0x%08xL", hr);
+    }
+
+    for (unsigned int j = chars_offset; j < chars_offset + item_chars_len; j++)
+      log_clusters[j] += glyphs_offset;
 
     hr = ScriptPlaceOpenType (font_data->hdc,
 			      &font_data->script_cache,
 			      &items[i].a,
-			      script_tag,
+			      script_tags[i],
 			      language_tag,
 			      range_char_counts,
 			      range_properties,
commit 5791f329159c9863317e2b507514c29321be31a7
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 16:26:55 2012 -0400

    [Indic] Allow a ZWNJ after SM's
    
    Malayalam failures go way down.  Other scripts benefitted slightly too.
    Sinhala had one or two test regressions, but...

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 4be7698..62091e2 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -65,7 +65,7 @@ reph = (Ra H | Repha);		# possible reph
 cn = c.n?;
 forced_rakar = ZWJ H ZWJ Ra;
 matra_group = z*.M.N?.(H | forced_rakar)?;
-syllable_tail = SM? (Coeng (cn|V))? (VD VD?)?;
+syllable_tail = (SM.ZWNJ?)? (Coeng (cn|V))? (VD VD?)?;
 place_holder = NBSP | DOTTEDCIRCLE;
 halant_group = (z?.h.ZWJ?);
 final_halant_group = halant_group | h.ZWNJ;
commit 34ae336f3fae93ef9372881d545c817bce383041
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 16:17:28 2012 -0400

    [Indic] Improve Reph AfterMain positioning
    
    Fixes 20 out of 48 failing Oriya tests.  Failure rate down to 0.066% now.

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 228fc63..0316691 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -975,8 +975,7 @@ final_reordering_syllable (hb_buffer_t *buffer,
     {
       new_reph_pos = base;
       /* XXX Skip potential pre-base reordering Ra. */
-      while (new_reph_pos < end &&
-	     !( FLAG (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_BELOW_C) | FLAG (POS_POST_C) | FLAG (POS_AFTER_POST) | FLAG (POS_SMVD))))
+      while (new_reph_pos + 1 < end && info[new_reph_pos + 1].indic_position() <= POS_AFTER_MAIN)
 	new_reph_pos++;
       if (new_reph_pos < end)
         goto reph_move;
commit bdd080431a40bc941ece3230f338b94a46bd12a2
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 16:03:09 2012 -0400

    [Indic] Reposition Oriya Candrabindu
    
    Oriya failures down from 0.65% to 0.20%.

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index f75a500..228fc63 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -291,6 +291,7 @@ set_indic_properties (hb_glyph_info_t &info, hb_ot_map_t *map, hb_font_t *font)
     pos = POS_SMVD;
   }
 
+  if (unlikely (u == 0x0B01)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub in the spec. */
 
 
   info.indic_category() = cat;
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-oriya/misc/MANIFEST b/test/shaping/texts/in-tree/shaper-indic/indic/script-oriya/misc/MANIFEST
index 29cfb2f..66a2468 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-oriya/misc/MANIFEST
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-oriya/misc/MANIFEST
@@ -1 +1,2 @@
+bindu.txt
 misc.txt
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-oriya/misc/bindu.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-oriya/misc/bindu.txt
new file mode 100644
index 0000000..13de6ee
--- /dev/null
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-oriya/misc/bindu.txt
@@ -0,0 +1,2 @@
+ମୁଁ
+ମୁଂ
commit 5f0eaaad129ff04d56b8756bebf19fbc242718c9
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 15:47:24 2012 -0400

    [Indic] Fix base search in final_reordering
    
    Fixes most Malayalam failures.  Down from 1.6% to 0.38% now.  Fixes a
    few more in other scripts too.

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 1a75e78..f75a500 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -712,7 +712,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
   }
 
   /* XXX This will not match for old-Indic spec since the Halant-Ra order is reversed already. */
-  if (basic_mask_array[PREF] && base + 3 <= end)
+  if (basic_mask_array[PREF] && base + 2 < end)
   {
     /* Find a Halant,Ra sequence and mark it fore pre-base reordering processing. */
     for (unsigned int i = base + 1; i + 1 < end; i++)
@@ -829,9 +829,13 @@ final_reordering_syllable (hb_buffer_t *buffer,
    */
 
   /* Find base again */
-  unsigned int base = end;
-  while (start < base && info[base - 1].indic_position() >= POS_BASE_C)
-    base--;
+  unsigned int base;
+  for (base = start; base < end; base++)
+    if (info[base].indic_position() >= POS_BASE_C) {
+      if (start < base && info[base].indic_position() > POS_BASE_C)
+        base--;
+      break;
+    }
 
   unsigned int start_of_last_cluster = base;
 
commit 81202bd860e4034c18d9f80c5a4f33d9f48463a3
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 15:10:02 2012 -0400

    [Indic] Don't attach SM/VD to other characters

diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh
index 5f4856e..4a4c8c0 100644
--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh
@@ -83,6 +83,8 @@ enum indic_category_t {
 
 /* Visual positions in a syllable from left to right. */
 enum indic_position_t {
+  POS_START,
+
   POS_RA_TO_BECOME_REPH,
   POS_PRE_M,
   POS_PRE_C,
@@ -102,6 +104,7 @@ enum indic_position_t {
 
   POS_FINAL_C,
   POS_SMVD,
+
   POS_END
 };
 
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 36bf240..1a75e78 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -634,29 +634,32 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
       }
   }
 
-  /* Attach ZWJ, ZWNJ, nukta, and halant to previous char to move with them. */
-  if (!indic_options ().uniscribe_bug_compatible)
+  /* Attach misc marks to previous char to move with them. */
   {
-    /* Please update the Uniscribe branch when touching this! */
-    for (unsigned int i = start + 1; i < end; i++)
-      if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_RS) | FLAG (OT_H))))
-	info[i].indic_position() = info[i - 1].indic_position();
-  } else {
-    /*
-     * Uniscribe doesn't move the Halant with Left Matra.
-     * TEST: U+092B,U+093F,U+094DE
-     */
-    /* Please update the non-Uniscribe branch when touching this! */
-    for (unsigned int i = start + 1; i < end; i++)
-      if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_RS) | FLAG (OT_H)))) {
-	info[i].indic_position() = info[i - 1].indic_position();
-	if (info[i].indic_category() == OT_H && info[i].indic_position() == POS_PRE_M)
+    indic_position_t last_pos = POS_START;
+    for (unsigned int i = start; i < end; i++)
+    {
+      if ((FLAG (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | HALANT_OR_COENG_FLAGS)))
+      {
+	info[i].indic_position() = last_pos;
+	if (unlikely (indic_options ().uniscribe_bug_compatible &&
+		      info[i].indic_category() == OT_H &&
+		      info[i].indic_position() == POS_PRE_M))
+	{
+	  /*
+	   * Uniscribe doesn't move the Halant with Left Matra.
+	   * TEST: U+092B,U+093F,U+094DE
+	   */
 	  for (unsigned int j = i; j > start; j--)
 	    if (info[j - 1].indic_position() != POS_PRE_M) {
 	      info[i].indic_position() = info[j - 1].indic_position();
 	      break;
 	    }
+	}
+      } else if (info[i].indic_position() != POS_SMVD) {
+        last_pos = (indic_position_t) info[i].indic_position();
       }
+    }
   }
   /* Re-attach ZWJ, ZWNJ, and halant to next char, for after-base consonants. */
   {
@@ -666,7 +669,8 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
         last_halant = i;
       else if (is_consonant (info[i])) {
 	for (unsigned int j = last_halant; j < i; j++)
-	  info[j].indic_position() = info[i].indic_position();
+	  if (info[j].indic_position() != POS_SMVD)
+	    info[j].indic_position() = info[i].indic_position();
       }
   }
 
commit efb4ad735691837a52447bedc1a66a87d0d9af51
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 14:27:38 2012 -0400

    Fix compiler warnings
    
    If x is not constant, we cannot ASSERT_STATIC on it.

diff --git a/src/hb-private.hh b/src/hb-private.hh
index bdfd3f5..3f710ed 100644
--- a/src/hb-private.hh
+++ b/src/hb-private.hh
@@ -734,7 +734,7 @@ hb_in_range (T u, T lo, T hi)
  * For example, for testing "x ∈ {x1, x2, x3}" use:
  * (FLAG(x) & (FLAG(x1) | FLAG(x2) | FLAG(x3)))
  */
-#define FLAG(x) (ASSERT_STATIC_EXPR_ZERO((x) < 8 * sizeof(int)) + (1<<(x)))
+#define FLAG(x) (1<<(x))
 #define FLAG_RANGE(x,y) (ASSERT_STATIC_EXPR_ZERO ((x) < (y)) + FLAG(y+1) - FLAG(x))
 
 
commit f31d97e44eeb6fb141f3de928e27e033fc7b1f47
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 14:13:35 2012 -0400

    [Indic] Form Telugu Reph out of Ra,Virama,ZWJ
    
    Apparently this was approved in Feb 2012.  No font yet.

diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh
index bbf5484..5f4856e 100644
--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh
@@ -177,7 +177,7 @@ static const hb_codepoint_t ra_chars[] = {
   0x0AB0, /* Gujarati */
   0x0B30, /* Oriya */
   0x0BB0, /* Tamil */		/* No Reph */
-  0x0C30, /* Telugu */		/* No Reph */
+  0x0C30, /* Telugu */		/* Reph formed only with ZWJ */
   0x0CB0, /* Kannada */
   0x0D30, /* Malayalam */	/* No Reph, Logical Repha */
 
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index ffba986..36bf240 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -471,8 +471,8 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
 	start + 3 <= end &&
 	info[start].indic_category() == OT_Ra &&
 	info[start + 1].indic_category() == OT_H &&
-	(unlikely (buffer->props.script == HB_SCRIPT_SINHALA) ?
-	 info[start + 2].indic_category() == OT_ZWJ /* In Sinhala, form Reph only if ZWJ is present */:
+	(unlikely (buffer->props.script == HB_SCRIPT_SINHALA || buffer->props.script == HB_SCRIPT_TELUGU) ?
+	 info[start + 2].indic_category() == OT_ZWJ /* In Sinhala & Telugu, form Reph only if ZWJ is present */:
 	 !is_joiner (info[start + 2] /* In other scripts, any joiner blocks Reph formation */ )
 	))
     {
commit 2e193b240ec85cab0d4e2f8a375c5a7f0ef99985
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 14:02:35 2012 -0400

    [Indic] Don't split U+0AC9
    
    Although IndicMatraCategory.txt classifies it as Top_And_Right matra,
    it does not have Unicode decomposition, and Uniscribe does not do
    anything special about it either.
    
    Gujarati failures down from 0.672% to 0.0130966%.

diff --git a/src/hb-unicode.cc b/src/hb-unicode.cc
index 3e8f807..c527340 100644
--- a/src/hb-unicode.cc
+++ b/src/hb-unicode.cc
@@ -269,7 +269,7 @@ hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
 {
   /* XXX FIXME, move these to complex shapers and propagage to normalizer.*/
   switch (ab) {
-    case 0x0AC9  : *a = 0x0AC5; *b= 0x0ABE; return true;
+    case 0x0AC9  : return false;
 
     case 0x0931  : return false;
     case 0x0B94  : return false;
commit 30c3d5e9fc61b49c2c6ad4e744300edd6f3e0261
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 13:56:32 2012 -0400

    [Indic] Simplify Uniscribe cluster emulation
    
    Now that we break syllables on Halant,ZWNJ, this code can be simplified.

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 7979e24..ffba986 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -1110,18 +1110,11 @@ final_reordering_syllable (hb_buffer_t *buffer,
 
   if (indic_options ().uniscribe_bug_compatible)
   {
-    /* This is what Uniscribe does.  Ie. add cluster boundaries after Halant,ZWNJ.
+    /* Uniscribe merges the entire cluster.
      * This means, half forms are submerged into the main consonants cluster.
      * This is unnecessary, and makes cursor positioning harder, but that's what
      * Uniscribe does. */
-    unsigned int cluster_start = start;
-    for (unsigned int i = start + 1; i < start_of_last_cluster; i++)
-      if (is_halant_or_coeng (info[i - 1]) && info[i].indic_category() == OT_ZWNJ) {
-        i++;
-	buffer->merge_clusters (cluster_start, i);
-	cluster_start = i;
-      }
-    start_of_last_cluster = cluster_start;
+    start_of_last_cluster = start;
   }
 
   buffer->merge_clusters (start_of_last_cluster, end);
commit decf6ffca475fe01ff3151b7641f629f031137d2
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 13:51:31 2012 -0400

    [Indic] Minor!

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index e771e57..7979e24 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -1108,7 +1108,7 @@ final_reordering_syllable (hb_buffer_t *buffer,
    * Finish off the clusters and go home!
    */
 
-  if (!indic_options ().uniscribe_bug_compatible)
+  if (indic_options ().uniscribe_bug_compatible)
   {
     /* This is what Uniscribe does.  Ie. add cluster boundaries after Halant,ZWNJ.
      * This means, half forms are submerged into the main consonants cluster.
commit 9e4f94a72cea6d65a6a7ba5a47db92e00dbfbb91
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 13:48:03 2012 -0400

    [Indic] Break syllables at Halant,ZWNJ
    
    That's really what Uniscribe does, and explains a lot of peculiarities of
    Halant,ZWNJ before the base.
    
    Sent Telugu from 1% failures to 0.03%.  Improved Kannada and Malayalam
    slightly.  Fixed half of Bengali, and did NOT break anything!

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 4501773..4be7698 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -67,8 +67,9 @@ forced_rakar = ZWJ H ZWJ Ra;
 matra_group = z*.M.N?.(H | forced_rakar)?;
 syllable_tail = SM? (Coeng (cn|V))? (VD VD?)?;
 place_holder = NBSP | DOTTEDCIRCLE;
-halant_group = (z?.h.z?);
-halant_or_matra_group = (halant_group | matra_group*);
+halant_group = (z?.h.ZWJ?);
+final_halant_group = halant_group | h.ZWNJ;
+halant_or_matra_group = (final_halant_group | matra_group*);
 
 
 consonant_syllable =	Repha? (cn.halant_group)* cn A? halant_or_matra_group? syllable_tail;
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 42a7e8d..e771e57 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -531,9 +531,8 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
 	}
 	else
 	{
-	  /* A ZWJ at the end of syllable, or any ZWJ/ZWNJ in other places, stop the base
-	   * search (to request explicit half or halant forms. */
-	  if (is_joiner (info[i]) && (i + 1 < end || info[i].indic_category() == OT_ZWJ))
+	  /* A ZWJ stops the base search, and requests an explicit half form. */
+	  if (info[i].indic_category() == OT_ZWJ)
 	    break;
 	}
       } while (i > limit);
commit 2c372b80f6befad69e216e3f218b38640b8cc044
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 13:37:48 2012 -0400

    [Indic] Better check for applying 'init'
    
    Specifically, don't apply 'init' if previous char is a joiner.
    
    Fixes some more of Bengali.

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index bbb881e..42a7e8d 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -1101,14 +1101,7 @@ final_reordering_syllable (hb_buffer_t *buffer,
   if (info[start].indic_position () == POS_PRE_M &&
       (!start ||
        !(FLAG (_hb_glyph_info_get_general_category (&info[start - 1])) &
-	 (FLAG (HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER) |
-	  FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER) |
-	  FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) |
-	  FLAG (HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER) |
-	  FLAG (HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER) |
-	  FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
-	  FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
-	  FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))))
+	 FLAG_RANGE (HB_UNICODE_GENERAL_CATEGORY_FORMAT, HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))
     info[start].mask |= init_mask;
 
 
diff --git a/src/hb-private.hh b/src/hb-private.hh
index 7b778b7..bdfd3f5 100644
--- a/src/hb-private.hh
+++ b/src/hb-private.hh
@@ -734,7 +734,8 @@ hb_in_range (T u, T lo, T hi)
  * For example, for testing "x ∈ {x1, x2, x3}" use:
  * (FLAG(x) & (FLAG(x1) | FLAG(x2) | FLAG(x3)))
  */
-#define FLAG(x) (1<<(x))
+#define FLAG(x) (ASSERT_STATIC_EXPR_ZERO((x) < 8 * sizeof(int)) + (1<<(x)))
+#define FLAG_RANGE(x,y) (ASSERT_STATIC_EXPR_ZERO ((x) < (y)) + FLAG(y+1) - FLAG(x))
 
 
 template <typename T, typename T2> inline void
commit 34a7440b7c6c6e53394ddbdbedaad57b23f85105
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 12:32:59 2012 -0400

    [GPOS] Don't zero mark advances
    
    Fixes more of Telugu, Kannada, and Oriya.
    
    May break things (outside Indic...), but we cannot think of any font relying
    on this immediately.

diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh
index 94055b3..9eadbd6 100644
--- a/src/hb-ot-layout-gpos-table.hh
+++ b/src/hb-ot-layout-gpos-table.hh
@@ -1486,8 +1486,8 @@ fix_mark_attachment (hb_glyph_position_t *pos, unsigned int i, hb_direction_t di
 
   unsigned int j = i - pos[i].attach_lookback();
 
-  pos[i].x_advance = 0;
-  pos[i].y_advance = 0;
+//  pos[i].x_advance = 0;
+//  pos[i].y_advance = 0;
   pos[i].x_offset += pos[j].x_offset;
   pos[i].y_offset += pos[j].y_offset;
 
commit 8ed248de77e5d2ed978e55c0ce1a11727bc9e34c
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 11:42:24 2012 -0400

    [Indic] Minor

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index ae3af2c..bbb881e 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -1112,8 +1112,9 @@ final_reordering_syllable (hb_buffer_t *buffer,
     info[start].mask |= init_mask;
 
 
-
-  /* Finish off the clusters and go home! */
+  /*
+   * Finish off the clusters and go home!
+   */
 
   if (!indic_options ().uniscribe_bug_compatible)
   {
commit d0e68dbd0b9fc9a42c4280d01c8ffd9c5015d550
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 11:25:41 2012 -0400

    [Indic] Implement reph positioning step 5
    
    Not tuned, just copied from step 2.  Fixes another 0.5% of Kannada
    failures.  1% to go.

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index ffae430..ae3af2c 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -999,7 +999,17 @@ final_reordering_syllable (hb_buffer_t *buffer,
      */
     reph_step_5:
     {
-      /* XXX */
+      /* Copied from step 2. */
+      new_reph_pos = start + 1;
+      while (new_reph_pos < base && !is_halant_or_coeng (info[new_reph_pos]))
+	new_reph_pos++;
+
+      if (new_reph_pos < base && is_halant_or_coeng (info[new_reph_pos])) {
+	/* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
+	if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1]))
+	  new_reph_pos++;
+	goto reph_move;
+      }
     }
 
     /*       6. Otherwise, reorder reph to the end of the syllable.
commit a9e45c32e4a0d6da33c52f8427aa694e57f52eb9
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 11:04:15 2012 -0400

    [Indic] Don't let ZWNJ at the end of syllable affect base search
    
    Fixes a few Devanagari, half of remaining Kannada failures, quarter for
    Telugu, and others slightly improved or unchanged.

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index a3f20b1..ffae430 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -530,8 +530,12 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
 	  base = i;
 	}
 	else
-	  if (is_joiner (info[i]))
+	{
+	  /* A ZWJ at the end of syllable, or any ZWJ/ZWNJ in other places, stop the base
+	   * search (to request explicit half or halant forms. */
+	  if (is_joiner (info[i]) && (i + 1 < end || info[i].indic_category() == OT_ZWJ))
 	    break;
+	}
       } while (i > limit);
     }
     else
commit 20b68e699f73e6ce046c0ec143d40b3d6d48e06b
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Jul 20 10:47:46 2012 -0400

    [Indic] Apply 'cjct' globally
    
    Fixes 5 Devanagari failures, and no regressions.

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 42e0f70..a3f20b1 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -322,7 +322,7 @@ indic_basic_features[] =
   {HB_TAG('a','b','v','f'), false},
   {HB_TAG('p','s','t','f'), false},
   {HB_TAG('c','f','a','r'), false},
-  {HB_TAG('c','j','c','t'), false},
+  {HB_TAG('c','j','c','t'), true},
   {HB_TAG('v','a','t','u'), true},
 };
 
@@ -338,7 +338,7 @@ enum {
   ABVF,
   PSTF,
   CFAR,
-  CJCT,
+  _CJCT,
   VATU
 };
 
@@ -691,15 +691,15 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
       info[i].mask |= basic_mask_array[RPHF];
 
     /* Pre-base */
-    mask = basic_mask_array[HALF] | basic_mask_array[CJCT];
+    mask = basic_mask_array[HALF];
     for (unsigned int i = start; i < base; i++)
       info[i].mask  |= mask;
     /* Base */
-    mask = basic_mask_array[CJCT];
+    mask = 0;
     if (base < end)
       info[base].mask |= mask;
     /* Post-base */
-    mask = basic_mask_array[BLWF] | basic_mask_array[ABVF] | basic_mask_array[PSTF] | basic_mask_array[CJCT];
+    mask = basic_mask_array[BLWF] | basic_mask_array[ABVF] | basic_mask_array[PSTF];
     for (unsigned int i = base + 1; i < end; i++)
       info[i].mask  |= mask;
   }
@@ -737,7 +737,10 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
       do {
 	j--;
 
-	info[j].mask &= ~basic_mask_array[CJCT];
+	/* A ZWJ disables CJCT, however, it's mere presence is enough
+	 * to disable ligation.  No explicit action needed. */
+
+	/* A ZWNJ disables HALF. */
 	if (non_joiner)
 	  info[j].mask &= ~basic_mask_array[HALF];
 



More information about the HarfBuzz mailing list