[HarfBuzz] harfbuzz-ng: Branch 'master' - 19 commits

Behdad Esfahbod behdad at kemper.freedesktop.org
Thu Jul 19 06:52:49 PDT 2012


 src/hb-buffer-private.hh                                                            |    2 
 src/hb-ot-shape-complex-indic-machine.rl                                            |   21 +-
 src/hb-ot-shape-complex-indic-private.hh                                            |    6 
 src/hb-ot-shape-complex-indic.cc                                                    |   43 +++--
 src/hb-ot-shape-complex-misc.cc                                                     |   82 ++++------
 src/hb-ot-shape-complex-private.hh                                                  |    7 
 src/hb-unicode.cc                                                                   |    8 
 test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt       |    5 
 test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST          |    1 
 test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt          |    3 
 test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/reph.txt          |    3 
 test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt |    1 
 12 files changed, 107 insertions(+), 75 deletions(-)

New commits:
commit 422ecd2d3c198a36d07d409341cb82ea57c7ad6b
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 23:25:58 2012 -0400

    [Indic] Accept a forced Rakar sequence at the end of syllable
    
    In Sinhala, Rakar is formed by Al-Lakuna,ZWJ,Ra.  If you put that at the
    end of a Consonant,Matra syllable, you get a dotted-circle from
    Uniscribe.  Apparently adding a ZWJ before the Al-Lakuna "fixes" that.
    And people have been encoding that sequence...  So, allow a forced
    "ZWJ,Virama,ZWJ,Ra" sequence at the end of syllables.
    
    Fixes some 100 or more of Sinhala failures.  Now at 622 only (0.23%).

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 7fe9a68..4501773 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -63,7 +63,8 @@ h = H | Coeng;			# is_halant_or_coeng
 reph = (Ra H | Repha);		# possible reph
 
 cn = c.n?;
-matra_group = z*.M.N?.H?;
+forced_rakar = ZWJ H ZWJ Ra;
+matra_group = z*.M.N?.(H | forced_rakar)?;
 syllable_tail = SM? (Coeng (cn|V))? (VD VD?)?;
 place_holder = NBSP | DOTTEDCIRCLE;
 halant_group = (z?.h.z?);
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
index a8491bf..40b5dc5 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
@@ -5,3 +5,5 @@
 කෝ
 කෝ
 ස්ට්‍රේ
+ක‍්‍රම
+ශී‍්‍ර
commit 6fc1732003d71cf90d37247482772c3da884687f
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 17:49:19 2012 -0400

    [Indic] Allow joiners on both sides of Halant at the same time
    
    The sequence <ZWJ,Al-Lakuna,ZWJ> is used in Sinhala to explicitly ask
    for Rakar.  Fixes two-thousand Sinhala tests.  Not many left.

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index a266a12..7fe9a68 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -66,7 +66,7 @@ cn = c.n?;
 matra_group = z*.M.N?.H?;
 syllable_tail = SM? (Coeng (cn|V))? (VD VD?)?;
 place_holder = NBSP | DOTTEDCIRCLE;
-halant_group = (h.z?|z.h);
+halant_group = (z?.h.z?);
 halant_or_matra_group = (halant_group | matra_group*);
 
 
commit 10cdc94eee2225f14c198c015256a5a0063eecad
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 17:42:34 2012 -0400

    [Indic] In final reordering, find base, even if it disappeared
    
    POS_BASE can disappear if base ligated backward.  Define base as last
    with position not after base.
    
    Fixes a few hundred of Sinhala failures with Iskoola Pota.

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index c0d56eb..2aaac54 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -705,11 +705,8 @@ final_reordering_syllable (hb_buffer_t *buffer,
 
   /* Find base again */
   unsigned int base = end;
-  for (unsigned int i = start; i < end; i++)
-    if (info[i].indic_position() == POS_BASE_C) {
-      base = i;
-      break;
-    }
+  while (start < base && info[base - 1].indic_position() >= POS_BASE_C)
+    base--;
 
   unsigned int start_of_last_cluster = base;
 
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
index 0d772a7..a8491bf 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/misc.txt
@@ -4,3 +4,4 @@
 කේ
 කෝ
 කෝ
+ස්ට්‍රේ
commit 9c4d24a3a677a58ec59c7fb0f8b70b8aad30a032
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 17:29:10 2012 -0400

    [Indic] Minor

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index fa88d2e..c0d56eb 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -797,6 +797,7 @@ final_reordering_syllable (hb_buffer_t *buffer,
        default:
        case HB_SCRIPT_DEVANAGARI:
        case HB_SCRIPT_GUJARATI:
+       case HB_SCRIPT_SINHALA:
 	 reph_pos = REPH_BEFORE_POSTSCRIPT;
 	 break;
 
commit 3285e107c9a83aeb552e67f9460680ff6d167d88
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 17:22:14 2012 -0400

    [Indic] Implement Sinhala "Al Lakuna" Reph behavior
    
    In Sinhala, Reph is formed only explicitly, by the presence of a ZWJ.

diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh
index e97fca9..ac11732 100644
--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh
@@ -269,6 +269,8 @@ static const hb_codepoint_t ra_chars[] = {
   0x0CB0, /* Kannada */
   0x0D30, /* Malayalam */	/* No Reph, Logical Repha */
 
+  0x0DBB, /* Sinhala */		/* Reph formed only with ZWJ */
+
   0x179A, /* Khmer */		/* No Reph, Visual Repha */
 };
 
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 9b5b499..fa88d2e 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -356,9 +356,14 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
 	start + 3 <= end &&
 	info[start].indic_category() == OT_Ra &&
 	info[start + 1].indic_category() == OT_H &&
-	!is_joiner (info[start + 2]))
+	(unlikely (buffer->props.script == HB_SCRIPT_SINHALA) ?
+	 info[start + 2].indic_category() == OT_ZWJ /* In Sinhala, form Reph only if ZWJ is present */:
+	 !is_joiner (info[start + 2] /* In other scripts, any joiner blocks Reph formation */ )
+	))
     {
       limit += 2;
+      while (limit < end && is_joiner (info[limit]))
+        limit++;
       base = start;
       has_reph = true;
     };
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST
index 29cfb2f..3c2a4fb 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST
@@ -1 +1,2 @@
 misc.txt
+reph.txt
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/reph.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/reph.txt
new file mode 100644
index 0000000..f5f2f53
--- /dev/null
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/reph.txt
@@ -0,0 +1,3 @@
+ර්ධ
+ර්‍ධ
+ර්‌ධ
commit 91cade755534c42bb826a6aefcbca8a543d94387
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 16:50:41 2012 -0400

    [Indic/Unicode] Decompose Sinhala split matras the way Uniscribe likes
    
    Makes no visual difference.
    
    Fixes most of the failures.  Down from 15% to 1.3%!

diff --git a/src/hb-unicode.cc b/src/hb-unicode.cc
index 7a5aa6c..cf46aa9 100644
--- a/src/hb-unicode.cc
+++ b/src/hb-unicode.cc
@@ -270,6 +270,14 @@ hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
   /* XXX FIXME, move these to complex shapers and propagage to normalizer.*/
   switch (ab) {
     case 0x0AC9  : *a = 0x0AC5; *b= 0x0ABE; return true;
+
+    /* These ones have Unicode decompositions, but we do it
+     * this way to be close to what Uniscribe does. */
+    case 0x0DDA  : *a = 0x0DD9; *b= 0x0DDA; return true;
+    case 0x0DDC  : *a = 0x0DD9; *b= 0x0DDC; return true;
+    case 0x0DDD  : *a = 0x0DD9; *b= 0x0DDD; return true;
+    case 0x0DDE  : *a = 0x0DD9; *b= 0x0DDE; return true;
+
     case 0x0F77  : *a = 0x0FB2; *b= 0x0F81; return true;
     case 0x0F79  : *a = 0x0FB3; *b= 0x0F81; return true;
     case 0x17BE  : *a = 0x17C1; *b= 0x17BE; return true;
commit d8942dcbb4e3249a2d78a6455c119294ed4390bc
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 16:34:10 2012 -0400

    Apply Tibetan (global) features.
    
    Fixes all Tibetan failures.  All 180k of them!
    
    Merges back Hangul into the default shaper.

diff --git a/src/hb-ot-shape-complex-misc.cc b/src/hb-ot-shape-complex-misc.cc
index 3cea734..7a11876 100644
--- a/src/hb-ot-shape-complex-misc.cc
+++ b/src/hb-ot-shape-complex-misc.cc
@@ -29,73 +29,65 @@
 
 /* TODO Add kana, and other small shapers here */
 
-/* When adding trivial shapers, eg. kana, hangul, etc, we can either
- * add a full shaper enum value for them, or switch on the script in
- * the default complex shaper.  The former is faster, so I think that's
- * what we would do, and hence the default complex shaper shall remain
- * empty.
- */
-
-void
-_hb_ot_shape_complex_collect_features_default (hb_ot_map_builder_t *map HB_UNUSED,
-					       const hb_segment_properties_t *props HB_UNUSED)
-{
-}
-
-void
-_hb_ot_shape_complex_override_features_default (hb_ot_map_builder_t *map HB_UNUSED,
-					        const hb_segment_properties_t *props HB_UNUSED)
-{
-}
 
-hb_ot_shape_normalization_mode_t
-_hb_ot_shape_complex_normalization_preference_default (void)
-{
-  return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
-}
-
-void
-_hb_ot_shape_complex_setup_masks_default (hb_ot_map_t *map HB_UNUSED,
-					  hb_buffer_t *buffer HB_UNUSED,
-					  hb_font_t *font HB_UNUSED)
-{
-}
-
-
-
-/* Hangul shaper */
+/* The default shaper *only* adds additional per-script features.*/
 
 static const hb_tag_t hangul_features[] =
 {
   HB_TAG('l','j','m','o'),
   HB_TAG('v','j','m','o'),
   HB_TAG('t','j','m','o'),
+  HB_TAG_NONE
+};
+
+static const hb_tag_t tibetan_features[] =
+{
+  HB_TAG('a','b','v','s'),
+  HB_TAG('b','l','w','s'),
+  HB_TAG('a','b','v','m'),
+  HB_TAG('b','l','w','m'),
+  HB_TAG_NONE
 };
 
 void
-_hb_ot_shape_complex_collect_features_hangul (hb_ot_map_builder_t *map,
-					      const hb_segment_properties_t *props HB_UNUSED)
+_hb_ot_shape_complex_collect_features_default (hb_ot_map_builder_t *map HB_UNUSED,
+					       const hb_segment_properties_t *props)
 {
-  for (unsigned int i = 0; i < ARRAY_LENGTH (hangul_features); i++)
-    map->add_bool_feature (hangul_features[i]);
+  const hb_tag_t *script_features = NULL;
+
+  switch ((hb_tag_t) props->script)
+  {
+    /* Unicode-1.1 additions */
+    case HB_SCRIPT_HANGUL:
+      script_features = hangul_features;
+      break;
+
+    /* Unicode-2.0 additions */
+    case HB_SCRIPT_TIBETAN:
+      script_features = tibetan_features;
+      break;
+  }
+
+  for (; script_features && *script_features; script_features++)
+    map->add_bool_feature (*script_features);
 }
 
 void
-_hb_ot_shape_complex_override_features_hangul (hb_ot_map_builder_t *map,
-					       const hb_segment_properties_t *props HB_UNUSED)
+_hb_ot_shape_complex_override_features_default (hb_ot_map_builder_t *map HB_UNUSED,
+					        const hb_segment_properties_t *props HB_UNUSED)
 {
 }
 
 hb_ot_shape_normalization_mode_t
-_hb_ot_shape_complex_normalization_preference_hangul (void)
+_hb_ot_shape_complex_normalization_preference_default (void)
 {
-  return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL;
+  return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
 }
 
 void
-_hb_ot_shape_complex_setup_masks_hangul (hb_ot_map_t *map HB_UNUSED,
-					 hb_buffer_t *buffer HB_UNUSED,
-					 hb_font_t *font HB_UNUSED)
+_hb_ot_shape_complex_setup_masks_default (hb_ot_map_t *map HB_UNUSED,
+					  hb_buffer_t *buffer HB_UNUSED,
+					  hb_font_t *font HB_UNUSED)
 {
 }
 
diff --git a/src/hb-ot-shape-complex-private.hh b/src/hb-ot-shape-complex-private.hh
index 7f74e34..867a3c2 100644
--- a/src/hb-ot-shape-complex-private.hh
+++ b/src/hb-ot-shape-complex-private.hh
@@ -52,7 +52,6 @@
 #define HB_COMPLEX_SHAPERS_IMPLEMENT_SHAPERS \
   HB_COMPLEX_SHAPER_IMPLEMENT (default) /* should be first */ \
   HB_COMPLEX_SHAPER_IMPLEMENT (arabic) \
-  HB_COMPLEX_SHAPER_IMPLEMENT (hangul) \
   HB_COMPLEX_SHAPER_IMPLEMENT (indic) \
   HB_COMPLEX_SHAPER_IMPLEMENT (thai) \
   /* ^--- Add new shapers here */
@@ -89,12 +88,6 @@ hb_ot_shape_complex_categorize (const hb_segment_properties_t *props)
 
 
     /* Unicode-1.1 additions */
-    case HB_SCRIPT_HANGUL:
-
-      return hb_ot_complex_shaper_hangul;
-
-
-    /* Unicode-1.1 additions */
     case HB_SCRIPT_THAI:
     case HB_SCRIPT_LAO:
 
commit 552d19b7a11f7dff888587fce4d56d9f8e47e819
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 16:00:49 2012 -0400

    [Indic] Treat Register Shifters like Nukta
    
    Really this time.
    
    Fixes another 18 Khmer tests.

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 0943b4d..a266a12 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -57,7 +57,7 @@ Repha = 15;
 Ra    = 16;
 
 c = (C | Ra);			# is_consonant
-n = (N.N? | ZWNJ?.RS);		# is_consonant_modifier
+n = ((ZWNJ?.RS)? (N.N?)?);	# is_consonant_modifier
 z = ZWJ|ZWNJ;			# is_joiner
 h = H | Coeng;			# is_halant_or_coeng
 reph = (Ra H | Repha);		# possible reph
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 3f1faec..9b5b499 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -299,8 +299,6 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED,
       info.indic_position() = consonant_position (info.codepoint);
       if (is_ra (info.codepoint))
 	info.indic_category() = OT_Ra;
-    } else if (info.indic_category() == OT_RS) {
-      info.indic_position() = POS_ABOVE_M;
     } else if (info.indic_category() == OT_SM ||
 	       info.indic_category() == OT_VD) {
       info.indic_position() = POS_SMVD;
diff --git a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
index c9e5443..46a8073 100644
--- a/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/south-east-asian/script-khmer/misc/misc.txt
@@ -14,3 +14,4 @@
 ង្រ្គ
 ម៉្លេះ
 ម‌៉្លេះ
+ប៊័
commit e8cd81f76d159f3ecf808952dab24bc07782497a
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 16:00:20 2012 -0400

    [Indic] Minor

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index abe74b2..3f1faec 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -473,7 +473,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
   /* Reorder characters */
 
   for (unsigned int i = start; i < base; i++)
-    info[i].indic_position() = MIN (POS_PRE_C, info[i].indic_position());
+    info[i].indic_position() = MIN ((unsigned int) POS_PRE_C, info[i].indic_position());
 
   if (base < end)
     info[base].indic_position() = POS_BASE_C;
commit 69f26bf39c824d6bf5b1c0d410380cc5462ad5ca
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 15:45:43 2012 -0400

    [Indic] Fix Matra reordering when base is at end of syllable
    
    For example: U+915,U+200c,U+93f
    
    Fixes last Tamil failure!

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 1c80ea4..abe74b2 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -473,7 +473,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
   /* Reorder characters */
 
   for (unsigned int i = start; i < base; i++)
-    info[i].indic_position() = POS_PRE_C;
+    info[i].indic_position() = MIN (POS_PRE_C, info[i].indic_position());
 
   if (base < end)
     info[base].indic_position() = POS_BASE_C;
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
index 23afbe0..83cac77 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
@@ -29,3 +29,4 @@
 र्अ्‌
 र्अ्‍
 र्आ्र्
+क‌ि
commit d16ccc4ae7aa8be460881042413fa2637929fede
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 15:43:55 2012 -0400

    Leave one extra item at the end of buffer allocation
    
    Just in case, for the times we do out-of-bounds access.
    
    jk

diff --git a/src/hb-buffer-private.hh b/src/hb-buffer-private.hh
index 9bd80e0..e06a7fa 100644
--- a/src/hb-buffer-private.hh
+++ b/src/hb-buffer-private.hh
@@ -148,7 +148,7 @@ struct hb_buffer_t {
   HB_INTERNAL bool enlarge (unsigned int size);
 
   inline bool ensure (unsigned int size)
-  { return likely (size <= allocated) ? true : enlarge (size); }
+  { return likely (size < allocated) ? true : enlarge (size); }
 
   HB_INTERNAL bool make_room_for (unsigned int num_in, unsigned int num_out);
 
commit 075d671f1093d2e3c58f7f45568696030f1b3efd
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 15:41:53 2012 -0400

    [Indic] Fix out-of-bounds array access

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 394e3f4..1c80ea4 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -475,7 +475,8 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
   for (unsigned int i = start; i < base; i++)
     info[i].indic_position() = POS_PRE_C;
 
-  info[base].indic_position() = POS_BASE_C;
+  if (base < end)
+    info[base].indic_position() = POS_BASE_C;
 
   /* Mark final consonants.  A final consonant is one appearing after a matra,
    * like in Khmer. */
@@ -577,7 +578,8 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
       info[i].mask  |= mask;
     /* Base */
     mask = basic_mask_array[AKHN] | basic_mask_array[CJCT];
-    info[base].mask |= mask;
+    if (base < end)
+      info[base].mask |= mask;
     /* Post-base */
     mask = basic_mask_array[BLWF] | basic_mask_array[ABVF] | basic_mask_array[PSTF] | basic_mask_array[CJCT];
     for (unsigned int i = base + 1; i < end; i++)
commit dcb527242b1eca4db1e190a7802f9cd132aaf46e
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 15:28:25 2012 -0400

    [Indic] Allow joiners before matras
    
    Fixes 1 more Devanagari test!

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 9ad3ec2..0943b4d 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -63,7 +63,7 @@ h = H | Coeng;			# is_halant_or_coeng
 reph = (Ra H | Repha);		# possible reph
 
 cn = c.n?;
-matra_group = M.N?.H?;
+matra_group = z*.M.N?.H?;
 syllable_tail = SM? (Coeng (cn|V))? (VD VD?)?;
 place_holder = NBSP | DOTTEDCIRCLE;
 halant_group = (h.z?|z.h);
@@ -87,7 +87,7 @@ main := |*
 
 #define process_syllable(func) \
   HB_STMT_START { \
-    /* fprintf (stderr, "syllable %d..%d %s\n", last, p+1, #func); */ \
+    if (0) fprintf (stderr, "syllable %d..%d %s\n", last, p+1, #func); \
     for (unsigned int i = last; i < p+1; i++) \
       info[i].syllable() = syllable_serial; \
     PASTE (initial_reordering_, func) (map, buffer, mask_array, last, p+1); \
commit 391cc0331749e263bdfe83a8f5f6d76f2360ee7a
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 15:10:05 2012 -0400

    [Indic] Allow halant group in Vowel and placeholder syllables
    
    Fixes 2 out of 560 Devanagari failures.  AND:
    Fixes 1 out of 2 Tamil failures.

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 48bf609..9ad3ec2 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -67,11 +67,12 @@ matra_group = M.N?.H?;
 syllable_tail = SM? (Coeng (cn|V))? (VD VD?)?;
 place_holder = NBSP | DOTTEDCIRCLE;
 halant_group = (h.z?|z.h);
+halant_or_matra_group = (halant_group | matra_group*);
 
 
-consonant_syllable =	Repha? (cn.halant_group)* cn A? (halant_group | matra_group*)? syllable_tail;
-vowel_syllable =	reph? V.n? (halant_group.cn | ZWJ.cn)* matra_group* syllable_tail;
-standalone_cluster =	reph? place_holder.n? (halant_group.cn)* matra_group* syllable_tail;
+consonant_syllable =	Repha? (cn.halant_group)* cn A? halant_or_matra_group? syllable_tail;
+vowel_syllable =	reph? V.n? (halant_group.cn | ZWJ.cn)* halant_or_matra_group? syllable_tail;
+standalone_cluster =	reph? place_holder.n? (halant_group.cn)* halant_or_matra_group? syllable_tail;
 other =			any;
 
 main := |*
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
index 3c2bab7..23afbe0 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
@@ -28,3 +28,4 @@
 र्अ्
 र्अ्‌
 र्अ्‍
+र्आ्र्
commit ca4e3d3eab7b738c2b8e2a81696a28bca1b81495
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 15:05:40 2012 -0400

    [Indic] Streamline halant/joiner in grammar

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 54f7765..48bf609 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -69,9 +69,9 @@ place_holder = NBSP | DOTTEDCIRCLE;
 halant_group = (h.z?|z.h);
 
 
-consonant_syllable =	Repha? (cn halant_group)* cn A? (h.z? | matra_group*)? syllable_tail;
-vowel_syllable =	reph? V.n? (z?.h.cn | ZWJ.cn)* matra_group* syllable_tail;
-standalone_cluster =	reph? place_holder.n? (z? h.cn)* matra_group* syllable_tail;
+consonant_syllable =	Repha? (cn.halant_group)* cn A? (halant_group | matra_group*)? syllable_tail;
+vowel_syllable =	reph? V.n? (halant_group.cn | ZWJ.cn)* matra_group* syllable_tail;
+standalone_cluster =	reph? place_holder.n? (halant_group.cn)* matra_group* syllable_tail;
 other =			any;
 
 main := |*
commit 418d00dffddd95a1f27e9be15752d494c627d45e
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 14:57:28 2012 -0400

    [Indic] Minor

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 5609ff3..54f7765 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -66,9 +66,10 @@ cn = c.n?;
 matra_group = M.N?.H?;
 syllable_tail = SM? (Coeng (cn|V))? (VD VD?)?;
 place_holder = NBSP | DOTTEDCIRCLE;
+halant_group = (h.z?|z.h);
 
 
-consonant_syllable =	Repha? (cn (h.z?|z.h))* cn A? (h.z? | matra_group*)? syllable_tail;
+consonant_syllable =	Repha? (cn halant_group)* cn A? (h.z? | matra_group*)? syllable_tail;
 vowel_syllable =	reph? V.n? (z?.h.cn | ZWJ.cn)* matra_group* syllable_tail;
 standalone_cluster =	reph? place_holder.n? (z? h.cn)* matra_group* syllable_tail;
 other =			any;
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
index 0ab04d6..3c2bab7 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-devanagari/misc/misc.txt
@@ -25,3 +25,6 @@
 क्ष
 क्‌ष
 क्‍ष
+र्अ्
+र्अ्‌
+र्अ्‍
commit 4c3691d2a32ca7e54a54f7c08098fd96fa7af39e
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 14:23:55 2012 -0400

    [Indic] Hopefully minor!
    
    Refactoring Indic machine.  No semantic change.

diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index 5f565b6..5609ff3 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -56,19 +56,21 @@ Coeng = 14;
 Repha = 15;
 Ra    = 16;
 
-c = C | Ra;			# is_consonant
+c = (C | Ra);			# is_consonant
 n = (N.N? | ZWNJ?.RS);		# is_consonant_modifier
 z = ZWJ|ZWNJ;			# is_joiner
 h = H | Coeng;			# is_halant_or_coeng
 reph = (Ra H | Repha);		# possible reph
+
+cn = c.n?;
 matra_group = M.N?.H?;
-syllable_tail = SM? (Coeng (c|V))? (VD VD?)?;
+syllable_tail = SM? (Coeng (cn|V))? (VD VD?)?;
 place_holder = NBSP | DOTTEDCIRCLE;
 
 
-consonant_syllable =	Repha? (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
-vowel_syllable =	reph? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
-standalone_cluster =	reph? place_holder.n? (z? h c)* matra_group* syllable_tail;
+consonant_syllable =	Repha? (cn (h.z?|z.h))* cn A? (h.z? | matra_group*)? syllable_tail;
+vowel_syllable =	reph? V.n? (z?.h.cn | ZWJ.cn)* matra_group* syllable_tail;
+standalone_cluster =	reph? place_holder.n? (z? h.cn)* matra_group* syllable_tail;
 other =			any;
 
 main := |*
commit e092c556fb1cf38be3cea1f4b75a0d879372dfa2
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 14:09:25 2012 -0400

    [Indic] Minor

diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh
index 9637018..e97fca9 100644
--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh
@@ -267,9 +267,9 @@ static const hb_codepoint_t ra_chars[] = {
   0x0BB0, /* Tamil */		/* No Reph */
   0x0C30, /* Telugu */		/* No Reph */
   0x0CB0, /* Kannada */
-  0x0D30, /* Malayalam */	/* No Reph */
+  0x0D30, /* Malayalam */	/* No Reph, Logical Repha */
 
-  0x179A, /* Khmer */		/* No Reph */
+  0x179A, /* Khmer */		/* No Reph, Visual Repha */
 };
 
 
commit 14dbdd9e39d3a869fd1521000c889c347433d22b
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 18 13:13:03 2012 -0400

    [Indic] Unbreak Tamil
    
    Tamil has only about 150 failures now!

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index bbf5024..394e3f4 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -365,7 +365,23 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
       has_reph = true;
     };
 
-    if (basic_mask_array[HALF])
+     enum base_position_t {
+       BASE_FIRST,
+       BASE_LAST
+     } base_pos;
+
+    switch ((hb_tag_t) buffer->props.script)
+    {
+      case HB_SCRIPT_KHMER:
+	base_pos = BASE_FIRST;
+	break;
+
+      default:
+	base_pos = BASE_LAST;
+	break;
+    }
+
+    if (base_pos == BASE_LAST)
     {
       /* -> starting from the end of the syllable, move backwards */
       unsigned int i = end;



More information about the HarfBuzz mailing list