[HarfBuzz] harfbuzz: Branch 'master' - 3 commits

Fri Nov 16 13:14:48 PST 2012

src/hb-ot-layout.h                                                                 |   14 +-
 src/hb-ot-shape-complex-default.cc                                                 |    6 -
 src/hb-ot-shape-complex-indic.cc                                                   |   57 +++++++---
 src/hb-ot-shape-complex-private.hh                                                 |   16 +-
 src/hb-ot-shape-normalize-private.hh                                               |   22 +++
 src/hb-ot-shape-normalize.cc                                                       |   56 +++------
 src/hb-ot-shape.cc                                                                 |    2 
 src/hb-shape-plan.h                                                                |    2 
 test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST         |    1 
 test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/split-matras.txt |    4 
 10 files changed, 109 insertions(+), 71 deletions(-)

New commits:
commit 43b653150081a2f9dc6b7481229ac4cd952575dc
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Nov 16 13:12:35 2012 -0800

    [Indic] Another try to unbreak Sinhala split matras
    
    Just read the comments...

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index b185824..d924d1a 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -1317,15 +1317,42 @@ decompose_indic (const hb_ot_shape_normalize_context_t *c,
 #endif
   }
 
-  if (indic_options ().uniscribe_bug_compatible)
-  switch (ab)
+  if ((ab == 0x0DDA || hb_in_range<hb_codepoint_t> (ab, 0x0DDC, 0x0DDE)))
   {
-    /* These Sinhala ones have Unicode decompositions, but Uniscribe
-     * decomposes them "Khmer-style". */
-    case 0x0DDA  : *a = 0x0DD9; *b= 0x0DDA; return true;
-    case 0x0DDC  : *a = 0x0DD9; *b= 0x0DDC; return true;
-    case 0x0DDD  : *a = 0x0DD9; *b= 0x0DDD; return true;
-    case 0x0DDE  : *a = 0x0DD9; *b= 0x0DDE; return true;
+    /*
+     * Sinhala split matras...  Let the fun begin.
+     *
+     * These four characters have Unicode decompositions.  However, Uniscribe
+     * decomposes them "Khmer-style", that is, it uses the character itself to
+     * get the second half.  The first half of all four decompositions is always
+     * U+0DD9.
+     *
+     * Now, there are buggy fonts, namely, the widely used lklug.ttf, that are
+     * broken with Uniscribe.  But we need to support them.  As such, we only
+     * do the Uniscribe-style decomposition if the character is transformed into
+     * its "sec.half" form by the 'pstf' feature.  Otherwise, we fall back to
+     * Unicode decomposition.
+     *
+     * Note that we can't unconditionally use Unicode decomposition.  That would
+     * break some other fonts, that are designed to work with Uniscribe, and
+     * don't have positioning features for the Unicode-style decomposition.
+     *
+     * Argh...
+     */
+
+    const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) c->plan->data;
+
+    hb_codepoint_t glyph;
+
+    if (indic_options ().uniscribe_bug_compatible ||
+	(c->font->get_glyph (ab, 0, &glyph) &&
+	 indic_plan->pstf.would_substitute (&glyph, 1, true, c->font->face)))
+    {
+      /* Ok, safe to use Uniscribe-style decomposition. */
+      *a = 0x0DD9;
+      *b = ab;
+      return true;
+    }
   }
 
   return c->unicode->decompose (ab, a, b);
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST
index 7eff9e1..a00d7ae 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST
@@ -1,3 +1,4 @@
 extensive.txt
 misc.txt
 reph.txt
+split-matras.txt
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/split-matras.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/split-matras.txt
new file mode 100644
index 0000000..2a73a40
--- /dev/null
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/split-matras.txt
@@ -0,0 +1,4 @@
+කේ
+කො
+කෝ
+කෞ
commit 977f1740ace730dcdff8221a17f2a592c2ec7c74
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Nov 16 13:10:07 2012 -0800

    Unbreak tests

diff --git a/src/hb-ot-layout.h b/src/hb-ot-layout.h
index 3f6e5d9..7b53367 100644
--- a/src/hb-ot-layout.h
+++ b/src/hb-ot-layout.h
@@ -60,12 +60,12 @@ typedef enum {
 
 #ifdef HB_NOT_IMPLEMENTED
 hb_ot_layout_glyph_class_t
-hb_ot_layout_get_glyph_class (hb_face_t      *face,
+Xhb_ot_layout_get_glyph_class (hb_face_t      *face,
 			      hb_codepoint_t  glyph);
 #endif
 
 #ifdef HB_NOT_IMPLEMENTED
-hb_ot_layout_get_glyphs_in_class (hb_face_t                  *face,
+Xhb_ot_layout_get_glyphs_in_class (hb_face_t                  *face,
 				  hb_ot_layout_glyph_class_t  klass,
 				  hb_set_t                   *glyphs /* OUT */);
 #endif
@@ -184,7 +184,7 @@ hb_ot_layout_feature_get_lookups (hb_face_t    *face,
 
 #ifdef HB_NOT_IMPLEMENTED
 void
-hb_ot_layout_collect_lookups (hb_face_t      *face,
+Xhb_ot_layout_collect_lookups (hb_face_t      *face,
 			      hb_tag_t        table_tag,
 			      const hb_tag_t *scripts,
 			      const hb_tag_t *languages,
@@ -199,7 +199,7 @@ hb_ot_shape_plan_collect_lookups (hb_shape_plan_t *shape_plan,
 
 #ifdef HB_NOT_IMPLEMENTED
 void
-hb_ot_layout_lookup_collect_glyphs (hb_face_t    *face,
+Xhb_ot_layout_lookup_collect_glyphs (hb_face_t    *face,
 				    hb_tag_t      table_tag,
 				    unsigned int  lookup_index,
 				    hb_set_t     *glyphs_before, /* OUT. May be NULL */
@@ -227,7 +227,7 @@ typedef hb_bool_t
 				       void         *user_data);
 
 void
-hb_ot_layout_lookup_enumerate_sequences (hb_face_t    *face,
+Xhb_ot_layout_lookup_enumerate_sequences (hb_face_t    *face,
 					 hb_tag_t      table_tag,
 					 unsigned int  lookup_index,
 					 hb_ot_layout_glyph_sequence_func_t callback,
@@ -258,7 +258,7 @@ hb_ot_layout_lookup_substitute_closure (hb_face_t    *face,
 #ifdef HB_NOT_IMPLEMENTED
 /* Note: You better have GDEF when using this API, or marks won't do much. */
 hb_bool_t
-hb_ot_layout_lookup_substitute (hb_font_t            *font,
+Xhb_ot_layout_lookup_substitute (hb_font_t            *font,
 				unsigned int          lookup_index,
 				const hb_ot_layout_glyph_sequence_t *sequence,
 				unsigned int          out_size,
@@ -278,7 +278,7 @@ hb_ot_layout_has_positioning (hb_face_t *face);
 #ifdef HB_NOT_IMPLEMENTED
 /* Note: You better have GDEF when using this API, or marks won't do much. */
 hb_bool_t
-hb_ot_layout_lookup_position (hb_font_t            *font,
+Xhb_ot_layout_lookup_position (hb_font_t            *font,
 			      unsigned int          lookup_index,
 			      const hb_ot_layout_glyph_sequence_t *sequence,
 			      hb_glyph_position_t  *positions /* IN / OUT */);
diff --git a/src/hb-shape-plan.h b/src/hb-shape-plan.h
index 1f74ba5..e4ea94b 100644
--- a/src/hb-shape-plan.h
+++ b/src/hb-shape-plan.h
@@ -82,7 +82,7 @@ hb_shape_plan_execute (hb_shape_plan_t    *shape_plan,
 
 #ifdef HB_NOT_IMPLEMENTED
 const char *
-hb_shape_plan_get_shaper (hb_shape_plan_t *shape_plan);
+Xhb_shape_plan_get_shaper (hb_shape_plan_t *shape_plan);
 #endif
 
 
commit eba312c8d1b2bbe8cb9b6414e843e78d2c521aa4
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Nov 16 12:39:23 2012 -0800

    Plumbing to get shape plan and font into complex decompose function
    
    So we can handle Sinhala split matras smartly...  Coming soon.

diff --git a/src/hb-ot-shape-complex-default.cc b/src/hb-ot-shape-complex-default.cc
index 7645e22..6a17330 100644
--- a/src/hb-ot-shape-complex-default.cc
+++ b/src/hb-ot-shape-complex-default.cc
@@ -83,8 +83,8 @@ normalization_preference_default (const hb_segment_properties_t *props)
   return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
 }
 
-static hb_bool_t
-compose_default (hb_unicode_funcs_t *unicode,
+static bool
+compose_default (const hb_ot_shape_normalize_context_t *c,
 		 hb_codepoint_t  a,
 		 hb_codepoint_t  b,
 		 hb_codepoint_t *ab)
@@ -123,7 +123,7 @@ compose_default (hb_unicode_funcs_t *unicode,
     0xFB4A // TAV
   };
 
-  hb_bool_t found = unicode->compose (a, b, ab);
+  bool found = c->unicode->compose (a, b, ab);
 
   if (!found && (b & ~0x7F) == 0x0580) {
       // special-case Hebrew presentation forms that are excluded from
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index eaa77b7..b185824 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -1278,8 +1278,8 @@ normalization_preference_indic (const hb_segment_properties_t *props)
   return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT;
 }
 
-static hb_bool_t
-decompose_indic (hb_unicode_funcs_t *unicode,
+static bool
+decompose_indic (const hb_ot_shape_normalize_context_t *c,
 		 hb_codepoint_t  ab,
 		 hb_codepoint_t *a,
 		 hb_codepoint_t *b)
@@ -1328,23 +1328,23 @@ decompose_indic (hb_unicode_funcs_t *unicode,
     case 0x0DDE  : *a = 0x0DD9; *b= 0x0DDE; return true;
   }
 
-  return unicode->decompose (ab, a, b);
+  return c->unicode->decompose (ab, a, b);
 }
 
-static hb_bool_t
-compose_indic (hb_unicode_funcs_t *unicode,
+static bool
+compose_indic (const hb_ot_shape_normalize_context_t *c,
 	       hb_codepoint_t  a,
 	       hb_codepoint_t  b,
 	       hb_codepoint_t *ab)
 {
   /* Avoid recomposing split matras. */
-  if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (unicode->general_category (a)))
+  if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
     return false;
 
   /* Composition-exclusion exceptions that we want to recompose. */
   if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; }
 
-  return unicode->compose (a, b, ab);
+  return c->unicode->compose (a, b, ab);
 }
 
 
diff --git a/src/hb-ot-shape-complex-private.hh b/src/hb-ot-shape-complex-private.hh
index 9f8cecd..75dcc04 100644
--- a/src/hb-ot-shape-complex-private.hh
+++ b/src/hb-ot-shape-complex-private.hh
@@ -106,19 +106,19 @@ struct hb_ot_complex_shaper_t
    * Called during shape()'s normalization.
    * May be NULL.
    */
-  hb_bool_t (*decompose) (hb_unicode_funcs_t *unicode,
-			  hb_codepoint_t  ab,
-			  hb_codepoint_t *a,
-			  hb_codepoint_t *b);
+  bool (*decompose) (const hb_ot_shape_normalize_context_t *c,
+		     hb_codepoint_t  ab,
+		     hb_codepoint_t *a,
+		     hb_codepoint_t *b);
 
   /* compose()
    * Called during shape()'s normalization.
    * May be NULL.
    */
-  hb_bool_t (*compose) (hb_unicode_funcs_t *unicode,
-			hb_codepoint_t  a,
-			hb_codepoint_t  b,
-			hb_codepoint_t *ab);
+  bool (*compose) (const hb_ot_shape_normalize_context_t *c,
+		   hb_codepoint_t  a,
+		   hb_codepoint_t  b,
+		   hb_codepoint_t *ab);
 
   /* setup_masks()
    * Called during shape().
diff --git a/src/hb-ot-shape-normalize-private.hh b/src/hb-ot-shape-normalize-private.hh
index 4b77699..8112f03 100644
--- a/src/hb-ot-shape-normalize-private.hh
+++ b/src/hb-ot-shape-normalize-private.hh
@@ -35,7 +35,7 @@
 /* buffer var allocations, used during the normalization process */
 #define glyph_index()	var1.u32
 
-struct hb_ot_complex_shaper_t;
+struct hb_ot_shape_plan_t;
 
 enum hb_ot_shape_normalization_mode_t {
   HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED,
@@ -46,8 +46,26 @@ enum hb_ot_shape_normalization_mode_t {
   HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS
 };
 
-HB_INTERNAL void _hb_ot_shape_normalize (const hb_ot_complex_shaper_t *shaper,
+HB_INTERNAL void _hb_ot_shape_normalize (const hb_ot_shape_plan_t *shaper,
 					 hb_buffer_t *buffer,
 					 hb_font_t *font);
 
+
+struct hb_ot_shape_normalize_context_t
+{
+  const hb_ot_shape_plan_t *plan;
+  hb_buffer_t *buffer;
+  hb_font_t *font;
+  hb_unicode_funcs_t *unicode;
+  bool (*decompose) (const hb_ot_shape_normalize_context_t *c,
+		     hb_codepoint_t  ab,
+		     hb_codepoint_t *a,
+		     hb_codepoint_t *b);
+  bool (*compose) (const hb_ot_shape_normalize_context_t *c,
+		   hb_codepoint_t  a,
+		   hb_codepoint_t  b,
+		   hb_codepoint_t *ab);
+};
+
+
 #endif /* HB_OT_SHAPE_NORMALIZE_PRIVATE_HH */
diff --git a/src/hb-ot-shape-normalize.cc b/src/hb-ot-shape-normalize.cc
index df15f7d..c5325e4 100644
--- a/src/hb-ot-shape-normalize.cc
+++ b/src/hb-ot-shape-normalize.cc
@@ -82,22 +82,22 @@
  *     egrep  "`echo -n ';('; grep ';<' UnicodeData.txt | cut -d';' -f1 | tr '\n' '|'; echo ') '`" UnicodeData.txt
  */
 
-static hb_bool_t
-decompose_unicode (hb_unicode_funcs_t *unicode,
+static bool
+decompose_unicode (const hb_ot_shape_normalize_context_t *c,
 		   hb_codepoint_t  ab,
 		   hb_codepoint_t *a,
 		   hb_codepoint_t *b)
 {
-  return unicode->decompose (ab, a, b);
+  return c->unicode->decompose (ab, a, b);
 }
 
-static hb_bool_t
-compose_unicode (hb_unicode_funcs_t *unicode,
+static bool
+compose_unicode (const hb_ot_shape_normalize_context_t *c,
 		 hb_codepoint_t  a,
 		 hb_codepoint_t  b,
 		 hb_codepoint_t *ab)
 {
-  return unicode->compose (a, b, ab);
+  return c->unicode->compose (a, b, ab);
 }
 
 static inline void
@@ -127,27 +127,13 @@ skip_char (hb_buffer_t *buffer)
   buffer->skip_glyph ();
 }
 
-struct normalize_context_t
-{
-  hb_buffer_t *buffer;
-  hb_font_t *font;
-  hb_bool_t (*decompose) (hb_unicode_funcs_t *unicode,
-			  hb_codepoint_t  ab,
-			  hb_codepoint_t *a,
-			  hb_codepoint_t *b);
-  hb_bool_t (*compose) (hb_unicode_funcs_t *unicode,
-			hb_codepoint_t  a,
-			hb_codepoint_t  b,
-			hb_codepoint_t *ab);
-};
-
 /* Returns 0 if didn't decompose, number of resulting characters otherwise. */
 static inline unsigned int
-decompose (const normalize_context_t *c, bool shortest, hb_codepoint_t ab)
+decompose (const hb_ot_shape_normalize_context_t *c, bool shortest, hb_codepoint_t ab)
 {
   hb_codepoint_t a, b, a_glyph, b_glyph;
 
-  if (!c->decompose (c->buffer->unicode, ab, &a, &b) ||
+  if (!c->decompose (c, ab, &a, &b) ||
       (b && !c->font->get_glyph (b, 0, &b_glyph)))
     return 0;
 
@@ -185,7 +171,7 @@ decompose (const normalize_context_t *c, bool shortest, hb_codepoint_t ab)
 
 /* Returns 0 if didn't decompose, number of resulting characters otherwise. */
 static inline bool
-decompose_compatibility (const normalize_context_t *c, hb_codepoint_t u)
+decompose_compatibility (const hb_ot_shape_normalize_context_t *c, hb_codepoint_t u)
 {
   unsigned int len, i;
   hb_codepoint_t decomposed[HB_UNICODE_MAX_DECOMPOSITION_LEN];
@@ -207,7 +193,7 @@ decompose_compatibility (const normalize_context_t *c, hb_codepoint_t u)
 
 /* Returns true if recomposition may be benefitial. */
 static inline bool
-decompose_current_character (const normalize_context_t *c, bool shortest)
+decompose_current_character (const hb_ot_shape_normalize_context_t *c, bool shortest)
 {
   hb_buffer_t * const buffer = c->buffer;
   hb_codepoint_t glyph;
@@ -233,7 +219,7 @@ decompose_current_character (const normalize_context_t *c, bool shortest)
 }
 
 static inline void
-handle_variation_selector_cluster (const normalize_context_t *c, unsigned int end)
+handle_variation_selector_cluster (const hb_ot_shape_normalize_context_t *c, unsigned int end)
 {
   hb_buffer_t * const buffer = c->buffer;
   for (; buffer->idx < end - 1;) {
@@ -254,7 +240,7 @@ handle_variation_selector_cluster (const normalize_context_t *c, unsigned int en
 
 /* Returns true if recomposition may be benefitial. */
 static inline bool
-decompose_multi_char_cluster (const normalize_context_t *c, unsigned int end)
+decompose_multi_char_cluster (const hb_ot_shape_normalize_context_t *c, unsigned int end)
 {
   hb_buffer_t * const buffer = c->buffer;
   /* TODO Currently if there's a variation-selector we give-up, it's just too hard. */
@@ -272,7 +258,7 @@ decompose_multi_char_cluster (const normalize_context_t *c, unsigned int end)
 }
 
 static inline bool
-decompose_cluster (const normalize_context_t *c, bool short_circuit, unsigned int end)
+decompose_cluster (const hb_ot_shape_normalize_context_t *c, bool short_circuit, unsigned int end)
 {
   if (likely (c->buffer->idx + 1 == end))
     return decompose_current_character (c, short_circuit);
@@ -292,18 +278,20 @@ compare_combining_class (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
 
 
 void
-_hb_ot_shape_normalize (const hb_ot_complex_shaper_t *shaper,
+_hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,
 			hb_buffer_t *buffer,
 			hb_font_t *font)
 {
-  hb_ot_shape_normalization_mode_t mode = shaper->normalization_preference ?
-					  shaper->normalization_preference (&buffer->props) :
+  hb_ot_shape_normalization_mode_t mode = plan->shaper->normalization_preference ?
+					  plan->shaper->normalization_preference (&buffer->props) :
 					  HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT;
-  const normalize_context_t c = {
+  const hb_ot_shape_normalize_context_t c = {
+    plan,
     buffer,
     font,
-    shaper->decompose ? shaper->decompose : decompose_unicode,
-    shaper->compose ? shaper->compose : compose_unicode
+    buffer->unicode,
+    plan->shaper->decompose ? plan->shaper->decompose : decompose_unicode,
+    plan->shaper->compose   ? plan->shaper->compose   : compose_unicode
   };
 
   bool short_circuit = mode != HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED &&
@@ -389,7 +377,7 @@ _hb_ot_shape_normalize (const hb_ot_complex_shaper_t *shaper,
 	(starter == buffer->out_len - 1 ||
 	 _hb_glyph_info_get_modified_combining_class (&buffer->prev()) < _hb_glyph_info_get_modified_combining_class (&buffer->cur())) &&
 	/* And compose. */
-	c.compose (buffer->unicode,
+	c.compose (&c,
 		   buffer->out_info[starter].codepoint,
 		   buffer->cur().codepoint,
 		   &composed) &&
diff --git a/src/hb-ot-shape.cc b/src/hb-ot-shape.cc
index 450d609..8601127 100644
--- a/src/hb-ot-shape.cc
+++ b/src/hb-ot-shape.cc
@@ -362,7 +362,7 @@ hb_ot_substitute_default (hb_ot_shape_context_t *c)
 
   HB_BUFFER_ALLOCATE_VAR (c->buffer, glyph_index);
 
-  _hb_ot_shape_normalize (c->plan->shaper, c->buffer, c->font);
+  _hb_ot_shape_normalize (c->plan, c->buffer, c->font);
 
   hb_ot_shape_setup_masks (c);