[HarfBuzz] harfbuzz: Branch 'master' - 7 commits

Behdad Esfahbod behdad at kemper.freedesktop.org
Wed Aug 29 13:07:00 PDT 2012


 TODO                                 |    4 
 src/hb-buffer-private.hh             |    3 
 src/hb-buffer.cc                     |   16 +++
 src/hb-ot-layout-gsub-table.hh       |  148 ++++--------------------------
 src/hb-ot-layout-gsubgpos-private.hh |  172 +++++++++++++++++++++++++++++++----
 src/hb-ot-shape-complex-arabic.cc    |   72 +++++++++-----
 src/hb-ot-shape.cc                   |   25 ++---
 7 files changed, 260 insertions(+), 180 deletions(-)

New commits:
commit 965c280de09b49d711cb78d629da321c802084de
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Aug 29 13:59:16 2012 -0400

    Add HB_BUFFER_ASSERT_VAR
    
    To be used in places we access buffer vars...

diff --git a/src/hb-buffer-private.hh b/src/hb-buffer-private.hh
index 9864ca2..91e7560 100644
--- a/src/hb-buffer-private.hh
+++ b/src/hb-buffer-private.hh
@@ -129,6 +129,7 @@ struct hb_buffer_t {
 
   HB_INTERNAL void allocate_var (unsigned int byte_i, unsigned int count, const char *owner);
   HB_INTERNAL void deallocate_var (unsigned int byte_i, unsigned int count, const char *owner);
+  HB_INTERNAL void assert_var (unsigned int byte_i, unsigned int count, const char *owner);
   HB_INTERNAL void deallocate_var_all (void);
 
   HB_INTERNAL void add (hb_codepoint_t  codepoint,
@@ -198,6 +199,8 @@ struct hb_buffer_t {
 	HB_BUFFER_XALLOCATE_VAR (b, allocate_var, var (), #var)
 #define HB_BUFFER_DEALLOCATE_VAR(b, var) \
 	HB_BUFFER_XALLOCATE_VAR (b, deallocate_var, var (), #var)
+#define HB_BUFFER_ASSERT_VAR(b, var) \
+	HB_BUFFER_XALLOCATE_VAR (b, assert_var, var (), #var)
 
 
 #endif /* HB_BUFFER_PRIVATE_HH */
diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc
index eddd5d0..e9bb15e 100644
--- a/src/hb-buffer.cc
+++ b/src/hb-buffer.cc
@@ -523,6 +523,22 @@ void hb_buffer_t::deallocate_var (unsigned int byte_i, unsigned int count, const
   }
 }
 
+void hb_buffer_t::assert_var (unsigned int byte_i, unsigned int count, const char *owner)
+{
+  if (DEBUG (BUFFER))
+    dump_var_allocation (this);
+
+  DEBUG_MSG (BUFFER, this,
+	     "Asserting var bytes %d..%d for %s",
+	     byte_i, byte_i + count - 1, owner);
+
+  assert (byte_i < 8 && byte_i + count <= 8);
+  for (unsigned int i = byte_i; i < byte_i + count; i++) {
+    assert (allocated_var_bytes[i]);
+    assert (0 == strcmp (allocated_var_owner[i], owner));
+  }
+}
+
 void hb_buffer_t::deallocate_var_all (void)
 {
   memset (allocated_var_bytes, 0, sizeof (allocated_var_bytes));
commit 0ccf9b64736559a230425fd131c9eb8aa3048221
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Aug 29 11:53:26 2012 -0400

    Move code around

diff --git a/src/hb-ot-shape.cc b/src/hb-ot-shape.cc
index 009a25d..473bc17 100644
--- a/src/hb-ot-shape.cc
+++ b/src/hb-ot-shape.cc
@@ -317,6 +317,16 @@ hb_ot_map_glyphs_fast (hb_buffer_t  *buffer)
 }
 
 static inline void
+hb_synthesize_glyph_classes (hb_ot_shape_context_t *c)
+{
+  unsigned int count = c->buffer->len;
+  for (unsigned int i = 0; i < count; i++)
+    c->buffer->info[i].glyph_props() = _hb_glyph_info_get_general_category (&c->buffer->info[i]) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK ?
+				       HB_OT_LAYOUT_GLYPH_CLASS_MARK :
+				       HB_OT_LAYOUT_GLYPH_CLASS_BASE_GLYPH;
+}
+
+static inline void
 hb_ot_substitute_default (hb_ot_shape_context_t *c)
 {
   if (c->plan->shaper->preprocess_text) {
@@ -341,17 +351,6 @@ hb_ot_substitute_default (hb_ot_shape_context_t *c)
 }
 
 static inline void
-hb_synthesize_glyph_classes (hb_ot_shape_context_t *c)
-{
-  unsigned int count = c->buffer->len;
-  for (unsigned int i = 0; i < count; i++)
-    c->buffer->info[i].glyph_props() = _hb_glyph_info_get_general_category (&c->buffer->info[i]) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK ?
-				       HB_OT_LAYOUT_GLYPH_CLASS_MARK :
-				       HB_OT_LAYOUT_GLYPH_CLASS_BASE_GLYPH;
-}
-
-
-static inline void
 hb_ot_substitute_complex (hb_ot_shape_context_t *c)
 {
   hb_ot_layout_substitute_start (c->font, c->buffer);
commit 2fcbbdb41a322f54b61d9ce983ab54434504c5ed
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Aug 29 11:11:54 2012 -0400

    Port Arabic fallback ligating to share code with GSUB
    
    This will eventually allow us to skip marks, as well as (fallback)
    attach marks to ligature components of fallback-shaped Arabic.
    That would be pretty cool.  I kludged GDEF props in, so mark-skipping
    works, but the produced ligature id/components will be cleared later
    by substitute_start() et al.
    
    Perhaps using a synthetic table for Arabic fallback shaping was a better
    idea.  The current approach has way too many layering violations...

diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh
index 5815d5f..00bc563 100644
--- a/src/hb-ot-layout-gsubgpos-private.hh
+++ b/src/hb-ot-layout-gsubgpos-private.hh
@@ -134,6 +134,10 @@ struct hb_apply_context_t
 			has_glyph_classes (gdef.has_glyph_classes ()),
 			digest (*digest_) {}
 
+  void set_lookup_props (unsigned int lookup_props_) {
+    lookup_props = lookup_props_;
+  }
+
   void set_lookup (const Lookup &l) {
     lookup_props = l.get_props ();
   }
diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc
index 857bf55..965947a 100644
--- a/src/hb-ot-shape-complex-arabic.cc
+++ b/src/hb-ot-shape-complex-arabic.cc
@@ -26,7 +26,7 @@
 
 #include "hb-ot-shape-complex-private.hh"
 #include "hb-ot-shape-private.hh"
-
+#include "hb-ot-layout-gsubgpos-private.hh"
 
 
 /* buffer var allocations */
@@ -88,17 +88,6 @@ static hb_codepoint_t get_arabic_shape (hb_codepoint_t u, unsigned int shape)
   return u;
 }
 
-static uint16_t get_ligature (hb_codepoint_t first, hb_codepoint_t second)
-{
-  if (unlikely (!second)) return 0;
-  for (unsigned i = 0; i < ARRAY_LENGTH (ligature_table); i++)
-    if (ligature_table[i].first == first)
-      for (unsigned j = 0; j < ARRAY_LENGTH (ligature_table[i].ligatures); j++)
-	if (ligature_table[i].ligatures[j].second == second)
-	  return ligature_table[i].ligatures[j].ligature;
-  return 0;
-}
-
 static const hb_tag_t arabic_features[] =
 {
   HB_TAG('i','n','i','t'),
@@ -257,20 +246,57 @@ arabic_fallback_shape (hb_font_t *font, hb_buffer_t *buffer)
       buffer->info[i].codepoint = shaped;
   }
 
+  OT::hb_apply_context_t c (font, buffer, 1/*global mask*/, NULL);
+  c.set_lookup_props (OT::LookupFlag::IgnoreMarks);
+
   /* Mandatory ligatures */
   buffer->clear_output ();
-  for (buffer->idx = 0; buffer->idx + 1 < count;) {
-    hb_codepoint_t ligature = get_ligature (buffer->cur().codepoint,
-					    buffer->cur(+1).codepoint);
-    if (likely (!ligature) || !(font->get_glyph (ligature, 0, &glyph))) {
-      buffer->next_glyph ();
-      continue;
+  for (buffer->idx = 0; buffer->idx + 1 < count;)
+  {
+    const unsigned int count = 2;
+    unsigned int end_offset;
+    bool is_mark_ligature;
+    unsigned int total_component_count;
+
+    bool matched = false;
+    for (unsigned i = 0; i < ARRAY_LENGTH (ligature_table); i++)
+    {
+      if (ligature_table[i].first != buffer->cur().codepoint)
+        continue;
+      for (unsigned j = 0; j < ARRAY_LENGTH (ligature_table[i].ligatures); j++)
+      {
+	OT::USHORT component;
+	component.set (ligature_table[i].ligatures[j].second);
+	hb_codepoint_t ligature = ligature_table[i].ligatures[j].ligature;
+	if (likely (!OT::match_input (&c, count,
+				      &component,
+				      OT::match_glyph,
+				      NULL,
+				      &end_offset,
+				      &is_mark_ligature,
+				      &total_component_count) ||
+		    !(font->get_glyph (ligature, 0, &glyph))))
+	  continue;
+
+	/* Deal, we are forming the ligature. */
+	buffer->merge_clusters (buffer->idx, buffer->idx + end_offset);
+
+	OT::ligate_input (&c,
+			  count,
+			  &component,
+			  ligature,
+			  OT::match_glyph,
+			  NULL,
+			  is_mark_ligature,
+			  total_component_count);
+	matched = true;
+	break;
+      }
+      if (matched)
+        break;
     }
-
-    buffer->replace_glyphs (2, 1, &ligature);
-
-    /* Technically speaking we can skip marks and stuff, like the GSUB path does.
-     * But who cares, we're in fallback! */
+    if (!matched)
+      buffer->next_glyph ();
   }
   for (; buffer->idx < count;)
       buffer->next_glyph ();
diff --git a/src/hb-ot-shape.cc b/src/hb-ot-shape.cc
index 29076cf..009a25d 100644
--- a/src/hb-ot-shape.cc
+++ b/src/hb-ot-shape.cc
@@ -319,8 +319,10 @@ hb_ot_map_glyphs_fast (hb_buffer_t  *buffer)
 static inline void
 hb_ot_substitute_default (hb_ot_shape_context_t *c)
 {
-  if (c->plan->shaper->preprocess_text)
+  if (c->plan->shaper->preprocess_text) {
+    hb_synthesize_glyph_classes (c); /* XXX This is a hack for now. */
     c->plan->shaper->preprocess_text (c->plan, c->buffer, c->font);
+  }
 
   hb_ot_mirror_chars (c);
 
commit 5e399a8a45bddb49e06e2ca39df1ed04398c0aff
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Aug 29 10:40:49 2012 -0400

    Minor

diff --git a/src/hb-ot-layout-gsub-table.hh b/src/hb-ot-layout-gsub-table.hh
index 2b5befa..8c68984 100644
--- a/src/hb-ot-layout-gsub-table.hh
+++ b/src/hb-ot-layout-gsub-table.hh
@@ -513,7 +513,6 @@ struct Ligature
 		  ligGlyph,
 		  match_glyph,
 		  NULL,
-		  end_offset,
 		  is_mark_ligature,
 		  total_component_count);
 
diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh
index 2b5c8f4..5815d5f 100644
--- a/src/hb-ot-layout-gsubgpos-private.hh
+++ b/src/hb-ot-layout-gsubgpos-private.hh
@@ -502,7 +502,6 @@ static inline void ligate_input (hb_apply_context_t *c,
 				 hb_codepoint_t lig_glyph,
 				 match_func_t match_func,
 				 const void *match_data,
-				 unsigned int end_offset,
 				 bool is_mark_ligature,
 				 unsigned int total_component_count)
 {
commit a177d027d1d0ad9539e30ed75d8652e0e8da20ff
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Aug 28 23:18:22 2012 -0400

    [GSUB] Move ligation logic over

diff --git a/src/hb-ot-layout-gsub-table.hh b/src/hb-ot-layout-gsub-table.hh
index ac3e8a9..2b5befa 100644
--- a/src/hb-ot-layout-gsub-table.hh
+++ b/src/hb-ot-layout-gsub-table.hh
@@ -507,75 +507,15 @@ struct Ligature
     /* Deal, we are forming the ligature. */
     c->buffer->merge_clusters (c->buffer->idx, c->buffer->idx + end_offset);
 
-    /*
-     * - If it *is* a mark ligature, we don't allocate a new ligature id, and leave
-     *   the ligature to keep its old ligature id.  This will allow it to attach to
-     *   a base ligature in GPOS.  Eg. if the sequence is: LAM,LAM,SHADDA,FATHA,HEH,
-     *   and LAM,LAM,HEH for a ligature, they will leave SHADDA and FATHA wit a
-     *   ligature id and component value of 2.  Then if SHADDA,FATHA form a ligature
-     *   later, we don't want them to lose their ligature id/component, otherwise
-     *   GPOS will fail to correctly position the mark ligature on top of the
-     *   LAM,LAM,HEH ligature.  See:
-     *     https://bugzilla.gnome.org/show_bug.cgi?id=676343
-     *
-     * - If a ligature is formed of components that some of which are also ligatures
-     *   themselves, and those ligature components had marks attached to *their*
-     *   components, we have to attach the marks to the new ligature component
-     *   positions!  Now *that*'s tricky!  And these marks may be following the
-     *   last component of the whole sequence, so we should loop forward looking
-     *   for them and update them.
-     *
-     *   Eg. the sequence is LAM,LAM,SHADDA,FATHA,HEH, and the font first forms a
-     *   'calt' ligature of LAM,HEH, leaving the SHADDA and FATHA with a ligature
-     *   id and component == 1.  Now, during 'liga', the LAM and the LAM-HEH ligature
-     *   form a LAM-LAM-HEH ligature.  We need to reassign the SHADDA and FATHA to
-     *   the new ligature with a component value of 2.
-     *
-     *   This in fact happened to a font...  See:
-     *   https://bugzilla.gnome.org/show_bug.cgi?id=437633
-     */
-
-    unsigned int klass = is_mark_ligature ? 0 : HB_OT_LAYOUT_GLYPH_CLASS_LIGATURE;
-    unsigned int lig_id = is_mark_ligature ? 0 : allocate_lig_id (c->buffer);
-    unsigned int last_lig_id = get_lig_id (c->buffer->cur());
-    unsigned int last_num_components = get_lig_num_comps (c->buffer->cur());
-    unsigned int components_so_far = last_num_components;
-
-    if (!is_mark_ligature)
-      set_lig_props_for_ligature (c->buffer->cur(), lig_id, total_component_count);
-    c->replace_glyph (ligGlyph, klass);
-
-    for (unsigned int i = 1; i < count; i++)
-    {
-      while (c->should_mark_skip_current_glyph ())
-      {
-	if (!is_mark_ligature) {
-	  unsigned int new_lig_comp = components_so_far - last_num_components +
-				      MIN (MAX (get_lig_comp (c->buffer->cur()), 1u), last_num_components);
-	  set_lig_props_for_mark (c->buffer->cur(), lig_id, new_lig_comp);
-	}
-	c->buffer->next_glyph ();
-      }
-
-      last_lig_id = get_lig_id (c->buffer->cur());
-      last_num_components = get_lig_num_comps (c->buffer->cur());
-      components_so_far += last_num_components;
-
-      /* Skip the base glyph */
-      c->buffer->idx++;
-    }
-
-    if (!is_mark_ligature && last_lig_id) {
-      /* Re-adjust components for any marks following. */
-      for (unsigned int i = c->buffer->idx; i < c->buffer->len; i++) {
-	if (last_lig_id == get_lig_id (c->buffer->info[i])) {
-	  unsigned int new_lig_comp = components_so_far - last_num_components +
-				      MIN (MAX (get_lig_comp (c->buffer->info[i]), 1u), last_num_components);
-	  set_lig_props_for_mark (c->buffer->info[i], lig_id, new_lig_comp);
-	} else
-	  break;
-      }
-    }
+    ligate_input (c,
+		  count,
+		  &component[1],
+		  ligGlyph,
+		  match_glyph,
+		  NULL,
+		  end_offset,
+		  is_mark_ligature,
+		  total_component_count);
 
     return TRACE_RETURN (true);
   }
diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh
index 3d69736..2b5c8f4 100644
--- a/src/hb-ot-layout-gsubgpos-private.hh
+++ b/src/hb-ot-layout-gsubgpos-private.hh
@@ -496,6 +496,86 @@ static inline bool match_input (hb_apply_context_t *c,
 
   return true;
 }
+static inline void ligate_input (hb_apply_context_t *c,
+				 unsigned int count, /* Including the first glyph (not matched) */
+				 const USHORT input[], /* Array of input values--start with second glyph */
+				 hb_codepoint_t lig_glyph,
+				 match_func_t match_func,
+				 const void *match_data,
+				 unsigned int end_offset,
+				 bool is_mark_ligature,
+				 unsigned int total_component_count)
+{
+  /*
+   * - If it *is* a mark ligature, we don't allocate a new ligature id, and leave
+   *   the ligature to keep its old ligature id.  This will allow it to attach to
+   *   a base ligature in GPOS.  Eg. if the sequence is: LAM,LAM,SHADDA,FATHA,HEH,
+   *   and LAM,LAM,HEH for a ligature, they will leave SHADDA and FATHA wit a
+   *   ligature id and component value of 2.  Then if SHADDA,FATHA form a ligature
+   *   later, we don't want them to lose their ligature id/component, otherwise
+   *   GPOS will fail to correctly position the mark ligature on top of the
+   *   LAM,LAM,HEH ligature.  See:
+   *     https://bugzilla.gnome.org/show_bug.cgi?id=676343
+   *
+   * - If a ligature is formed of components that some of which are also ligatures
+   *   themselves, and those ligature components had marks attached to *their*
+   *   components, we have to attach the marks to the new ligature component
+   *   positions!  Now *that*'s tricky!  And these marks may be following the
+   *   last component of the whole sequence, so we should loop forward looking
+   *   for them and update them.
+   *
+   *   Eg. the sequence is LAM,LAM,SHADDA,FATHA,HEH, and the font first forms a
+   *   'calt' ligature of LAM,HEH, leaving the SHADDA and FATHA with a ligature
+   *   id and component == 1.  Now, during 'liga', the LAM and the LAM-HEH ligature
+   *   form a LAM-LAM-HEH ligature.  We need to reassign the SHADDA and FATHA to
+   *   the new ligature with a component value of 2.
+   *
+   *   This in fact happened to a font...  See:
+   *   https://bugzilla.gnome.org/show_bug.cgi?id=437633
+   */
+
+  unsigned int klass = is_mark_ligature ? 0 : HB_OT_LAYOUT_GLYPH_CLASS_LIGATURE;
+  unsigned int lig_id = is_mark_ligature ? 0 : allocate_lig_id (c->buffer);
+  unsigned int last_lig_id = get_lig_id (c->buffer->cur());
+  unsigned int last_num_components = get_lig_num_comps (c->buffer->cur());
+  unsigned int components_so_far = last_num_components;
+
+  if (!is_mark_ligature)
+    set_lig_props_for_ligature (c->buffer->cur(), lig_id, total_component_count);
+  c->replace_glyph (lig_glyph, klass);
+
+  for (unsigned int i = 1; i < count; i++)
+  {
+    while (c->should_mark_skip_current_glyph ())
+    {
+      if (!is_mark_ligature) {
+	unsigned int new_lig_comp = components_so_far - last_num_components +
+				    MIN (MAX (get_lig_comp (c->buffer->cur()), 1u), last_num_components);
+	set_lig_props_for_mark (c->buffer->cur(), lig_id, new_lig_comp);
+      }
+      c->buffer->next_glyph ();
+    }
+
+    last_lig_id = get_lig_id (c->buffer->cur());
+    last_num_components = get_lig_num_comps (c->buffer->cur());
+    components_so_far += last_num_components;
+
+    /* Skip the base glyph */
+    c->buffer->idx++;
+  }
+
+  if (!is_mark_ligature && last_lig_id) {
+    /* Re-adjust components for any marks following. */
+    for (unsigned int i = c->buffer->idx; i < c->buffer->len; i++) {
+      if (last_lig_id == get_lig_id (c->buffer->info[i])) {
+	unsigned int new_lig_comp = components_so_far - last_num_components +
+				    MIN (MAX (get_lig_comp (c->buffer->info[i]), 1u), last_num_components);
+	set_lig_props_for_mark (c->buffer->info[i], lig_id, new_lig_comp);
+      } else
+	break;
+    }
+  }
+}
 
 static inline bool match_backtrack (hb_apply_context_t *c,
 				    unsigned int count,
commit 191fa885d9e0a2dce92dd8727cddd18495e62409
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Aug 28 22:58:55 2012 -0400

    [GSUB] Merge Ligature and context input matching
    
    Looks better now...

diff --git a/TODO b/TODO
index 441eb4d..1ca2480 100644
--- a/TODO
+++ b/TODO
@@ -1,10 +1,6 @@
 General fixes:
 =============
 
-- Ligature matching and match_input() of (Chain)Context should use the
-  same logic.  Right now the Ligature logic is more involved.  Possibly
-  merge, or duplicate.
-
 - mask propagation? (when ligation, "or" the masks).
 
 - Warn at compile time (and runtime with HB_DEBUG?) if no Unicode / font
diff --git a/src/hb-ot-layout-gsub-table.hh b/src/hb-ot-layout-gsub-table.hh
index c85df46..ac3e8a9 100644
--- a/src/hb-ot-layout-gsub-table.hh
+++ b/src/hb-ot-layout-gsub-table.hh
@@ -491,66 +491,21 @@ struct Ligature
     unsigned int count = component.len;
     if (unlikely (count < 1)) return TRACE_RETURN (false);
 
-    hb_apply_context_t::mark_skipping_forward_iterator_t skippy_iter (c, c->buffer->idx, count - 1);
-    if (skippy_iter.has_no_chance ()) return TRACE_RETURN (false);
-
-    /*
-     * This is perhaps the trickiest part of OpenType...  Remarks:
-     *
-     * - If all components of the ligature were marks, we call this a mark ligature.
-     *
-     * - If there is no GDEF, and the ligature is NOT a mark ligature, we categorize
-     *   it as a ligature glyph.
-     *
-     * - Ligatures cannot be formed across glyphs attached to different components
-     *   of previous ligatures.  Eg. the sequence is LAM,SHADDA,LAM,FATHA,HEH, and
-     *   LAM,LAM,HEH form a ligature, leaving SHADDA,FATHA next to eachother.
-     *   However, it would be wrong to ligate that SHADDA,FATHA sequence.o
-     *   There is an exception to this: If a ligature tries ligating with marks that
-     *   belong to it itself, go ahead, assuming that the font designer knows what
-     *   they are doing (otherwise it can break Indic stuff when a matra wants to
-     *   ligate with a conjunct...)
-     */
-
-    bool is_mark_ligature = !!(c->property & HB_OT_LAYOUT_GLYPH_CLASS_MARK);
-
-    unsigned int total_component_count = 0;
-    total_component_count += get_lig_num_comps (c->buffer->cur());
-
-    unsigned int first_lig_id = get_lig_id (c->buffer->cur());
-    unsigned int first_lig_comp = get_lig_comp (c->buffer->cur());
-
-    for (unsigned int i = 1; i < count; i++)
-    {
-      unsigned int property;
-
-      if (!skippy_iter.next (&property)) return TRACE_RETURN (false);
-
-      if (likely (c->buffer->info[skippy_iter.idx].codepoint != component[i])) return TRACE_RETURN (false);
-
-      unsigned int this_lig_id = get_lig_id (c->buffer->info[skippy_iter.idx]);
-      unsigned int this_lig_comp = get_lig_comp (c->buffer->info[skippy_iter.idx]);
-
-      if (first_lig_id && first_lig_comp) {
-        /* If first component was attached to a previous ligature component,
-	 * all subsequent components should be attached to the same ligature
-	 * component, otherwise we shouldn't ligate them. */
-        if (first_lig_id != this_lig_id || first_lig_comp != this_lig_comp)
-	  return TRACE_RETURN (false);
-      } else {
-        /* If first component was NOT attached to a previous ligature component,
-	 * all subsequent components should also NOT be attached to any ligature
-	 * component, unless they are attached to the first component itself! */
-        if (this_lig_id && this_lig_comp && (this_lig_id != first_lig_id))
-	  return TRACE_RETURN (false);
-      }
-
-      is_mark_ligature = is_mark_ligature && (property & HB_OT_LAYOUT_GLYPH_CLASS_MARK);
-      total_component_count += get_lig_num_comps (c->buffer->info[skippy_iter.idx]);
-    }
+    unsigned int end_offset;
+    bool is_mark_ligature;
+    unsigned int total_component_count;
+
+    if (likely (!match_input (c, count,
+			      &component[1],
+			      match_glyph,
+			      NULL,
+			      &end_offset,
+			      &is_mark_ligature,
+			      &total_component_count)))
+      return TRACE_RETURN (false);
 
     /* Deal, we are forming the ligature. */
-    c->buffer->merge_clusters (c->buffer->idx, skippy_iter.idx + 1);
+    c->buffer->merge_clusters (c->buffer->idx, c->buffer->idx + end_offset);
 
     /*
      * - If it *is* a mark ligature, we don't allocate a new ligature id, and leave
diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh
index 5032d30..3d69736 100644
--- a/src/hb-ot-layout-gsubgpos-private.hh
+++ b/src/hb-ot-layout-gsubgpos-private.hh
@@ -421,13 +421,33 @@ static inline bool match_input (hb_apply_context_t *c,
 				const USHORT input[], /* Array of input values--start with second glyph */
 				match_func_t match_func,
 				const void *match_data,
-				unsigned int *end_offset = NULL)
+				unsigned int *end_offset = NULL,
+				bool *p_is_mark_ligature = NULL,
+				unsigned int *p_total_component_count = NULL)
 {
   hb_auto_trace_t<HB_DEBUG_APPLY> trace (&c->debug_depth, "APPLY", NULL, HB_FUNC, "idx %d codepoint %u", c->buffer->idx, c->buffer->cur().codepoint);
 
   hb_apply_context_t::mark_skipping_forward_iterator_t skippy_iter (c, c->buffer->idx, count - 1);
   if (skippy_iter.has_no_chance ()) return TRACE_RETURN (false);
 
+  /*
+   * This is perhaps the trickiest part of OpenType...  Remarks:
+   *
+   * - If all components of the ligature were marks, we call this a mark ligature.
+   *
+   * - If there is no GDEF, and the ligature is NOT a mark ligature, we categorize
+   *   it as a ligature glyph.
+   *
+   * - Ligatures cannot be formed across glyphs attached to different components
+   *   of previous ligatures.  Eg. the sequence is LAM,SHADDA,LAM,FATHA,HEH, and
+   *   LAM,LAM,HEH form a ligature, leaving SHADDA,FATHA next to eachother.
+   *   However, it would be wrong to ligate that SHADDA,FATHA sequence.o
+   *   There is an exception to this: If a ligature tries ligating with marks that
+   *   belong to it itself, go ahead, assuming that the font designer knows what
+   *   they are doing (otherwise it can break Indic stuff when a matra wants to
+   *   ligate with a conjunct...)
+   */
+
   bool is_mark_ligature = !!(c->property & HB_OT_LAYOUT_GLYPH_CLASS_MARK);
 
   unsigned int total_component_count = 0;
@@ -443,7 +463,6 @@ static inline bool match_input (hb_apply_context_t *c,
     if (!skippy_iter.next (&property)) return TRACE_RETURN (false);
 
     if (likely (!match_func (c->buffer->info[skippy_iter.idx].codepoint, input[i - 1], match_data))) return false;
-//    if (likely (c->buffer->info[skippy_iter.idx].codepoint != component[i])) return TRACE_RETURN (false);
 
     unsigned int this_lig_id = get_lig_id (c->buffer->info[skippy_iter.idx]);
     unsigned int this_lig_comp = get_lig_comp (c->buffer->info[skippy_iter.idx]);
@@ -469,6 +488,12 @@ static inline bool match_input (hb_apply_context_t *c,
   if (end_offset)
     *end_offset = skippy_iter.idx - c->buffer->idx + 1;
 
+  if (p_is_mark_ligature)
+    *p_is_mark_ligature = is_mark_ligature;
+
+  if (p_total_component_count)
+    *p_total_component_count = total_component_count;
+
   return true;
 }
 
commit 93814ca7dc2a7251f861c1c47ba155ba6e6bdf19
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Aug 28 22:24:51 2012 -0400

    Start converging Ligature and match_input

diff --git a/src/hb-ot-layout-gsub-table.hh b/src/hb-ot-layout-gsub-table.hh
index a14db14..c85df46 100644
--- a/src/hb-ot-layout-gsub-table.hh
+++ b/src/hb-ot-layout-gsub-table.hh
@@ -495,38 +495,12 @@ struct Ligature
     if (skippy_iter.has_no_chance ()) return TRACE_RETURN (false);
 
     /*
-     * This is perhaps the trickiest part of GSUB...  Remarks:
+     * This is perhaps the trickiest part of OpenType...  Remarks:
      *
      * - If all components of the ligature were marks, we call this a mark ligature.
      *
      * - If there is no GDEF, and the ligature is NOT a mark ligature, we categorize
-     *   it as a ligature glyph.  Though, really, this will not really be used...
-     *
-     * - If it *is* a mark ligature, we don't allocate a new ligature id, and leave
-     *   the ligature to keep its old ligature id.  This will allow it to attach to
-     *   a base ligature in GPOS.  Eg. if the sequence is: LAM,LAM,SHADDA,FATHA,HEH,
-     *   and LAM,LAM,HEH for a ligature, they will leave SHADDA and FATHA wit a
-     *   ligature id and component value of 2.  Then if SHADDA,FATHA form a ligature
-     *   later, we don't want them to lose their ligature id/component, otherwise
-     *   GPOS will fail to correctly position the mark ligature on top of the
-     *   LAM,LAM,HEH ligature.  See:
-     *     https://bugzilla.gnome.org/show_bug.cgi?id=676343
-     *
-     * - If a ligature is formed of components that some of which are also ligatures
-     *   themselves, and those ligature components had marks attached to *their*
-     *   components, we have to attach the marks to the new ligature component
-     *   positions!  Now *that*'s tricky!  And these marks may be following the
-     *   last component of the whole sequence, so we should loop forward looking
-     *   for them and update them.
-     *
-     *   Eg. the sequence is LAM,LAM,SHADDA,FATHA,HEH, and the font first forms a
-     *   'calt' ligature of LAM,HEH, leaving the SHADDA and FATHA with a ligature
-     *   id and component == 1.  Now, during 'liga', the LAM and the LAM-HEH ligature
-     *   form a LAM-LAM-HEH ligature.  We need to reassign the SHADDA and FATHA to
-     *   the new ligature with a component value of 2.
-     *
-     *   This in fact happened to a font...  See:
-     *   https://bugzilla.gnome.org/show_bug.cgi?id=437633
+     *   it as a ligature glyph.
      *
      * - Ligatures cannot be formed across glyphs attached to different components
      *   of previous ligatures.  Eg. the sequence is LAM,SHADDA,LAM,FATHA,HEH, and
@@ -578,6 +552,34 @@ struct Ligature
     /* Deal, we are forming the ligature. */
     c->buffer->merge_clusters (c->buffer->idx, skippy_iter.idx + 1);
 
+    /*
+     * - If it *is* a mark ligature, we don't allocate a new ligature id, and leave
+     *   the ligature to keep its old ligature id.  This will allow it to attach to
+     *   a base ligature in GPOS.  Eg. if the sequence is: LAM,LAM,SHADDA,FATHA,HEH,
+     *   and LAM,LAM,HEH for a ligature, they will leave SHADDA and FATHA wit a
+     *   ligature id and component value of 2.  Then if SHADDA,FATHA form a ligature
+     *   later, we don't want them to lose their ligature id/component, otherwise
+     *   GPOS will fail to correctly position the mark ligature on top of the
+     *   LAM,LAM,HEH ligature.  See:
+     *     https://bugzilla.gnome.org/show_bug.cgi?id=676343
+     *
+     * - If a ligature is formed of components that some of which are also ligatures
+     *   themselves, and those ligature components had marks attached to *their*
+     *   components, we have to attach the marks to the new ligature component
+     *   positions!  Now *that*'s tricky!  And these marks may be following the
+     *   last component of the whole sequence, so we should loop forward looking
+     *   for them and update them.
+     *
+     *   Eg. the sequence is LAM,LAM,SHADDA,FATHA,HEH, and the font first forms a
+     *   'calt' ligature of LAM,HEH, leaving the SHADDA and FATHA with a ligature
+     *   id and component == 1.  Now, during 'liga', the LAM and the LAM-HEH ligature
+     *   form a LAM-LAM-HEH ligature.  We need to reassign the SHADDA and FATHA to
+     *   the new ligature with a component value of 2.
+     *
+     *   This in fact happened to a font...  See:
+     *   https://bugzilla.gnome.org/show_bug.cgi?id=437633
+     */
+
     unsigned int klass = is_mark_ligature ? 0 : HB_OT_LAYOUT_GLYPH_CLASS_LIGATURE;
     unsigned int lig_id = is_mark_ligature ? 0 : allocate_lig_id (c->buffer);
     unsigned int last_lig_id = get_lig_id (c->buffer->cur());
diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh
index de56402..5032d30 100644
--- a/src/hb-ot-layout-gsubgpos-private.hh
+++ b/src/hb-ot-layout-gsubgpos-private.hh
@@ -423,17 +423,47 @@ static inline bool match_input (hb_apply_context_t *c,
 				const void *match_data,
 				unsigned int *end_offset = NULL)
 {
+  hb_auto_trace_t<HB_DEBUG_APPLY> trace (&c->debug_depth, "APPLY", NULL, HB_FUNC, "idx %d codepoint %u", c->buffer->idx, c->buffer->cur().codepoint);
+
   hb_apply_context_t::mark_skipping_forward_iterator_t skippy_iter (c, c->buffer->idx, count - 1);
-  if (skippy_iter.has_no_chance ())
-    return false;
+  if (skippy_iter.has_no_chance ()) return TRACE_RETURN (false);
+
+  bool is_mark_ligature = !!(c->property & HB_OT_LAYOUT_GLYPH_CLASS_MARK);
+
+  unsigned int total_component_count = 0;
+  total_component_count += get_lig_num_comps (c->buffer->cur());
+
+  unsigned int first_lig_id = get_lig_id (c->buffer->cur());
+  unsigned int first_lig_comp = get_lig_comp (c->buffer->cur());
 
   for (unsigned int i = 1; i < count; i++)
   {
-    if (!skippy_iter.next ())
-      return false;
+    unsigned int property;
 
-    if (likely (!match_func (c->buffer->info[skippy_iter.idx].codepoint, input[i - 1], match_data)))
-      return false;
+    if (!skippy_iter.next (&property)) return TRACE_RETURN (false);
+
+    if (likely (!match_func (c->buffer->info[skippy_iter.idx].codepoint, input[i - 1], match_data))) return false;
+//    if (likely (c->buffer->info[skippy_iter.idx].codepoint != component[i])) return TRACE_RETURN (false);
+
+    unsigned int this_lig_id = get_lig_id (c->buffer->info[skippy_iter.idx]);
+    unsigned int this_lig_comp = get_lig_comp (c->buffer->info[skippy_iter.idx]);
+
+    if (first_lig_id && first_lig_comp) {
+      /* If first component was attached to a previous ligature component,
+       * all subsequent components should be attached to the same ligature
+       * component, otherwise we shouldn't ligate them. */
+      if (first_lig_id != this_lig_id || first_lig_comp != this_lig_comp)
+	return TRACE_RETURN (false);
+    } else {
+      /* If first component was NOT attached to a previous ligature component,
+       * all subsequent components should also NOT be attached to any ligature
+       * component, unless they are attached to the first component itself! */
+      if (this_lig_id && this_lig_comp && (this_lig_id != first_lig_id))
+	return TRACE_RETURN (false);
+    }
+
+    is_mark_ligature = is_mark_ligature && (property & HB_OT_LAYOUT_GLYPH_CLASS_MARK);
+    total_component_count += get_lig_num_comps (c->buffer->info[skippy_iter.idx]);
   }
 
   if (end_offset)
@@ -448,20 +478,22 @@ static inline bool match_backtrack (hb_apply_context_t *c,
 				    match_func_t match_func,
 				    const void *match_data)
 {
+  hb_auto_trace_t<HB_DEBUG_APPLY> trace (&c->debug_depth, "APPLY", NULL, HB_FUNC, "idx %d codepoint %u", c->buffer->idx, c->buffer->cur().codepoint);
+
   hb_apply_context_t::mark_skipping_backward_iterator_t skippy_iter (c, c->buffer->backtrack_len (), count, true);
   if (skippy_iter.has_no_chance ())
-    return false;
+    return TRACE_RETURN (false);
 
   for (unsigned int i = 0; i < count; i++)
   {
     if (!skippy_iter.prev ())
-      return false;
+      return TRACE_RETURN (false);
 
     if (likely (!match_func (c->buffer->out_info[skippy_iter.idx].codepoint, backtrack[i], match_data)))
-      return false;
+      return TRACE_RETURN (false);
   }
 
-  return true;
+  return TRACE_RETURN (true);
 }
 
 static inline bool match_lookahead (hb_apply_context_t *c,
@@ -471,20 +503,22 @@ static inline bool match_lookahead (hb_apply_context_t *c,
 				    const void *match_data,
 				    unsigned int offset)
 {
+  hb_auto_trace_t<HB_DEBUG_APPLY> trace (&c->debug_depth, "APPLY", NULL, HB_FUNC, "idx %d codepoint %u", c->buffer->idx, c->buffer->cur().codepoint);
+
   hb_apply_context_t::mark_skipping_forward_iterator_t skippy_iter (c, c->buffer->idx + offset - 1, count, true);
   if (skippy_iter.has_no_chance ())
-    return false;
+    return TRACE_RETURN (false);
 
   for (unsigned int i = 0; i < count; i++)
   {
     if (!skippy_iter.next ())
-      return false;
+      return TRACE_RETURN (false);
 
     if (likely (!match_func (c->buffer->info[skippy_iter.idx].codepoint, lookahead[i], match_data)))
-      return false;
+      return TRACE_RETURN (false);
   }
 
-  return true;
+  return TRACE_RETURN (true);
 }
 
 



More information about the HarfBuzz mailing list