[HarfBuzz] harfbuzz: Branch 'master' - 7 commits
Behdad Esfahbod
behdad at kemper.freedesktop.org
Wed Aug 29 13:07:00 PDT 2012
TODO | 4
src/hb-buffer-private.hh | 3
src/hb-buffer.cc | 16 +++
src/hb-ot-layout-gsub-table.hh | 148 ++++--------------------------
src/hb-ot-layout-gsubgpos-private.hh | 172 +++++++++++++++++++++++++++++++----
src/hb-ot-shape-complex-arabic.cc | 72 +++++++++-----
src/hb-ot-shape.cc | 25 ++---
7 files changed, 260 insertions(+), 180 deletions(-)
New commits:
commit 965c280de09b49d711cb78d629da321c802084de
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Aug 29 13:59:16 2012 -0400
Add HB_BUFFER_ASSERT_VAR
To be used in places where we access buffer vars...
diff --git a/src/hb-buffer-private.hh b/src/hb-buffer-private.hh
index 9864ca2..91e7560 100644
--- a/src/hb-buffer-private.hh
+++ b/src/hb-buffer-private.hh
@@ -129,6 +129,7 @@ struct hb_buffer_t {
HB_INTERNAL void allocate_var (unsigned int byte_i, unsigned int count, const char *owner);
HB_INTERNAL void deallocate_var (unsigned int byte_i, unsigned int count, const char *owner);
+ HB_INTERNAL void assert_var (unsigned int byte_i, unsigned int count, const char *owner);
HB_INTERNAL void deallocate_var_all (void);
HB_INTERNAL void add (hb_codepoint_t codepoint,
@@ -198,6 +199,8 @@ struct hb_buffer_t {
HB_BUFFER_XALLOCATE_VAR (b, allocate_var, var (), #var)
#define HB_BUFFER_DEALLOCATE_VAR(b, var) \
HB_BUFFER_XALLOCATE_VAR (b, deallocate_var, var (), #var)
+#define HB_BUFFER_ASSERT_VAR(b, var) \
+ HB_BUFFER_XALLOCATE_VAR (b, assert_var, var (), #var)
#endif /* HB_BUFFER_PRIVATE_HH */
diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc
index eddd5d0..e9bb15e 100644
--- a/src/hb-buffer.cc
+++ b/src/hb-buffer.cc
@@ -523,6 +523,22 @@ void hb_buffer_t::deallocate_var (unsigned int byte_i, unsigned int count, const
}
}
+void hb_buffer_t::assert_var (unsigned int byte_i, unsigned int count, const char *owner)
+{
+ if (DEBUG (BUFFER))
+ dump_var_allocation (this);
+
+ DEBUG_MSG (BUFFER, this,
+ "Asserting var bytes %d..%d for %s",
+ byte_i, byte_i + count - 1, owner);
+
+ assert (byte_i < 8 && byte_i + count <= 8);
+ for (unsigned int i = byte_i; i < byte_i + count; i++) {
+ assert (allocated_var_bytes[i]);
+ assert (0 == strcmp (allocated_var_owner[i], owner));
+ }
+}
+
void hb_buffer_t::deallocate_var_all (void)
{
memset (allocated_var_bytes, 0, sizeof (allocated_var_bytes));
commit 0ccf9b64736559a230425fd131c9eb8aa3048221
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Aug 29 11:53:26 2012 -0400
Move code around
diff --git a/src/hb-ot-shape.cc b/src/hb-ot-shape.cc
index 009a25d..473bc17 100644
--- a/src/hb-ot-shape.cc
+++ b/src/hb-ot-shape.cc
@@ -317,6 +317,16 @@ hb_ot_map_glyphs_fast (hb_buffer_t *buffer)
}
static inline void
+hb_synthesize_glyph_classes (hb_ot_shape_context_t *c)
+{
+ unsigned int count = c->buffer->len;
+ for (unsigned int i = 0; i < count; i++)
+ c->buffer->info[i].glyph_props() = _hb_glyph_info_get_general_category (&c->buffer->info[i]) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK ?
+ HB_OT_LAYOUT_GLYPH_CLASS_MARK :
+ HB_OT_LAYOUT_GLYPH_CLASS_BASE_GLYPH;
+}
+
+static inline void
hb_ot_substitute_default (hb_ot_shape_context_t *c)
{
if (c->plan->shaper->preprocess_text) {
@@ -341,17 +351,6 @@ hb_ot_substitute_default (hb_ot_shape_context_t *c)
}
static inline void
-hb_synthesize_glyph_classes (hb_ot_shape_context_t *c)
-{
- unsigned int count = c->buffer->len;
- for (unsigned int i = 0; i < count; i++)
- c->buffer->info[i].glyph_props() = _hb_glyph_info_get_general_category (&c->buffer->info[i]) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK ?
- HB_OT_LAYOUT_GLYPH_CLASS_MARK :
- HB_OT_LAYOUT_GLYPH_CLASS_BASE_GLYPH;
-}
-
-
-static inline void
hb_ot_substitute_complex (hb_ot_shape_context_t *c)
{
hb_ot_layout_substitute_start (c->font, c->buffer);
commit 2fcbbdb41a322f54b61d9ce983ab54434504c5ed
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Aug 29 11:11:54 2012 -0400
Port Arabic fallback ligating to share code with GSUB
This will eventually allow us to skip marks, as well as (fallback)
attach marks to ligature components of fallback-shaped Arabic.
That would be pretty cool. I kludged GDEF props in, so mark-skipping
works, but the produced ligature id/components will be cleared later
by substitute_start() et al.
Perhaps using a synthetic table for Arabic fallback shaping would have been a better
idea. The current approach has way too many layering violations...
diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh
index 5815d5f..00bc563 100644
--- a/src/hb-ot-layout-gsubgpos-private.hh
+++ b/src/hb-ot-layout-gsubgpos-private.hh
@@ -134,6 +134,10 @@ struct hb_apply_context_t
has_glyph_classes (gdef.has_glyph_classes ()),
digest (*digest_) {}
+ void set_lookup_props (unsigned int lookup_props_) {
+ lookup_props = lookup_props_;
+ }
+
void set_lookup (const Lookup &l) {
lookup_props = l.get_props ();
}
diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc
index 857bf55..965947a 100644
--- a/src/hb-ot-shape-complex-arabic.cc
+++ b/src/hb-ot-shape-complex-arabic.cc
@@ -26,7 +26,7 @@
#include "hb-ot-shape-complex-private.hh"
#include "hb-ot-shape-private.hh"
-
+#include "hb-ot-layout-gsubgpos-private.hh"
/* buffer var allocations */
@@ -88,17 +88,6 @@ static hb_codepoint_t get_arabic_shape (hb_codepoint_t u, unsigned int shape)
return u;
}
-static uint16_t get_ligature (hb_codepoint_t first, hb_codepoint_t second)
-{
- if (unlikely (!second)) return 0;
- for (unsigned i = 0; i < ARRAY_LENGTH (ligature_table); i++)
- if (ligature_table[i].first == first)
- for (unsigned j = 0; j < ARRAY_LENGTH (ligature_table[i].ligatures); j++)
- if (ligature_table[i].ligatures[j].second == second)
- return ligature_table[i].ligatures[j].ligature;
- return 0;
-}
-
static const hb_tag_t arabic_features[] =
{
HB_TAG('i','n','i','t'),
@@ -257,20 +246,57 @@ arabic_fallback_shape (hb_font_t *font, hb_buffer_t *buffer)
buffer->info[i].codepoint = shaped;
}
+ OT::hb_apply_context_t c (font, buffer, 1/*global mask*/, NULL);
+ c.set_lookup_props (OT::LookupFlag::IgnoreMarks);
+
/* Mandatory ligatures */
buffer->clear_output ();
- for (buffer->idx = 0; buffer->idx + 1 < count;) {
- hb_codepoint_t ligature = get_ligature (buffer->cur().codepoint,
- buffer->cur(+1).codepoint);
- if (likely (!ligature) || !(font->get_glyph (ligature, 0, &glyph))) {
- buffer->next_glyph ();
- continue;
+ for (buffer->idx = 0; buffer->idx + 1 < count;)
+ {
+ const unsigned int count = 2;
+ unsigned int end_offset;
+ bool is_mark_ligature;
+ unsigned int total_component_count;
+
+ bool matched = false;
+ for (unsigned i = 0; i < ARRAY_LENGTH (ligature_table); i++)
+ {
+ if (ligature_table[i].first != buffer->cur().codepoint)
+ continue;
+ for (unsigned j = 0; j < ARRAY_LENGTH (ligature_table[i].ligatures); j++)
+ {
+ OT::USHORT component;
+ component.set (ligature_table[i].ligatures[j].second);
+ hb_codepoint_t ligature = ligature_table[i].ligatures[j].ligature;
+ if (likely (!OT::match_input (&c, count,
+ &component,
+ OT::match_glyph,
+ NULL,
+ &end_offset,
+ &is_mark_ligature,
+ &total_component_count) ||
+ !(font->get_glyph (ligature, 0, &glyph))))
+ continue;
+
+ /* Deal, we are forming the ligature. */
+ buffer->merge_clusters (buffer->idx, buffer->idx + end_offset);
+
+ OT::ligate_input (&c,
+ count,
+ &component,
+ ligature,
+ OT::match_glyph,
+ NULL,
+ is_mark_ligature,
+ total_component_count);
+ matched = true;
+ break;
+ }
+ if (matched)
+ break;
}
-
- buffer->replace_glyphs (2, 1, &ligature);
-
- /* Technically speaking we can skip marks and stuff, like the GSUB path does.
- * But who cares, we're in fallback! */
+ if (!matched)
+ buffer->next_glyph ();
}
for (; buffer->idx < count;)
buffer->next_glyph ();
diff --git a/src/hb-ot-shape.cc b/src/hb-ot-shape.cc
index 29076cf..009a25d 100644
--- a/src/hb-ot-shape.cc
+++ b/src/hb-ot-shape.cc
@@ -319,8 +319,10 @@ hb_ot_map_glyphs_fast (hb_buffer_t *buffer)
static inline void
hb_ot_substitute_default (hb_ot_shape_context_t *c)
{
- if (c->plan->shaper->preprocess_text)
+ if (c->plan->shaper->preprocess_text) {
+ hb_synthesize_glyph_classes (c); /* XXX This is a hack for now. */
c->plan->shaper->preprocess_text (c->plan, c->buffer, c->font);
+ }
hb_ot_mirror_chars (c);
commit 5e399a8a45bddb49e06e2ca39df1ed04398c0aff
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Aug 29 10:40:49 2012 -0400
Minor
diff --git a/src/hb-ot-layout-gsub-table.hh b/src/hb-ot-layout-gsub-table.hh
index 2b5befa..8c68984 100644
--- a/src/hb-ot-layout-gsub-table.hh
+++ b/src/hb-ot-layout-gsub-table.hh
@@ -513,7 +513,6 @@ struct Ligature
ligGlyph,
match_glyph,
NULL,
- end_offset,
is_mark_ligature,
total_component_count);
diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh
index 2b5c8f4..5815d5f 100644
--- a/src/hb-ot-layout-gsubgpos-private.hh
+++ b/src/hb-ot-layout-gsubgpos-private.hh
@@ -502,7 +502,6 @@ static inline void ligate_input (hb_apply_context_t *c,
hb_codepoint_t lig_glyph,
match_func_t match_func,
const void *match_data,
- unsigned int end_offset,
bool is_mark_ligature,
unsigned int total_component_count)
{
commit a177d027d1d0ad9539e30ed75d8652e0e8da20ff
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Tue Aug 28 23:18:22 2012 -0400
[GSUB] Move ligation logic over
diff --git a/src/hb-ot-layout-gsub-table.hh b/src/hb-ot-layout-gsub-table.hh
index ac3e8a9..2b5befa 100644
--- a/src/hb-ot-layout-gsub-table.hh
+++ b/src/hb-ot-layout-gsub-table.hh
@@ -507,75 +507,15 @@ struct Ligature
/* Deal, we are forming the ligature. */
c->buffer->merge_clusters (c->buffer->idx, c->buffer->idx + end_offset);
- /*
- * - If it *is* a mark ligature, we don't allocate a new ligature id, and leave
- * the ligature to keep its old ligature id. This will allow it to attach to
- * a base ligature in GPOS. Eg. if the sequence is: LAM,LAM,SHADDA,FATHA,HEH,
- * and LAM,LAM,HEH for a ligature, they will leave SHADDA and FATHA wit a
- * ligature id and component value of 2. Then if SHADDA,FATHA form a ligature
- * later, we don't want them to lose their ligature id/component, otherwise
- * GPOS will fail to correctly position the mark ligature on top of the
- * LAM,LAM,HEH ligature. See:
- * https://bugzilla.gnome.org/show_bug.cgi?id=676343
- *
- * - If a ligature is formed of components that some of which are also ligatures
- * themselves, and those ligature components had marks attached to *their*
- * components, we have to attach the marks to the new ligature component
- * positions! Now *that*'s tricky! And these marks may be following the
- * last component of the whole sequence, so we should loop forward looking
- * for them and update them.
- *
- * Eg. the sequence is LAM,LAM,SHADDA,FATHA,HEH, and the font first forms a
- * 'calt' ligature of LAM,HEH, leaving the SHADDA and FATHA with a ligature
- * id and component == 1. Now, during 'liga', the LAM and the LAM-HEH ligature
- * form a LAM-LAM-HEH ligature. We need to reassign the SHADDA and FATHA to
- * the new ligature with a component value of 2.
- *
- * This in fact happened to a font... See:
- * https://bugzilla.gnome.org/show_bug.cgi?id=437633
- */
-
- unsigned int klass = is_mark_ligature ? 0 : HB_OT_LAYOUT_GLYPH_CLASS_LIGATURE;
- unsigned int lig_id = is_mark_ligature ? 0 : allocate_lig_id (c->buffer);
- unsigned int last_lig_id = get_lig_id (c->buffer->cur());
- unsigned int last_num_components = get_lig_num_comps (c->buffer->cur());
- unsigned int components_so_far = last_num_components;
-
- if (!is_mark_ligature)
- set_lig_props_for_ligature (c->buffer->cur(), lig_id, total_component_count);
- c->replace_glyph (ligGlyph, klass);
-
- for (unsigned int i = 1; i < count; i++)
- {
- while (c->should_mark_skip_current_glyph ())
- {
- if (!is_mark_ligature) {
- unsigned int new_lig_comp = components_so_far - last_num_components +
- MIN (MAX (get_lig_comp (c->buffer->cur()), 1u), last_num_components);
- set_lig_props_for_mark (c->buffer->cur(), lig_id, new_lig_comp);
- }
- c->buffer->next_glyph ();
- }
-
- last_lig_id = get_lig_id (c->buffer->cur());
- last_num_components = get_lig_num_comps (c->buffer->cur());
- components_so_far += last_num_components;
-
- /* Skip the base glyph */
- c->buffer->idx++;
- }
-
- if (!is_mark_ligature && last_lig_id) {
- /* Re-adjust components for any marks following. */
- for (unsigned int i = c->buffer->idx; i < c->buffer->len; i++) {
- if (last_lig_id == get_lig_id (c->buffer->info[i])) {
- unsigned int new_lig_comp = components_so_far - last_num_components +
- MIN (MAX (get_lig_comp (c->buffer->info[i]), 1u), last_num_components);
- set_lig_props_for_mark (c->buffer->info[i], lig_id, new_lig_comp);
- } else
- break;
- }
- }
+ ligate_input (c,
+ count,
+ &component[1],
+ ligGlyph,
+ match_glyph,
+ NULL,
+ end_offset,
+ is_mark_ligature,
+ total_component_count);
return TRACE_RETURN (true);
}
diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh
index 3d69736..2b5c8f4 100644
--- a/src/hb-ot-layout-gsubgpos-private.hh
+++ b/src/hb-ot-layout-gsubgpos-private.hh
@@ -496,6 +496,86 @@ static inline bool match_input (hb_apply_context_t *c,
return true;
}
+static inline void ligate_input (hb_apply_context_t *c,
+ unsigned int count, /* Including the first glyph (not matched) */
+ const USHORT input[], /* Array of input values--start with second glyph */
+ hb_codepoint_t lig_glyph,
+ match_func_t match_func,
+ const void *match_data,
+ unsigned int end_offset,
+ bool is_mark_ligature,
+ unsigned int total_component_count)
+{
+ /*
+ * - If it *is* a mark ligature, we don't allocate a new ligature id, and leave
+ * the ligature to keep its old ligature id. This will allow it to attach to
+ * a base ligature in GPOS. Eg. if the sequence is: LAM,LAM,SHADDA,FATHA,HEH,
+ * and LAM,LAM,HEH for a ligature, they will leave SHADDA and FATHA wit a
+ * ligature id and component value of 2. Then if SHADDA,FATHA form a ligature
+ * later, we don't want them to lose their ligature id/component, otherwise
+ * GPOS will fail to correctly position the mark ligature on top of the
+ * LAM,LAM,HEH ligature. See:
+ * https://bugzilla.gnome.org/show_bug.cgi?id=676343
+ *
+ * - If a ligature is formed of components that some of which are also ligatures
+ * themselves, and those ligature components had marks attached to *their*
+ * components, we have to attach the marks to the new ligature component
+ * positions! Now *that*'s tricky! And these marks may be following the
+ * last component of the whole sequence, so we should loop forward looking
+ * for them and update them.
+ *
+ * Eg. the sequence is LAM,LAM,SHADDA,FATHA,HEH, and the font first forms a
+ * 'calt' ligature of LAM,HEH, leaving the SHADDA and FATHA with a ligature
+ * id and component == 1. Now, during 'liga', the LAM and the LAM-HEH ligature
+ * form a LAM-LAM-HEH ligature. We need to reassign the SHADDA and FATHA to
+ * the new ligature with a component value of 2.
+ *
+ * This in fact happened to a font... See:
+ * https://bugzilla.gnome.org/show_bug.cgi?id=437633
+ */
+
+ unsigned int klass = is_mark_ligature ? 0 : HB_OT_LAYOUT_GLYPH_CLASS_LIGATURE;
+ unsigned int lig_id = is_mark_ligature ? 0 : allocate_lig_id (c->buffer);
+ unsigned int last_lig_id = get_lig_id (c->buffer->cur());
+ unsigned int last_num_components = get_lig_num_comps (c->buffer->cur());
+ unsigned int components_so_far = last_num_components;
+
+ if (!is_mark_ligature)
+ set_lig_props_for_ligature (c->buffer->cur(), lig_id, total_component_count);
+ c->replace_glyph (lig_glyph, klass);
+
+ for (unsigned int i = 1; i < count; i++)
+ {
+ while (c->should_mark_skip_current_glyph ())
+ {
+ if (!is_mark_ligature) {
+ unsigned int new_lig_comp = components_so_far - last_num_components +
+ MIN (MAX (get_lig_comp (c->buffer->cur()), 1u), last_num_components);
+ set_lig_props_for_mark (c->buffer->cur(), lig_id, new_lig_comp);
+ }
+ c->buffer->next_glyph ();
+ }
+
+ last_lig_id = get_lig_id (c->buffer->cur());
+ last_num_components = get_lig_num_comps (c->buffer->cur());
+ components_so_far += last_num_components;
+
+ /* Skip the base glyph */
+ c->buffer->idx++;
+ }
+
+ if (!is_mark_ligature && last_lig_id) {
+ /* Re-adjust components for any marks following. */
+ for (unsigned int i = c->buffer->idx; i < c->buffer->len; i++) {
+ if (last_lig_id == get_lig_id (c->buffer->info[i])) {
+ unsigned int new_lig_comp = components_so_far - last_num_components +
+ MIN (MAX (get_lig_comp (c->buffer->info[i]), 1u), last_num_components);
+ set_lig_props_for_mark (c->buffer->info[i], lig_id, new_lig_comp);
+ } else
+ break;
+ }
+ }
+}
static inline bool match_backtrack (hb_apply_context_t *c,
unsigned int count,
commit 191fa885d9e0a2dce92dd8727cddd18495e62409
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Tue Aug 28 22:58:55 2012 -0400
[GSUB] Merge Ligature and context input matching
Looks better now...
diff --git a/TODO b/TODO
index 441eb4d..1ca2480 100644
--- a/TODO
+++ b/TODO
@@ -1,10 +1,6 @@
General fixes:
=============
-- Ligature matching and match_input() of (Chain)Context should use the
- same logic. Right now the Ligature logic is more involved. Possibly
- merge, or duplicate.
-
- mask propagation? (when ligation, "or" the masks).
- Warn at compile time (and runtime with HB_DEBUG?) if no Unicode / font
diff --git a/src/hb-ot-layout-gsub-table.hh b/src/hb-ot-layout-gsub-table.hh
index c85df46..ac3e8a9 100644
--- a/src/hb-ot-layout-gsub-table.hh
+++ b/src/hb-ot-layout-gsub-table.hh
@@ -491,66 +491,21 @@ struct Ligature
unsigned int count = component.len;
if (unlikely (count < 1)) return TRACE_RETURN (false);
- hb_apply_context_t::mark_skipping_forward_iterator_t skippy_iter (c, c->buffer->idx, count - 1);
- if (skippy_iter.has_no_chance ()) return TRACE_RETURN (false);
-
- /*
- * This is perhaps the trickiest part of OpenType... Remarks:
- *
- * - If all components of the ligature were marks, we call this a mark ligature.
- *
- * - If there is no GDEF, and the ligature is NOT a mark ligature, we categorize
- * it as a ligature glyph.
- *
- * - Ligatures cannot be formed across glyphs attached to different components
- * of previous ligatures. Eg. the sequence is LAM,SHADDA,LAM,FATHA,HEH, and
- * LAM,LAM,HEH form a ligature, leaving SHADDA,FATHA next to eachother.
- * However, it would be wrong to ligate that SHADDA,FATHA sequence.o
- * There is an exception to this: If a ligature tries ligating with marks that
- * belong to it itself, go ahead, assuming that the font designer knows what
- * they are doing (otherwise it can break Indic stuff when a matra wants to
- * ligate with a conjunct...)
- */
-
- bool is_mark_ligature = !!(c->property & HB_OT_LAYOUT_GLYPH_CLASS_MARK);
-
- unsigned int total_component_count = 0;
- total_component_count += get_lig_num_comps (c->buffer->cur());
-
- unsigned int first_lig_id = get_lig_id (c->buffer->cur());
- unsigned int first_lig_comp = get_lig_comp (c->buffer->cur());
-
- for (unsigned int i = 1; i < count; i++)
- {
- unsigned int property;
-
- if (!skippy_iter.next (&property)) return TRACE_RETURN (false);
-
- if (likely (c->buffer->info[skippy_iter.idx].codepoint != component[i])) return TRACE_RETURN (false);
-
- unsigned int this_lig_id = get_lig_id (c->buffer->info[skippy_iter.idx]);
- unsigned int this_lig_comp = get_lig_comp (c->buffer->info[skippy_iter.idx]);
-
- if (first_lig_id && first_lig_comp) {
- /* If first component was attached to a previous ligature component,
- * all subsequent components should be attached to the same ligature
- * component, otherwise we shouldn't ligate them. */
- if (first_lig_id != this_lig_id || first_lig_comp != this_lig_comp)
- return TRACE_RETURN (false);
- } else {
- /* If first component was NOT attached to a previous ligature component,
- * all subsequent components should also NOT be attached to any ligature
- * component, unless they are attached to the first component itself! */
- if (this_lig_id && this_lig_comp && (this_lig_id != first_lig_id))
- return TRACE_RETURN (false);
- }
-
- is_mark_ligature = is_mark_ligature && (property & HB_OT_LAYOUT_GLYPH_CLASS_MARK);
- total_component_count += get_lig_num_comps (c->buffer->info[skippy_iter.idx]);
- }
+ unsigned int end_offset;
+ bool is_mark_ligature;
+ unsigned int total_component_count;
+
+ if (likely (!match_input (c, count,
+ &component[1],
+ match_glyph,
+ NULL,
+ &end_offset,
+ &is_mark_ligature,
+ &total_component_count)))
+ return TRACE_RETURN (false);
/* Deal, we are forming the ligature. */
- c->buffer->merge_clusters (c->buffer->idx, skippy_iter.idx + 1);
+ c->buffer->merge_clusters (c->buffer->idx, c->buffer->idx + end_offset);
/*
* - If it *is* a mark ligature, we don't allocate a new ligature id, and leave
diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh
index 5032d30..3d69736 100644
--- a/src/hb-ot-layout-gsubgpos-private.hh
+++ b/src/hb-ot-layout-gsubgpos-private.hh
@@ -421,13 +421,33 @@ static inline bool match_input (hb_apply_context_t *c,
const USHORT input[], /* Array of input values--start with second glyph */
match_func_t match_func,
const void *match_data,
- unsigned int *end_offset = NULL)
+ unsigned int *end_offset = NULL,
+ bool *p_is_mark_ligature = NULL,
+ unsigned int *p_total_component_count = NULL)
{
hb_auto_trace_t<HB_DEBUG_APPLY> trace (&c->debug_depth, "APPLY", NULL, HB_FUNC, "idx %d codepoint %u", c->buffer->idx, c->buffer->cur().codepoint);
hb_apply_context_t::mark_skipping_forward_iterator_t skippy_iter (c, c->buffer->idx, count - 1);
if (skippy_iter.has_no_chance ()) return TRACE_RETURN (false);
+ /*
+ * This is perhaps the trickiest part of OpenType... Remarks:
+ *
+ * - If all components of the ligature were marks, we call this a mark ligature.
+ *
+ * - If there is no GDEF, and the ligature is NOT a mark ligature, we categorize
+ * it as a ligature glyph.
+ *
+ * - Ligatures cannot be formed across glyphs attached to different components
+ * of previous ligatures. Eg. the sequence is LAM,SHADDA,LAM,FATHA,HEH, and
+ * LAM,LAM,HEH form a ligature, leaving SHADDA,FATHA next to eachother.
+ * However, it would be wrong to ligate that SHADDA,FATHA sequence.o
+ * There is an exception to this: If a ligature tries ligating with marks that
+ * belong to it itself, go ahead, assuming that the font designer knows what
+ * they are doing (otherwise it can break Indic stuff when a matra wants to
+ * ligate with a conjunct...)
+ */
+
bool is_mark_ligature = !!(c->property & HB_OT_LAYOUT_GLYPH_CLASS_MARK);
unsigned int total_component_count = 0;
@@ -443,7 +463,6 @@ static inline bool match_input (hb_apply_context_t *c,
if (!skippy_iter.next (&property)) return TRACE_RETURN (false);
if (likely (!match_func (c->buffer->info[skippy_iter.idx].codepoint, input[i - 1], match_data))) return false;
-// if (likely (c->buffer->info[skippy_iter.idx].codepoint != component[i])) return TRACE_RETURN (false);
unsigned int this_lig_id = get_lig_id (c->buffer->info[skippy_iter.idx]);
unsigned int this_lig_comp = get_lig_comp (c->buffer->info[skippy_iter.idx]);
@@ -469,6 +488,12 @@ static inline bool match_input (hb_apply_context_t *c,
if (end_offset)
*end_offset = skippy_iter.idx - c->buffer->idx + 1;
+ if (p_is_mark_ligature)
+ *p_is_mark_ligature = is_mark_ligature;
+
+ if (p_total_component_count)
+ *p_total_component_count = total_component_count;
+
return true;
}
commit 93814ca7dc2a7251f861c1c47ba155ba6e6bdf19
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Tue Aug 28 22:24:51 2012 -0400
Start converging Ligature and match_input
diff --git a/src/hb-ot-layout-gsub-table.hh b/src/hb-ot-layout-gsub-table.hh
index a14db14..c85df46 100644
--- a/src/hb-ot-layout-gsub-table.hh
+++ b/src/hb-ot-layout-gsub-table.hh
@@ -495,38 +495,12 @@ struct Ligature
if (skippy_iter.has_no_chance ()) return TRACE_RETURN (false);
/*
- * This is perhaps the trickiest part of GSUB... Remarks:
+ * This is perhaps the trickiest part of OpenType... Remarks:
*
* - If all components of the ligature were marks, we call this a mark ligature.
*
* - If there is no GDEF, and the ligature is NOT a mark ligature, we categorize
- * it as a ligature glyph. Though, really, this will not really be used...
- *
- * - If it *is* a mark ligature, we don't allocate a new ligature id, and leave
- * the ligature to keep its old ligature id. This will allow it to attach to
- * a base ligature in GPOS. Eg. if the sequence is: LAM,LAM,SHADDA,FATHA,HEH,
- * and LAM,LAM,HEH for a ligature, they will leave SHADDA and FATHA wit a
- * ligature id and component value of 2. Then if SHADDA,FATHA form a ligature
- * later, we don't want them to lose their ligature id/component, otherwise
- * GPOS will fail to correctly position the mark ligature on top of the
- * LAM,LAM,HEH ligature. See:
- * https://bugzilla.gnome.org/show_bug.cgi?id=676343
- *
- * - If a ligature is formed of components that some of which are also ligatures
- * themselves, and those ligature components had marks attached to *their*
- * components, we have to attach the marks to the new ligature component
- * positions! Now *that*'s tricky! And these marks may be following the
- * last component of the whole sequence, so we should loop forward looking
- * for them and update them.
- *
- * Eg. the sequence is LAM,LAM,SHADDA,FATHA,HEH, and the font first forms a
- * 'calt' ligature of LAM,HEH, leaving the SHADDA and FATHA with a ligature
- * id and component == 1. Now, during 'liga', the LAM and the LAM-HEH ligature
- * form a LAM-LAM-HEH ligature. We need to reassign the SHADDA and FATHA to
- * the new ligature with a component value of 2.
- *
- * This in fact happened to a font... See:
- * https://bugzilla.gnome.org/show_bug.cgi?id=437633
+ * it as a ligature glyph.
*
* - Ligatures cannot be formed across glyphs attached to different components
* of previous ligatures. Eg. the sequence is LAM,SHADDA,LAM,FATHA,HEH, and
@@ -578,6 +552,34 @@ struct Ligature
/* Deal, we are forming the ligature. */
c->buffer->merge_clusters (c->buffer->idx, skippy_iter.idx + 1);
+ /*
+ * - If it *is* a mark ligature, we don't allocate a new ligature id, and leave
+ * the ligature to keep its old ligature id. This will allow it to attach to
+ * a base ligature in GPOS. Eg. if the sequence is: LAM,LAM,SHADDA,FATHA,HEH,
+ * and LAM,LAM,HEH for a ligature, they will leave SHADDA and FATHA wit a
+ * ligature id and component value of 2. Then if SHADDA,FATHA form a ligature
+ * later, we don't want them to lose their ligature id/component, otherwise
+ * GPOS will fail to correctly position the mark ligature on top of the
+ * LAM,LAM,HEH ligature. See:
+ * https://bugzilla.gnome.org/show_bug.cgi?id=676343
+ *
+ * - If a ligature is formed of components that some of which are also ligatures
+ * themselves, and those ligature components had marks attached to *their*
+ * components, we have to attach the marks to the new ligature component
+ * positions! Now *that*'s tricky! And these marks may be following the
+ * last component of the whole sequence, so we should loop forward looking
+ * for them and update them.
+ *
+ * Eg. the sequence is LAM,LAM,SHADDA,FATHA,HEH, and the font first forms a
+ * 'calt' ligature of LAM,HEH, leaving the SHADDA and FATHA with a ligature
+ * id and component == 1. Now, during 'liga', the LAM and the LAM-HEH ligature
+ * form a LAM-LAM-HEH ligature. We need to reassign the SHADDA and FATHA to
+ * the new ligature with a component value of 2.
+ *
+ * This in fact happened to a font... See:
+ * https://bugzilla.gnome.org/show_bug.cgi?id=437633
+ */
+
unsigned int klass = is_mark_ligature ? 0 : HB_OT_LAYOUT_GLYPH_CLASS_LIGATURE;
unsigned int lig_id = is_mark_ligature ? 0 : allocate_lig_id (c->buffer);
unsigned int last_lig_id = get_lig_id (c->buffer->cur());
diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh
index de56402..5032d30 100644
--- a/src/hb-ot-layout-gsubgpos-private.hh
+++ b/src/hb-ot-layout-gsubgpos-private.hh
@@ -423,17 +423,47 @@ static inline bool match_input (hb_apply_context_t *c,
const void *match_data,
unsigned int *end_offset = NULL)
{
+ hb_auto_trace_t<HB_DEBUG_APPLY> trace (&c->debug_depth, "APPLY", NULL, HB_FUNC, "idx %d codepoint %u", c->buffer->idx, c->buffer->cur().codepoint);
+
hb_apply_context_t::mark_skipping_forward_iterator_t skippy_iter (c, c->buffer->idx, count - 1);
- if (skippy_iter.has_no_chance ())
- return false;
+ if (skippy_iter.has_no_chance ()) return TRACE_RETURN (false);
+
+ bool is_mark_ligature = !!(c->property & HB_OT_LAYOUT_GLYPH_CLASS_MARK);
+
+ unsigned int total_component_count = 0;
+ total_component_count += get_lig_num_comps (c->buffer->cur());
+
+ unsigned int first_lig_id = get_lig_id (c->buffer->cur());
+ unsigned int first_lig_comp = get_lig_comp (c->buffer->cur());
for (unsigned int i = 1; i < count; i++)
{
- if (!skippy_iter.next ())
- return false;
+ unsigned int property;
- if (likely (!match_func (c->buffer->info[skippy_iter.idx].codepoint, input[i - 1], match_data)))
- return false;
+ if (!skippy_iter.next (&property)) return TRACE_RETURN (false);
+
+ if (likely (!match_func (c->buffer->info[skippy_iter.idx].codepoint, input[i - 1], match_data))) return false;
+// if (likely (c->buffer->info[skippy_iter.idx].codepoint != component[i])) return TRACE_RETURN (false);
+
+ unsigned int this_lig_id = get_lig_id (c->buffer->info[skippy_iter.idx]);
+ unsigned int this_lig_comp = get_lig_comp (c->buffer->info[skippy_iter.idx]);
+
+ if (first_lig_id && first_lig_comp) {
+ /* If first component was attached to a previous ligature component,
+ * all subsequent components should be attached to the same ligature
+ * component, otherwise we shouldn't ligate them. */
+ if (first_lig_id != this_lig_id || first_lig_comp != this_lig_comp)
+ return TRACE_RETURN (false);
+ } else {
+ /* If first component was NOT attached to a previous ligature component,
+ * all subsequent components should also NOT be attached to any ligature
+ * component, unless they are attached to the first component itself! */
+ if (this_lig_id && this_lig_comp && (this_lig_id != first_lig_id))
+ return TRACE_RETURN (false);
+ }
+
+ is_mark_ligature = is_mark_ligature && (property & HB_OT_LAYOUT_GLYPH_CLASS_MARK);
+ total_component_count += get_lig_num_comps (c->buffer->info[skippy_iter.idx]);
}
if (end_offset)
@@ -448,20 +478,22 @@ static inline bool match_backtrack (hb_apply_context_t *c,
match_func_t match_func,
const void *match_data)
{
+ hb_auto_trace_t<HB_DEBUG_APPLY> trace (&c->debug_depth, "APPLY", NULL, HB_FUNC, "idx %d codepoint %u", c->buffer->idx, c->buffer->cur().codepoint);
+
hb_apply_context_t::mark_skipping_backward_iterator_t skippy_iter (c, c->buffer->backtrack_len (), count, true);
if (skippy_iter.has_no_chance ())
- return false;
+ return TRACE_RETURN (false);
for (unsigned int i = 0; i < count; i++)
{
if (!skippy_iter.prev ())
- return false;
+ return TRACE_RETURN (false);
if (likely (!match_func (c->buffer->out_info[skippy_iter.idx].codepoint, backtrack[i], match_data)))
- return false;
+ return TRACE_RETURN (false);
}
- return true;
+ return TRACE_RETURN (true);
}
static inline bool match_lookahead (hb_apply_context_t *c,
@@ -471,20 +503,22 @@ static inline bool match_lookahead (hb_apply_context_t *c,
const void *match_data,
unsigned int offset)
{
+ hb_auto_trace_t<HB_DEBUG_APPLY> trace (&c->debug_depth, "APPLY", NULL, HB_FUNC, "idx %d codepoint %u", c->buffer->idx, c->buffer->cur().codepoint);
+
hb_apply_context_t::mark_skipping_forward_iterator_t skippy_iter (c, c->buffer->idx + offset - 1, count, true);
if (skippy_iter.has_no_chance ())
- return false;
+ return TRACE_RETURN (false);
for (unsigned int i = 0; i < count; i++)
{
if (!skippy_iter.next ())
- return false;
+ return TRACE_RETURN (false);
if (likely (!match_func (c->buffer->info[skippy_iter.idx].codepoint, lookahead[i], match_data)))
- return false;
+ return TRACE_RETURN (false);
}
- return true;
+ return TRACE_RETURN (true);
}
More information about the HarfBuzz
mailing list