[HarfBuzz] harfbuzz: Branch 'master' - 3 commits
Behdad Esfahbod
behdad at kemper.freedesktop.org
Fri Nov 16 13:14:48 PST 2012
src/hb-ot-layout.h | 14 +-
src/hb-ot-shape-complex-default.cc | 6 -
src/hb-ot-shape-complex-indic.cc | 57 +++++++---
src/hb-ot-shape-complex-private.hh | 16 +-
src/hb-ot-shape-normalize-private.hh | 22 +++
src/hb-ot-shape-normalize.cc | 56 +++------
src/hb-ot-shape.cc | 2
src/hb-shape-plan.h | 2
test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST | 1
test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/split-matras.txt | 4
10 files changed, 109 insertions(+), 71 deletions(-)
New commits:
commit 43b653150081a2f9dc6b7481229ac4cd952575dc
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Fri Nov 16 13:12:35 2012 -0800
[Indic] Another try to unbreak Sinhala split matras
Just read the comments...
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index b185824..d924d1a 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -1317,15 +1317,42 @@ decompose_indic (const hb_ot_shape_normalize_context_t *c,
#endif
}
- if (indic_options ().uniscribe_bug_compatible)
- switch (ab)
+ if ((ab == 0x0DDA || hb_in_range<hb_codepoint_t> (ab, 0x0DDC, 0x0DDE)))
{
- /* These Sinhala ones have Unicode decompositions, but Uniscribe
- * decomposes them "Khmer-style". */
- case 0x0DDA : *a = 0x0DD9; *b= 0x0DDA; return true;
- case 0x0DDC : *a = 0x0DD9; *b= 0x0DDC; return true;
- case 0x0DDD : *a = 0x0DD9; *b= 0x0DDD; return true;
- case 0x0DDE : *a = 0x0DD9; *b= 0x0DDE; return true;
+ /*
+ * Sinhala split matras... Let the fun begin.
+ *
+ * These four characters have Unicode decompositions. However, Uniscribe
+ * decomposes them "Khmer-style", that is, it uses the character itself to
+ * get the second half. The first half of all four decompositions is always
+ * U+0DD9.
+ *
+ * Now, there are buggy fonts, namely, the widely used lklug.ttf, that are
+ * broken with Uniscribe. But we need to support them. As such, we only
+ * do the Uniscribe-style decomposition if the character is transformed into
+ * its "sec.half" form by the 'pstf' feature. Otherwise, we fall back to
+ * Unicode decomposition.
+ *
+ * Note that we can't unconditionally use Unicode decomposition. That would
+ * break some other fonts, that are designed to work with Uniscribe, and
+ * don't have positioning features for the Unicode-style decomposition.
+ *
+ * Argh...
+ */
+
+ const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) c->plan->data;
+
+ hb_codepoint_t glyph;
+
+ if (indic_options ().uniscribe_bug_compatible ||
+ (c->font->get_glyph (ab, 0, &glyph) &&
+ indic_plan->pstf.would_substitute (&glyph, 1, true, c->font->face)))
+ {
+ /* Ok, safe to use Uniscribe-style decomposition. */
+ *a = 0x0DD9;
+ *b = ab;
+ return true;
+ }
}
return c->unicode->decompose (ab, a, b);
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST
index 7eff9e1..a00d7ae 100644
--- a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST
@@ -1,3 +1,4 @@
extensive.txt
misc.txt
reph.txt
+split-matras.txt
diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/split-matras.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/split-matras.txt
new file mode 100644
index 0000000..2a73a40
--- /dev/null
+++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/split-matras.txt
@@ -0,0 +1,4 @@
+කේ
+කො
+කෝ
+කෞ
commit 977f1740ace730dcdff8221a17f2a592c2ec7c74
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Fri Nov 16 13:10:07 2012 -0800
Unbreak tests
diff --git a/src/hb-ot-layout.h b/src/hb-ot-layout.h
index 3f6e5d9..7b53367 100644
--- a/src/hb-ot-layout.h
+++ b/src/hb-ot-layout.h
@@ -60,12 +60,12 @@ typedef enum {
#ifdef HB_NOT_IMPLEMENTED
hb_ot_layout_glyph_class_t
-hb_ot_layout_get_glyph_class (hb_face_t *face,
+Xhb_ot_layout_get_glyph_class (hb_face_t *face,
hb_codepoint_t glyph);
#endif
#ifdef HB_NOT_IMPLEMENTED
-hb_ot_layout_get_glyphs_in_class (hb_face_t *face,
+Xhb_ot_layout_get_glyphs_in_class (hb_face_t *face,
hb_ot_layout_glyph_class_t klass,
hb_set_t *glyphs /* OUT */);
#endif
@@ -184,7 +184,7 @@ hb_ot_layout_feature_get_lookups (hb_face_t *face,
#ifdef HB_NOT_IMPLEMENTED
void
-hb_ot_layout_collect_lookups (hb_face_t *face,
+Xhb_ot_layout_collect_lookups (hb_face_t *face,
hb_tag_t table_tag,
const hb_tag_t *scripts,
const hb_tag_t *languages,
@@ -199,7 +199,7 @@ hb_ot_shape_plan_collect_lookups (hb_shape_plan_t *shape_plan,
#ifdef HB_NOT_IMPLEMENTED
void
-hb_ot_layout_lookup_collect_glyphs (hb_face_t *face,
+Xhb_ot_layout_lookup_collect_glyphs (hb_face_t *face,
hb_tag_t table_tag,
unsigned int lookup_index,
hb_set_t *glyphs_before, /* OUT. May be NULL */
@@ -227,7 +227,7 @@ typedef hb_bool_t
void *user_data);
void
-hb_ot_layout_lookup_enumerate_sequences (hb_face_t *face,
+Xhb_ot_layout_lookup_enumerate_sequences (hb_face_t *face,
hb_tag_t table_tag,
unsigned int lookup_index,
hb_ot_layout_glyph_sequence_func_t callback,
@@ -258,7 +258,7 @@ hb_ot_layout_lookup_substitute_closure (hb_face_t *face,
#ifdef HB_NOT_IMPLEMENTED
/* Note: You better have GDEF when using this API, or marks won't do much. */
hb_bool_t
-hb_ot_layout_lookup_substitute (hb_font_t *font,
+Xhb_ot_layout_lookup_substitute (hb_font_t *font,
unsigned int lookup_index,
const hb_ot_layout_glyph_sequence_t *sequence,
unsigned int out_size,
@@ -278,7 +278,7 @@ hb_ot_layout_has_positioning (hb_face_t *face);
#ifdef HB_NOT_IMPLEMENTED
/* Note: You better have GDEF when using this API, or marks won't do much. */
hb_bool_t
-hb_ot_layout_lookup_position (hb_font_t *font,
+Xhb_ot_layout_lookup_position (hb_font_t *font,
unsigned int lookup_index,
const hb_ot_layout_glyph_sequence_t *sequence,
hb_glyph_position_t *positions /* IN / OUT */);
diff --git a/src/hb-shape-plan.h b/src/hb-shape-plan.h
index 1f74ba5..e4ea94b 100644
--- a/src/hb-shape-plan.h
+++ b/src/hb-shape-plan.h
@@ -82,7 +82,7 @@ hb_shape_plan_execute (hb_shape_plan_t *shape_plan,
#ifdef HB_NOT_IMPLEMENTED
const char *
-hb_shape_plan_get_shaper (hb_shape_plan_t *shape_plan);
+Xhb_shape_plan_get_shaper (hb_shape_plan_t *shape_plan);
#endif
commit eba312c8d1b2bbe8cb9b6414e843e78d2c521aa4
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Fri Nov 16 12:39:23 2012 -0800
Plumbing to get shape plan and font into complex decompose function
So we can handle Sinhala split matras smartly... Coming soon.
diff --git a/src/hb-ot-shape-complex-default.cc b/src/hb-ot-shape-complex-default.cc
index 7645e22..6a17330 100644
--- a/src/hb-ot-shape-complex-default.cc
+++ b/src/hb-ot-shape-complex-default.cc
@@ -83,8 +83,8 @@ normalization_preference_default (const hb_segment_properties_t *props)
return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
}
-static hb_bool_t
-compose_default (hb_unicode_funcs_t *unicode,
+static bool
+compose_default (const hb_ot_shape_normalize_context_t *c,
hb_codepoint_t a,
hb_codepoint_t b,
hb_codepoint_t *ab)
@@ -123,7 +123,7 @@ compose_default (hb_unicode_funcs_t *unicode,
0xFB4A // TAV
};
- hb_bool_t found = unicode->compose (a, b, ab);
+ bool found = c->unicode->compose (a, b, ab);
if (!found && (b & ~0x7F) == 0x0580) {
// special-case Hebrew presentation forms that are excluded from
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index eaa77b7..b185824 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -1278,8 +1278,8 @@ normalization_preference_indic (const hb_segment_properties_t *props)
return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT;
}
-static hb_bool_t
-decompose_indic (hb_unicode_funcs_t *unicode,
+static bool
+decompose_indic (const hb_ot_shape_normalize_context_t *c,
hb_codepoint_t ab,
hb_codepoint_t *a,
hb_codepoint_t *b)
@@ -1328,23 +1328,23 @@ decompose_indic (hb_unicode_funcs_t *unicode,
case 0x0DDE : *a = 0x0DD9; *b= 0x0DDE; return true;
}
- return unicode->decompose (ab, a, b);
+ return c->unicode->decompose (ab, a, b);
}
-static hb_bool_t
-compose_indic (hb_unicode_funcs_t *unicode,
+static bool
+compose_indic (const hb_ot_shape_normalize_context_t *c,
hb_codepoint_t a,
hb_codepoint_t b,
hb_codepoint_t *ab)
{
/* Avoid recomposing split matras. */
- if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (unicode->general_category (a)))
+ if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
return false;
/* Composition-exclusion exceptions that we want to recompose. */
if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; }
- return unicode->compose (a, b, ab);
+ return c->unicode->compose (a, b, ab);
}
diff --git a/src/hb-ot-shape-complex-private.hh b/src/hb-ot-shape-complex-private.hh
index 9f8cecd..75dcc04 100644
--- a/src/hb-ot-shape-complex-private.hh
+++ b/src/hb-ot-shape-complex-private.hh
@@ -106,19 +106,19 @@ struct hb_ot_complex_shaper_t
* Called during shape()'s normalization.
* May be NULL.
*/
- hb_bool_t (*decompose) (hb_unicode_funcs_t *unicode,
- hb_codepoint_t ab,
- hb_codepoint_t *a,
- hb_codepoint_t *b);
+ bool (*decompose) (const hb_ot_shape_normalize_context_t *c,
+ hb_codepoint_t ab,
+ hb_codepoint_t *a,
+ hb_codepoint_t *b);
/* compose()
* Called during shape()'s normalization.
* May be NULL.
*/
- hb_bool_t (*compose) (hb_unicode_funcs_t *unicode,
- hb_codepoint_t a,
- hb_codepoint_t b,
- hb_codepoint_t *ab);
+ bool (*compose) (const hb_ot_shape_normalize_context_t *c,
+ hb_codepoint_t a,
+ hb_codepoint_t b,
+ hb_codepoint_t *ab);
/* setup_masks()
* Called during shape().
diff --git a/src/hb-ot-shape-normalize-private.hh b/src/hb-ot-shape-normalize-private.hh
index 4b77699..8112f03 100644
--- a/src/hb-ot-shape-normalize-private.hh
+++ b/src/hb-ot-shape-normalize-private.hh
@@ -35,7 +35,7 @@
/* buffer var allocations, used during the normalization process */
#define glyph_index() var1.u32
-struct hb_ot_complex_shaper_t;
+struct hb_ot_shape_plan_t;
enum hb_ot_shape_normalization_mode_t {
HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED,
@@ -46,8 +46,26 @@ enum hb_ot_shape_normalization_mode_t {
HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS
};
-HB_INTERNAL void _hb_ot_shape_normalize (const hb_ot_complex_shaper_t *shaper,
+HB_INTERNAL void _hb_ot_shape_normalize (const hb_ot_shape_plan_t *shaper,
hb_buffer_t *buffer,
hb_font_t *font);
+
+struct hb_ot_shape_normalize_context_t
+{
+ const hb_ot_shape_plan_t *plan;
+ hb_buffer_t *buffer;
+ hb_font_t *font;
+ hb_unicode_funcs_t *unicode;
+ bool (*decompose) (const hb_ot_shape_normalize_context_t *c,
+ hb_codepoint_t ab,
+ hb_codepoint_t *a,
+ hb_codepoint_t *b);
+ bool (*compose) (const hb_ot_shape_normalize_context_t *c,
+ hb_codepoint_t a,
+ hb_codepoint_t b,
+ hb_codepoint_t *ab);
+};
+
+
#endif /* HB_OT_SHAPE_NORMALIZE_PRIVATE_HH */
diff --git a/src/hb-ot-shape-normalize.cc b/src/hb-ot-shape-normalize.cc
index df15f7d..c5325e4 100644
--- a/src/hb-ot-shape-normalize.cc
+++ b/src/hb-ot-shape-normalize.cc
@@ -82,22 +82,22 @@
* egrep "`echo -n ';('; grep ';<' UnicodeData.txt | cut -d';' -f1 | tr '\n' '|'; echo ') '`" UnicodeData.txt
*/
-static hb_bool_t
-decompose_unicode (hb_unicode_funcs_t *unicode,
+static bool
+decompose_unicode (const hb_ot_shape_normalize_context_t *c,
hb_codepoint_t ab,
hb_codepoint_t *a,
hb_codepoint_t *b)
{
- return unicode->decompose (ab, a, b);
+ return c->unicode->decompose (ab, a, b);
}
-static hb_bool_t
-compose_unicode (hb_unicode_funcs_t *unicode,
+static bool
+compose_unicode (const hb_ot_shape_normalize_context_t *c,
hb_codepoint_t a,
hb_codepoint_t b,
hb_codepoint_t *ab)
{
- return unicode->compose (a, b, ab);
+ return c->unicode->compose (a, b, ab);
}
static inline void
@@ -127,27 +127,13 @@ skip_char (hb_buffer_t *buffer)
buffer->skip_glyph ();
}
-struct normalize_context_t
-{
- hb_buffer_t *buffer;
- hb_font_t *font;
- hb_bool_t (*decompose) (hb_unicode_funcs_t *unicode,
- hb_codepoint_t ab,
- hb_codepoint_t *a,
- hb_codepoint_t *b);
- hb_bool_t (*compose) (hb_unicode_funcs_t *unicode,
- hb_codepoint_t a,
- hb_codepoint_t b,
- hb_codepoint_t *ab);
-};
-
/* Returns 0 if didn't decompose, number of resulting characters otherwise. */
static inline unsigned int
-decompose (const normalize_context_t *c, bool shortest, hb_codepoint_t ab)
+decompose (const hb_ot_shape_normalize_context_t *c, bool shortest, hb_codepoint_t ab)
{
hb_codepoint_t a, b, a_glyph, b_glyph;
- if (!c->decompose (c->buffer->unicode, ab, &a, &b) ||
+ if (!c->decompose (c, ab, &a, &b) ||
(b && !c->font->get_glyph (b, 0, &b_glyph)))
return 0;
@@ -185,7 +171,7 @@ decompose (const normalize_context_t *c, bool shortest, hb_codepoint_t ab)
/* Returns 0 if didn't decompose, number of resulting characters otherwise. */
static inline bool
-decompose_compatibility (const normalize_context_t *c, hb_codepoint_t u)
+decompose_compatibility (const hb_ot_shape_normalize_context_t *c, hb_codepoint_t u)
{
unsigned int len, i;
hb_codepoint_t decomposed[HB_UNICODE_MAX_DECOMPOSITION_LEN];
@@ -207,7 +193,7 @@ decompose_compatibility (const normalize_context_t *c, hb_codepoint_t u)
/* Returns true if recomposition may be benefitial. */
static inline bool
-decompose_current_character (const normalize_context_t *c, bool shortest)
+decompose_current_character (const hb_ot_shape_normalize_context_t *c, bool shortest)
{
hb_buffer_t * const buffer = c->buffer;
hb_codepoint_t glyph;
@@ -233,7 +219,7 @@ decompose_current_character (const normalize_context_t *c, bool shortest)
}
static inline void
-handle_variation_selector_cluster (const normalize_context_t *c, unsigned int end)
+handle_variation_selector_cluster (const hb_ot_shape_normalize_context_t *c, unsigned int end)
{
hb_buffer_t * const buffer = c->buffer;
for (; buffer->idx < end - 1;) {
@@ -254,7 +240,7 @@ handle_variation_selector_cluster (const normalize_context_t *c, unsigned int en
/* Returns true if recomposition may be benefitial. */
static inline bool
-decompose_multi_char_cluster (const normalize_context_t *c, unsigned int end)
+decompose_multi_char_cluster (const hb_ot_shape_normalize_context_t *c, unsigned int end)
{
hb_buffer_t * const buffer = c->buffer;
/* TODO Currently if there's a variation-selector we give-up, it's just too hard. */
@@ -272,7 +258,7 @@ decompose_multi_char_cluster (const normalize_context_t *c, unsigned int end)
}
static inline bool
-decompose_cluster (const normalize_context_t *c, bool short_circuit, unsigned int end)
+decompose_cluster (const hb_ot_shape_normalize_context_t *c, bool short_circuit, unsigned int end)
{
if (likely (c->buffer->idx + 1 == end))
return decompose_current_character (c, short_circuit);
@@ -292,18 +278,20 @@ compare_combining_class (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
void
-_hb_ot_shape_normalize (const hb_ot_complex_shaper_t *shaper,
+_hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,
hb_buffer_t *buffer,
hb_font_t *font)
{
- hb_ot_shape_normalization_mode_t mode = shaper->normalization_preference ?
- shaper->normalization_preference (&buffer->props) :
+ hb_ot_shape_normalization_mode_t mode = plan->shaper->normalization_preference ?
+ plan->shaper->normalization_preference (&buffer->props) :
HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT;
- const normalize_context_t c = {
+ const hb_ot_shape_normalize_context_t c = {
+ plan,
buffer,
font,
- shaper->decompose ? shaper->decompose : decompose_unicode,
- shaper->compose ? shaper->compose : compose_unicode
+ buffer->unicode,
+ plan->shaper->decompose ? plan->shaper->decompose : decompose_unicode,
+ plan->shaper->compose ? plan->shaper->compose : compose_unicode
};
bool short_circuit = mode != HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED &&
@@ -389,7 +377,7 @@ _hb_ot_shape_normalize (const hb_ot_complex_shaper_t *shaper,
(starter == buffer->out_len - 1 ||
_hb_glyph_info_get_modified_combining_class (&buffer->prev()) < _hb_glyph_info_get_modified_combining_class (&buffer->cur())) &&
/* And compose. */
- c.compose (buffer->unicode,
+ c.compose (&c,
buffer->out_info[starter].codepoint,
buffer->cur().codepoint,
&composed) &&
diff --git a/src/hb-ot-shape.cc b/src/hb-ot-shape.cc
index 450d609..8601127 100644
--- a/src/hb-ot-shape.cc
+++ b/src/hb-ot-shape.cc
@@ -362,7 +362,7 @@ hb_ot_substitute_default (hb_ot_shape_context_t *c)
HB_BUFFER_ALLOCATE_VAR (c->buffer, glyph_index);
- _hb_ot_shape_normalize (c->plan->shaper, c->buffer, c->font);
+ _hb_ot_shape_normalize (c->plan, c->buffer, c->font);
hb_ot_shape_setup_masks (c);
More information about the HarfBuzz
mailing list