[HarfBuzz] harfbuzz: Branch 'master' - 5 commits
Behdad Esfahbod
behdad at kemper.freedesktop.org
Wed Nov 14 17:53:22 PST 2012
src/Makefile.am | 3
src/hb-ot-map-private.hh | 8
src/hb-ot-map.cc | 6
src/hb-ot-shape-complex-arabic.cc | 1
src/hb-ot-shape-complex-default.cc | 225 +++++
src/hb-ot-shape-complex-indic.cc | 2
src/hb-ot-shape-complex-misc.cc | 339 --------
src/hb-ot-shape-complex-private.hh | 13
src/hb-ot-shape-complex-thai.cc | 378 ++++++++++
test/shaping/texts/in-tree/shaper-thai/script-thai/misc/MANIFEST | 1
test/shaping/texts/in-tree/shaper-thai/script-thai/misc/pua-shaping.txt | 11
11 files changed, 635 insertions(+), 352 deletions(-)
New commits:
commit 1eb3e94fe99a072ce422e60ac4d4d89ef489b08a
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Nov 14 17:25:03 2012 -0800
[Thai] Implement PUA-based fallback shaping
As explained here:
http://linux.thai.net/~thep/th-otf/shaping.html
Our output now matches Uniscribe for old fonts (e.g. XP Tahoma) with no
Thai GSUB table.
diff --git a/src/hb-ot-shape-complex-thai.cc b/src/hb-ot-shape-complex-thai.cc
index 0736eca..87fe521 100644
--- a/src/hb-ot-shape-complex-thai.cc
+++ b/src/hb-ot-shape-complex-thai.cc
@@ -29,13 +29,230 @@
/* Thai / Lao shaper */
+
+/* PUA shaping */
+
+
+enum thai_consonant_type_t
+{
+ NC,
+ AC,
+ RC,
+ DC,
+ NOT_CONSONANT,
+ NUM_CONSONANT_TYPES = NOT_CONSONANT
+};
+
+static thai_consonant_type_t
+get_consonant_type (hb_codepoint_t u)
+{
+ if (u == 0x0E1B || u == 0x0E1D || u == 0x0E1F || u == 0x0E2C)
+ return AC;
+ if (u == 0x0E0D || u == 0x0E10)
+ return RC;
+ if (u == 0x0E0E || u == 0x0E0F)
+ return DC;
+ if (hb_in_range<hb_codepoint_t> (u, 0x0E01, 0x0E2E))
+ return NC;
+ return NOT_CONSONANT;
+}
+
+
+enum thai_mark_type_t
+{
+ AV,
+ BV,
+ T,
+ NOT_MARK,
+ NUM_MARK_TYPES = NOT_MARK
+};
+
+static thai_mark_type_t
+get_mark_type (hb_codepoint_t u)
+{
+ if (u == 0x0E31 || hb_in_range<hb_codepoint_t> (u, 0x0E34, 0x0E37) ||
+ u == 0x0E47 || hb_in_range<hb_codepoint_t> (u, 0x0E4D, 0x0E4E))
+ return AV;
+ if (hb_in_range<hb_codepoint_t> (u, 0x0E38, 0x0E3A))
+ return BV;
+ if (hb_in_range<hb_codepoint_t> (u, 0x0E48, 0x0E4C))
+ return T;
+ return NOT_MARK;
+}
+
+
+enum thai_action_t
+{
+ NOP,
+ SD, /* Shift combining-mark down */
+ SL, /* Shift combining-mark left */
+ SDL, /* Shift combining-mark down-left */
+ RD /* Remove descender from base */
+};
+
+static hb_codepoint_t
+thai_pua_shape (hb_codepoint_t u, thai_action_t action, hb_font_t *font)
+{
+ struct thai_pua_mapping_t {
+ hb_codepoint_t u;
+ hb_codepoint_t win_pua;
+ hb_codepoint_t mac_pua;
+ } const *pua_mappings = NULL;
+ static const thai_pua_mapping_t SD_mappings[] = {
+ {0x0E48, 0xF70A, 0xF88B}, /* MAI EK */
+ {0x0E49, 0xF70B, 0xF88E}, /* MAI THO */
+ {0x0E4A, 0xF70C, 0xF891}, /* MAI TRI */
+ {0x0E4B, 0xF70D, 0xF894}, /* MAI CHATTAWA */
+ {0x0E4C, 0xF70E, 0xF897}, /* THANTHAKHAT */
+ {0x0E38, 0xF718, 0xF89B}, /* SARA U */
+ {0x0E39, 0xF719, 0xF89C}, /* SARA UU */
+ {0x0E3A, 0xF71A, 0xF89D}, /* PHINTHU */
+ {0x0000, 0x0000, 0x0000}
+ };
+ static const thai_pua_mapping_t SDL_mappings[] = {
+ {0x0E48, 0xF705, 0xF88C}, /* MAI EK */
+ {0x0E49, 0xF706, 0xF88F}, /* MAI THO */
+ {0x0E4A, 0xF707, 0xF892}, /* MAI TRI */
+ {0x0E4B, 0xF708, 0xF895}, /* MAI CHATTAWA */
+ {0x0E4C, 0xF709, 0xF898}, /* THANTHAKHAT */
+ {0x0000, 0x0000, 0x0000}
+ };
+ static const thai_pua_mapping_t SL_mappings[] = {
+ {0x0E48, 0xF713, 0xF88A}, /* MAI EK */
+ {0x0E49, 0xF714, 0xF88D}, /* MAI THO */
+ {0x0E4A, 0xF715, 0xF890}, /* MAI TRI */
+ {0x0E4B, 0xF716, 0xF893}, /* MAI CHATTAWA */
+ {0x0E4C, 0xF717, 0xF896}, /* THANTHAKHAT */
+ {0x0E31, 0xF710, 0xF884}, /* MAI HAN-AKAT */
+ {0x0E34, 0xF701, 0xF885}, /* SARA I */
+ {0x0E35, 0xF702, 0xF886}, /* SARA II */
+ {0x0E36, 0xF703, 0xF887}, /* SARA UE */
+ {0x0E37, 0xF704, 0xF888}, /* SARA UEE */
+ {0x0E47, 0xF712, 0xF889}, /* MAITAIKHU */
+ {0x0E4D, 0xF711, 0xF899}, /* NIKHAHIT */
+ {0x0000, 0x0000, 0x0000}
+ };
+ static const thai_pua_mapping_t RD_mappings[] = {
+ {0x0E0D, 0xF70F, 0xF89A}, /* YO YING */
+ {0x0E10, 0xF700, 0xF89E}, /* THO THAN */
+ {0x0000, 0x0000, 0x0000}
+ };
+
+ switch (action) {
+ default: assert (false); /* Fallthrough */
+ case NOP: return u;
+ case SD: pua_mappings = SD_mappings; break;
+ case SDL: pua_mappings = SDL_mappings; break;
+ case SL: pua_mappings = SL_mappings; break;
+ case RD: pua_mappings = RD_mappings; break;
+ }
+ for (; pua_mappings->u; pua_mappings++)
+ if (pua_mappings->u == u)
+ {
+ hb_codepoint_t glyph;
+ if (hb_font_get_glyph (font, pua_mappings->win_pua, 0, &glyph))
+ return pua_mappings->win_pua;
+ if (hb_font_get_glyph (font, pua_mappings->mac_pua, 0, &glyph))
+ return pua_mappings->mac_pua;
+ break;
+ }
+ return u;
+}
+
+
+static enum thai_above_state_t
+{ /* Cluster above looks like: */
+ T0, /* ⣤ */
+ T1, /* ⣼ */
+ T2, /* ⣾ */
+ T3, /* ⣿ */
+ NUM_ABOVE_STATES
+} thai_above_start_state[NUM_CONSONANT_TYPES + 1/* For NOT_CONSONANT */] =
+{
+ T0, /* NC */
+ T1, /* AC */
+ T0, /* RC */
+ T0, /* DC */
+ T3, /* NOT_CONSONANT */
+};
+
+static const struct thai_above_state_machine_edge_t {
+ thai_action_t action;
+ thai_above_state_t next_state;
+} thai_above_state_machine[NUM_ABOVE_STATES][NUM_MARK_TYPES] =
+{ /*AV*/ /*BV*/ /*T*/
+/*T0*/ {{NOP,T3}, {NOP,T0}, {SD, T3}},
+/*T1*/ {{SL, T2}, {NOP,T1}, {SDL,T2}},
+/*T2*/ {{NOP,T3}, {NOP,T2}, {SL, T3}},
+/*T3*/ {{NOP,T3}, {NOP,T3}, {NOP,T3}},
+};
+
+
+static enum thai_below_state_t
+{
+ B0, /* No descender */
+ B1, /* Removable descender */
+ B2, /* Strict descender */
+ NUM_BELOW_STATES
+} thai_below_start_state[NUM_CONSONANT_TYPES + 1/* For NOT_CONSONANT */] =
+{
+ B0, /* NC */
+ B0, /* AC */
+ B1, /* RC */
+ B2, /* DC */
+ B2, /* NOT_CONSONANT */
+};
+
+static const struct thai_below_state_machine_edge_t {
+ thai_action_t action;
+ thai_below_state_t next_state;
+} thai_below_state_machine[NUM_BELOW_STATES][NUM_MARK_TYPES] =
+{ /*AV*/ /*BV*/ /*T*/
+/*B0*/ {{NOP,B0}, {NOP,B2}, {NOP, B0}},
+/*B1*/ {{NOP,B1}, {RD, B2}, {NOP, B1}},
+/*B2*/ {{NOP,B2}, {SD, B2}, {NOP, B2}},
+};
+
+
static void
do_thai_pua_shaping (const hb_ot_shape_plan_t *plan,
hb_buffer_t *buffer,
hb_font_t *font)
{
+ thai_above_state_t above_state = thai_above_start_state[NOT_CONSONANT];
+ thai_below_state_t below_state = thai_below_start_state[NOT_CONSONANT];
+ unsigned int base = 0;
+
+ hb_glyph_info_t *info = buffer->info;
+ unsigned int count = buffer->len;
+ for (unsigned int i = 0; i < count; i++)
+ {
+ thai_mark_type_t mt = get_mark_type (info[i].codepoint);
+
+ if (mt == NOT_MARK) {
+ thai_consonant_type_t ct = get_consonant_type (info[i].codepoint);
+ above_state = thai_above_start_state[ct];
+ below_state = thai_below_start_state[ct];
+ base = i;
+ continue;
+ }
+
+ const thai_above_state_machine_edge_t &above_edge = thai_above_state_machine[above_state][mt];
+ const thai_below_state_machine_edge_t &below_edge = thai_below_state_machine[below_state][mt];
+ above_state = above_edge.next_state;
+ below_state = below_edge.next_state;
+
+ /* At least one of the above/below actions is NOP. */
+ thai_action_t action = above_edge.action != NOP ? above_edge.action : below_edge.action;
+
+ if (action == RD)
+ info[base].codepoint = thai_pua_shape (info[base].codepoint, action, font);
+ else
+ info[i].codepoint = thai_pua_shape (info[i].codepoint, action, font);
+ }
}
+
static void
preprocess_text_thai (const hb_ot_shape_plan_t *plan,
hb_buffer_t *buffer,
commit 851784f8372004e0a40b698c0cdc2d7db8629aa2
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Nov 14 16:24:05 2012 -0800
Improve shaper selection
diff --git a/src/hb-ot-map-private.hh b/src/hb-ot-map-private.hh
index 11d97e1..c7dbea5 100644
--- a/src/hb-ot-map-private.hh
+++ b/src/hb-ot-map-private.hh
@@ -115,9 +115,6 @@ struct hb_ot_map_t
*lookup_count = end - start;
}
- inline hb_tag_t get_chosen_script (unsigned int table_index) const
- { return chosen_script[table_index]; }
-
HB_INTERNAL void substitute_closure (const struct hb_ot_shape_plan_t *plan, hb_face_t *face, hb_set_t *glyphs) const;
HB_INTERNAL void substitute (const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer) const;
HB_INTERNAL void position (const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer) const;
@@ -130,6 +127,9 @@ struct hb_ot_map_t
pauses[1].finish ();
}
+ public:
+ hb_tag_t chosen_script[2];
+ bool found_script[2];
private:
@@ -140,7 +140,6 @@ struct hb_ot_map_t
hb_mask_t global_mask;
- hb_tag_t chosen_script[2];
hb_prealloced_array_t<feature_map_t, 8> features;
hb_prealloced_array_t<lookup_map_t, 32> lookups[2]; /* GSUB/GPOS */
hb_prealloced_array_t<pause_map_t, 1> pauses[2]; /* GSUB/GPOS */
@@ -200,6 +199,7 @@ struct hb_ot_map_builder_t
hb_segment_properties_t props;
hb_tag_t chosen_script[2];
+ bool found_script[2];
unsigned int script_index[2], language_index[2];
private:
diff --git a/src/hb-ot-map.cc b/src/hb-ot-map.cc
index 046fa97..024b7df 100644
--- a/src/hb-ot-map.cc
+++ b/src/hb-ot-map.cc
@@ -79,7 +79,7 @@ hb_ot_map_builder_t::hb_ot_map_builder_t (hb_face_t *face_,
for (unsigned int table_index = 0; table_index < 2; table_index++) {
hb_tag_t table_tag = table_tags[table_index];
- hb_ot_layout_table_choose_script (face, table_tag, script_tags, &script_index[table_index], &chosen_script[table_index]);
+ found_script[table_index] = hb_ot_layout_table_choose_script (face, table_tag, script_tags, &script_index[table_index], &chosen_script[table_index]);
hb_ot_layout_script_find_language (face, table_tag, script_index[table_index], language_tag, &language_index[table_index]);
}
}
@@ -161,8 +161,10 @@ hb_ot_map_builder_t::compile (hb_ot_map_t &m)
{
m.global_mask = 1;
- for (unsigned int table_index = 0; table_index < 2; table_index++)
+ for (unsigned int table_index = 0; table_index < 2; table_index++) {
m.chosen_script[table_index] = chosen_script[table_index];
+ m.found_script[table_index] = found_script[table_index];
+ }
if (!feature_infos.len)
return;
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index a7f9b60..324a04b 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -329,7 +329,7 @@ data_create_indic (const hb_ot_shape_plan_t *plan)
break;
}
- indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.get_chosen_script (0) & 0x000000FF) != '2');
+ indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FF) != '2');
indic_plan->virama_glyph = (hb_codepoint_t) -1;
indic_plan->rphf.init (&plan->map, HB_TAG('r','p','h','f'));
diff --git a/src/hb-ot-shape-complex-private.hh b/src/hb-ot-shape-complex-private.hh
index 043e8e3..9f8cecd 100644
--- a/src/hb-ot-shape-complex-private.hh
+++ b/src/hb-ot-shape-complex-private.hh
@@ -278,15 +278,18 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner)
case HB_SCRIPT_TAKRI:
/* Only use Indic shaper if the font has Indic tables. */
- if (planner->map.chosen_script[0] == HB_OT_TAG_DEFAULT_SCRIPT)
- return &_hb_ot_complex_shaper_default;
- else
+ if (planner->map.found_script[0])
return &_hb_ot_complex_shaper_indic;
+ else
+ return &_hb_ot_complex_shaper_default;
case HB_SCRIPT_KHMER:
/* If the font has 'liga', let the generic shaper do it. */
- if (planner->map.chosen_script[0] == HB_OT_TAG_DEFAULT_SCRIPT ||
- hb_ot_layout_language_find_feature (planner->face, HB_OT_TAG_GSUB, planner->map.script_index[0], planner->map.language_index[0], HB_TAG ('l','i','g','a'), NULL))
+ if (!planner->map.found_script[0] ||
+ hb_ot_layout_language_find_feature (planner->face, HB_OT_TAG_GSUB,
+ planner->map.script_index[0],
+ planner->map.language_index[0],
+ HB_TAG ('l','i','g','a'), NULL))
return &_hb_ot_complex_shaper_default;
else
return &_hb_ot_complex_shaper_indic;
diff --git a/src/hb-ot-shape-complex-thai.cc b/src/hb-ot-shape-complex-thai.cc
index d6f4669..0736eca 100644
--- a/src/hb-ot-shape-complex-thai.cc
+++ b/src/hb-ot-shape-complex-thai.cc
@@ -30,10 +30,26 @@
/* Thai / Lao shaper */
static void
-preprocess_text_thai (const hb_ot_shape_plan_t *plan HB_UNUSED,
+do_thai_pua_shaping (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ hb_font_t *font)
+{
+}
+
+static void
+preprocess_text_thai (const hb_ot_shape_plan_t *plan,
hb_buffer_t *buffer,
- hb_font_t *font HB_UNUSED)
+ hb_font_t *font)
{
+ /* This function implements the shaping logic documented here:
+ *
+ * http://linux.thai.net/~thep/th-otf/shaping.html
+ *
+ * The first shaping rule listed there is needed even if the font has Thai
+ * OpenType tables. The rest do fallback positioning based on PUA codepoints.
+ * We implement that only if there exist no Thai GSUB in the font.
+ */
+
/* The following is NOT specified in the MS OT Thai spec, however, it seems
* to be what Uniscribe and other engines implement. According to Eric Muller:
*
@@ -122,6 +138,10 @@ preprocess_text_thai (const hb_ot_shape_plan_t *plan HB_UNUSED,
}
}
buffer->swap_buffers ();
+
+ /* If font has Thai GSUB, we are done. */
+ if (plan->props.script == HB_SCRIPT_THAI && !plan->map.found_script[0])
+ do_thai_pua_shaping (plan, buffer, font);
}
const hb_ot_complex_shaper_t _hb_ot_complex_shaper_thai =
commit f3584d3a3a627e38dfd7769975a670db340d2a48
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Nov 14 15:55:17 2012 -0800
Add test cases for Thai PUA shaping
diff --git a/test/shaping/texts/in-tree/shaper-thai/script-thai/misc/MANIFEST b/test/shaping/texts/in-tree/shaper-thai/script-thai/misc/MANIFEST
index 6aa865b..911099e 100644
--- a/test/shaping/texts/in-tree/shaper-thai/script-thai/misc/MANIFEST
+++ b/test/shaping/texts/in-tree/shaper-thai/script-thai/misc/MANIFEST
@@ -1,2 +1,3 @@
phinthu.txt
+pua-shaping.txt
sara-am.txt
diff --git a/test/shaping/texts/in-tree/shaper-thai/script-thai/misc/pua-shaping.txt b/test/shaping/texts/in-tree/shaper-thai/script-thai/misc/pua-shaping.txt
new file mode 100644
index 0000000..c17834b
--- /dev/null
+++ b/test/shaping/texts/in-tree/shaper-thai/script-thai/misc/pua-shaping.txt
@@ -0,0 +1,11 @@
+à¸à¸µ
+à¸à¹
+à¸à¸µà¹
+à¸à¹à¸µ
+à¸à¹
+à¸
+à¸à¸¸
+à¸à¸´
+à¸à¸´à¹
+à¸à¹
+à¸à¸¹
commit 43f04a7456419153cb03e610a825056a47824780
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Nov 14 15:51:54 2012 -0800
Move Thai shaper into a separate file
diff --git a/src/Makefile.am b/src/Makefile.am
index 372c10f..aca8abf 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -81,11 +81,12 @@ HBSOURCES += \
hb-ot-shape-complex-arabic.cc \
hb-ot-shape-complex-arabic-fallback.hh \
hb-ot-shape-complex-arabic-table.hh \
+ hb-ot-shape-complex-default.cc \
hb-ot-shape-complex-indic.cc \
hb-ot-shape-complex-indic-machine.hh \
hb-ot-shape-complex-indic-private.hh \
hb-ot-shape-complex-indic-table.hh \
- hb-ot-shape-complex-misc.cc \
+ hb-ot-shape-complex-thai.cc \
hb-ot-shape-complex-private.hh \
hb-ot-shape-normalize-private.hh \
hb-ot-shape-normalize.cc \
diff --git a/src/hb-ot-shape-complex-default.cc b/src/hb-ot-shape-complex-default.cc
new file mode 100644
index 0000000..7645e22
--- /dev/null
+++ b/src/hb-ot-shape-complex-default.cc
@@ -0,0 +1,225 @@
+/*
+ * Copyright © 2010,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb-ot-shape-complex-private.hh"
+
+
+/* TODO Add kana, and other small shapers here */
+
+
+/* The default shaper *only* adds additional per-script features.*/
+
+static const hb_tag_t hangul_features[] =
+{
+ HB_TAG('l','j','m','o'),
+ HB_TAG('v','j','m','o'),
+ HB_TAG('t','j','m','o'),
+ HB_TAG_NONE
+};
+
+static const hb_tag_t tibetan_features[] =
+{
+ HB_TAG('a','b','v','s'),
+ HB_TAG('b','l','w','s'),
+ HB_TAG('a','b','v','m'),
+ HB_TAG('b','l','w','m'),
+ HB_TAG_NONE
+};
+
+static void
+collect_features_default (hb_ot_shape_planner_t *plan)
+{
+ const hb_tag_t *script_features = NULL;
+
+ switch ((hb_tag_t) plan->props.script)
+ {
+ /* Unicode-1.1 additions */
+ case HB_SCRIPT_HANGUL:
+ script_features = hangul_features;
+ break;
+
+ /* Unicode-2.0 additions */
+ case HB_SCRIPT_TIBETAN:
+ script_features = tibetan_features;
+ break;
+ }
+
+ for (; script_features && *script_features; script_features++)
+ plan->map.add_bool_feature (*script_features);
+}
+
+static hb_ot_shape_normalization_mode_t
+normalization_preference_default (const hb_segment_properties_t *props)
+{
+ switch ((hb_tag_t) props->script)
+ {
+ /* Unicode-1.1 additions */
+ case HB_SCRIPT_HANGUL:
+ return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL;
+ }
+ return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
+}
+
+static hb_bool_t
+compose_default (hb_unicode_funcs_t *unicode,
+ hb_codepoint_t a,
+ hb_codepoint_t b,
+ hb_codepoint_t *ab)
+{
+ /* Hebrew presentation-form shaping.
+ * https://bugzilla.mozilla.org/show_bug.cgi?id=728866 */
+ // Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA;
+ // note that some letters do not have a dagesh presForm encoded
+ static const hb_codepoint_t sDageshForms[0x05EA - 0x05D0 + 1] = {
+ 0xFB30, // ALEF
+ 0xFB31, // BET
+ 0xFB32, // GIMEL
+ 0xFB33, // DALET
+ 0xFB34, // HE
+ 0xFB35, // VAV
+ 0xFB36, // ZAYIN
+ 0, // HET
+ 0xFB38, // TET
+ 0xFB39, // YOD
+ 0xFB3A, // FINAL KAF
+ 0xFB3B, // KAF
+ 0xFB3C, // LAMED
+ 0, // FINAL MEM
+ 0xFB3E, // MEM
+ 0, // FINAL NUN
+ 0xFB40, // NUN
+ 0xFB41, // SAMEKH
+ 0, // AYIN
+ 0xFB43, // FINAL PE
+ 0xFB44, // PE
+ 0, // FINAL TSADI
+ 0xFB46, // TSADI
+ 0xFB47, // QOF
+ 0xFB48, // RESH
+ 0xFB49, // SHIN
+ 0xFB4A // TAV
+ };
+
+ hb_bool_t found = unicode->compose (a, b, ab);
+
+ if (!found && (b & ~0x7F) == 0x0580) {
+ // special-case Hebrew presentation forms that are excluded from
+ // standard normalization, but wanted for old fonts
+ switch (b) {
+ case 0x05B4: // HIRIQ
+ if (a == 0x05D9) { // YOD
+ *ab = 0xFB1D;
+ found = true;
+ }
+ break;
+ case 0x05B7: // patah
+ if (a == 0x05F2) { // YIDDISH YOD YOD
+ *ab = 0xFB1F;
+ found = true;
+ } else if (a == 0x05D0) { // ALEF
+ *ab = 0xFB2E;
+ found = true;
+ }
+ break;
+ case 0x05B8: // QAMATS
+ if (a == 0x05D0) { // ALEF
+ *ab = 0xFB2F;
+ found = true;
+ }
+ break;
+ case 0x05B9: // HOLAM
+ if (a == 0x05D5) { // VAV
+ *ab = 0xFB4B;
+ found = true;
+ }
+ break;
+ case 0x05BC: // DAGESH
+ if (a >= 0x05D0 && a <= 0x05EA) {
+ *ab = sDageshForms[a - 0x05D0];
+ found = (*ab != 0);
+ } else if (a == 0xFB2A) { // SHIN WITH SHIN DOT
+ *ab = 0xFB2C;
+ found = true;
+ } else if (a == 0xFB2B) { // SHIN WITH SIN DOT
+ *ab = 0xFB2D;
+ found = true;
+ }
+ break;
+ case 0x05BF: // RAFE
+ switch (a) {
+ case 0x05D1: // BET
+ *ab = 0xFB4C;
+ found = true;
+ break;
+ case 0x05DB: // KAF
+ *ab = 0xFB4D;
+ found = true;
+ break;
+ case 0x05E4: // PE
+ *ab = 0xFB4E;
+ found = true;
+ break;
+ }
+ break;
+ case 0x05C1: // SHIN DOT
+ if (a == 0x05E9) { // SHIN
+ *ab = 0xFB2A;
+ found = true;
+ } else if (a == 0xFB49) { // SHIN WITH DAGESH
+ *ab = 0xFB2C;
+ found = true;
+ }
+ break;
+ case 0x05C2: // SIN DOT
+ if (a == 0x05E9) { // SHIN
+ *ab = 0xFB2B;
+ found = true;
+ } else if (a == 0xFB49) { // SHIN WITH DAGESH
+ *ab = 0xFB2D;
+ found = true;
+ }
+ break;
+ }
+ }
+
+ return found;
+}
+
+const hb_ot_complex_shaper_t _hb_ot_complex_shaper_default =
+{
+ "default",
+ collect_features_default,
+ NULL, /* override_features */
+ NULL, /* data_create */
+ NULL, /* data_destroy */
+ NULL, /* preprocess_text */
+ normalization_preference_default,
+ NULL, /* decompose */
+ compose_default,
+ NULL, /* setup_masks */
+ true, /* zero_width_attached_marks */
+ true, /* fallback_position */
+};
diff --git a/src/hb-ot-shape-complex-misc.cc b/src/hb-ot-shape-complex-misc.cc
deleted file mode 100644
index afe5f70..0000000
--- a/src/hb-ot-shape-complex-misc.cc
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
- * Copyright © 2010,2012 Google, Inc.
- *
- * This is part of HarfBuzz, a text shaping library.
- *
- * Permission is hereby granted, without written agreement and without
- * license or royalty fees, to use, copy, modify, and distribute this
- * software and its documentation for any purpose, provided that the
- * above copyright notice and the following two paragraphs appear in
- * all copies of this software.
- *
- * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
- * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
- * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
- * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- *
- * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
- * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
- * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
- * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
- *
- * Google Author(s): Behdad Esfahbod
- */
-
-#include "hb-ot-shape-complex-private.hh"
-
-
-/* TODO Add kana, and other small shapers here */
-
-
-/* The default shaper *only* adds additional per-script features.*/
-
-static const hb_tag_t hangul_features[] =
-{
- HB_TAG('l','j','m','o'),
- HB_TAG('v','j','m','o'),
- HB_TAG('t','j','m','o'),
- HB_TAG_NONE
-};
-
-static const hb_tag_t tibetan_features[] =
-{
- HB_TAG('a','b','v','s'),
- HB_TAG('b','l','w','s'),
- HB_TAG('a','b','v','m'),
- HB_TAG('b','l','w','m'),
- HB_TAG_NONE
-};
-
-static void
-collect_features_default (hb_ot_shape_planner_t *plan)
-{
- const hb_tag_t *script_features = NULL;
-
- switch ((hb_tag_t) plan->props.script)
- {
- /* Unicode-1.1 additions */
- case HB_SCRIPT_HANGUL:
- script_features = hangul_features;
- break;
-
- /* Unicode-2.0 additions */
- case HB_SCRIPT_TIBETAN:
- script_features = tibetan_features;
- break;
- }
-
- for (; script_features && *script_features; script_features++)
- plan->map.add_bool_feature (*script_features);
-}
-
-static hb_ot_shape_normalization_mode_t
-normalization_preference_default (const hb_segment_properties_t *props)
-{
- switch ((hb_tag_t) props->script)
- {
- /* Unicode-1.1 additions */
- case HB_SCRIPT_HANGUL:
- return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL;
- }
- return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
-}
-
-static hb_bool_t
-compose_default (hb_unicode_funcs_t *unicode,
- hb_codepoint_t a,
- hb_codepoint_t b,
- hb_codepoint_t *ab)
-{
- /* Hebrew presentation-form shaping.
- * https://bugzilla.mozilla.org/show_bug.cgi?id=728866 */
- // Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA;
- // note that some letters do not have a dagesh presForm encoded
- static const hb_codepoint_t sDageshForms[0x05EA - 0x05D0 + 1] = {
- 0xFB30, // ALEF
- 0xFB31, // BET
- 0xFB32, // GIMEL
- 0xFB33, // DALET
- 0xFB34, // HE
- 0xFB35, // VAV
- 0xFB36, // ZAYIN
- 0, // HET
- 0xFB38, // TET
- 0xFB39, // YOD
- 0xFB3A, // FINAL KAF
- 0xFB3B, // KAF
- 0xFB3C, // LAMED
- 0, // FINAL MEM
- 0xFB3E, // MEM
- 0, // FINAL NUN
- 0xFB40, // NUN
- 0xFB41, // SAMEKH
- 0, // AYIN
- 0xFB43, // FINAL PE
- 0xFB44, // PE
- 0, // FINAL TSADI
- 0xFB46, // TSADI
- 0xFB47, // QOF
- 0xFB48, // RESH
- 0xFB49, // SHIN
- 0xFB4A // TAV
- };
-
- hb_bool_t found = unicode->compose (a, b, ab);
-
- if (!found && (b & ~0x7F) == 0x0580) {
- // special-case Hebrew presentation forms that are excluded from
- // standard normalization, but wanted for old fonts
- switch (b) {
- case 0x05B4: // HIRIQ
- if (a == 0x05D9) { // YOD
- *ab = 0xFB1D;
- found = true;
- }
- break;
- case 0x05B7: // patah
- if (a == 0x05F2) { // YIDDISH YOD YOD
- *ab = 0xFB1F;
- found = true;
- } else if (a == 0x05D0) { // ALEF
- *ab = 0xFB2E;
- found = true;
- }
- break;
- case 0x05B8: // QAMATS
- if (a == 0x05D0) { // ALEF
- *ab = 0xFB2F;
- found = true;
- }
- break;
- case 0x05B9: // HOLAM
- if (a == 0x05D5) { // VAV
- *ab = 0xFB4B;
- found = true;
- }
- break;
- case 0x05BC: // DAGESH
- if (a >= 0x05D0 && a <= 0x05EA) {
- *ab = sDageshForms[a - 0x05D0];
- found = (*ab != 0);
- } else if (a == 0xFB2A) { // SHIN WITH SHIN DOT
- *ab = 0xFB2C;
- found = true;
- } else if (a == 0xFB2B) { // SHIN WITH SIN DOT
- *ab = 0xFB2D;
- found = true;
- }
- break;
- case 0x05BF: // RAFE
- switch (a) {
- case 0x05D1: // BET
- *ab = 0xFB4C;
- found = true;
- break;
- case 0x05DB: // KAF
- *ab = 0xFB4D;
- found = true;
- break;
- case 0x05E4: // PE
- *ab = 0xFB4E;
- found = true;
- break;
- }
- break;
- case 0x05C1: // SHIN DOT
- if (a == 0x05E9) { // SHIN
- *ab = 0xFB2A;
- found = true;
- } else if (a == 0xFB49) { // SHIN WITH DAGESH
- *ab = 0xFB2C;
- found = true;
- }
- break;
- case 0x05C2: // SIN DOT
- if (a == 0x05E9) { // SHIN
- *ab = 0xFB2B;
- found = true;
- } else if (a == 0xFB49) { // SHIN WITH DAGESH
- *ab = 0xFB2D;
- found = true;
- }
- break;
- }
- }
-
- return found;
-}
-
-const hb_ot_complex_shaper_t _hb_ot_complex_shaper_default =
-{
- "default",
- collect_features_default,
- NULL, /* override_features */
- NULL, /* data_create */
- NULL, /* data_destroy */
- NULL, /* preprocess_text */
- normalization_preference_default,
- NULL, /* decompose */
- compose_default,
- NULL, /* setup_masks */
- true, /* zero_width_attached_marks */
- true, /* fallback_position */
-};
-
-
-/* Thai / Lao shaper */
-
-static void
-preprocess_text_thai (const hb_ot_shape_plan_t *plan HB_UNUSED,
- hb_buffer_t *buffer,
- hb_font_t *font HB_UNUSED)
-{
- /* The following is NOT specified in the MS OT Thai spec, however, it seems
- * to be what Uniscribe and other engines implement. According to Eric Muller:
- *
- * When you have a SARA AM, decompose it in NIKHAHIT + SARA AA, *and* move the
- * NIKHAHIT backwards over any tone mark (0E48-0E4B).
- *
- * <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32>
- *
- * This reordering is legit only when the NIKHAHIT comes from a SARA AM, not
- * when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably
- * not what a user wanted, but the rendering is nevertheless nikhahit above
- * chattawa.
- *
- * Same for Lao.
- *
- * Note:
- *
- * Uniscribe also does so below-marks reordering. Namely, it positions U+0E3A
- * after U+0E38 and U+0E39. We do that by modifying the ccc for U+0E3A.
- * See unicode->modified_combining_class (). Lao does NOT have a U+0E3A
- * equivalent.
- */
-
-
- /*
- * Here are the characters of significance:
- *
- * Thai Lao
- * SARA AM: U+0E33 U+0EB3
- * SARA AA: U+0E32 U+0EB2
- * Nikhahit: U+0E4D U+0ECD
- *
- * Testing shows that Uniscribe reorder the following marks:
- * Thai: <0E31,0E34..0E37,0E47..0E4E>
- * Lao: <0EB1,0EB4..0EB7,0EC7..0ECE>
- *
- * Note how the Lao versions are the same as Thai + 0x80.
- */
-
- /* We only get one script at a time, so a script-agnostic implementation
- * is adequate here. */
-#define IS_SARA_AM(x) (((x) & ~0x0080) == 0x0E33)
-#define NIKHAHIT_FROM_SARA_AM(x) ((x) - 0xE33 + 0xE4D)
-#define SARA_AA_FROM_SARA_AM(x) ((x) - 1)
-#define IS_TONE_MARK(x) (hb_in_ranges<hb_codepoint_t> ((x) & ~0x0080, 0x0E34, 0x0E37, 0x0E47, 0x0E4E, 0x0E31, 0x0E31))
-
- buffer->clear_output ();
- unsigned int count = buffer->len;
- for (buffer->idx = 0; buffer->idx < count;)
- {
- hb_codepoint_t u = buffer->cur().codepoint;
- if (likely (!IS_SARA_AM (u))) {
- buffer->next_glyph ();
- continue;
- }
-
- /* Is SARA AM. Decompose and reorder. */
- hb_codepoint_t decomposed[2] = {hb_codepoint_t (NIKHAHIT_FROM_SARA_AM (u)),
- hb_codepoint_t (SARA_AA_FROM_SARA_AM (u))};
- buffer->replace_glyphs (1, 2, decomposed);
- if (unlikely (buffer->in_error))
- return;
-
- /* Ok, let's see... */
- unsigned int end = buffer->out_len;
- unsigned int start = end - 2;
- while (start > 0 && IS_TONE_MARK (buffer->out_info[start - 1].codepoint))
- start--;
-
- if (start + 2 < end)
- {
- /* Move Nikhahit (end-2) to the beginning */
- buffer->merge_out_clusters (start, end);
- hb_glyph_info_t t = buffer->out_info[end - 2];
- memmove (buffer->out_info + start + 1,
- buffer->out_info + start,
- sizeof (buffer->out_info[0]) * (end - start - 2));
- buffer->out_info[start] = t;
- }
- else
- {
- /* Since we decomposed, and NIKHAHIT is combining, merge clusters with the
- * previous cluster. */
- if (start)
- buffer->merge_out_clusters (start - 1, end);
- }
- }
- buffer->swap_buffers ();
-}
-
-const hb_ot_complex_shaper_t _hb_ot_complex_shaper_thai =
-{
- "thai",
- NULL, /* collect_features */
- NULL, /* override_features */
- NULL, /* data_create */
- NULL, /* data_destroy */
- preprocess_text_thai,
- NULL, /* normalization_preference */
- NULL, /* decompose */
- NULL, /* compose */
- NULL, /* setup_masks */
- true, /* zero_width_attached_marks */
- false,/* fallback_position */
-};
diff --git a/src/hb-ot-shape-complex-thai.cc b/src/hb-ot-shape-complex-thai.cc
new file mode 100644
index 0000000..d6f4669
--- /dev/null
+++ b/src/hb-ot-shape-complex-thai.cc
@@ -0,0 +1,141 @@
+/*
+ * Copyright © 2010,2012 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb-ot-shape-complex-private.hh"
+
+
+/* Thai / Lao shaper */
+
+static void
+preprocess_text_thai (const hb_ot_shape_plan_t *plan HB_UNUSED,
+ hb_buffer_t *buffer,
+ hb_font_t *font HB_UNUSED)
+{
+ /* The following is NOT specified in the MS OT Thai spec, however, it seems
+ * to be what Uniscribe and other engines implement. According to Eric Muller:
+ *
+ * When you have a SARA AM, decompose it in NIKHAHIT + SARA AA, *and* move the
+ * NIKHAHIT backwards over any tone mark (0E48-0E4B).
+ *
+ * <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32>
+ *
+ * This reordering is legit only when the NIKHAHIT comes from a SARA AM, not
+ * when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably
+ * not what a user wanted, but the rendering is nevertheless nikhahit above
+ * chattawa.
+ *
+ * Same for Lao.
+ *
+ * Note:
+ *
+ * Uniscribe also does some below-marks reordering. Namely, it positions U+0E3A
+ * after U+0E38 and U+0E39. We do that by modifying the ccc for U+0E3A.
+ * See unicode->modified_combining_class (). Lao does NOT have a U+0E3A
+ * equivalent.
+ */
+
+
+ /*
+ * Here are the characters of significance:
+ *
+ * Thai Lao
+ * SARA AM: U+0E33 U+0EB3
+ * SARA AA: U+0E32 U+0EB2
+ * Nikhahit: U+0E4D U+0ECD
+ *
+ * Testing shows that Uniscribe reorder the following marks:
+ * Thai: <0E31,0E34..0E37,0E47..0E4E>
+ * Lao: <0EB1,0EB4..0EB7,0EC7..0ECE>
+ *
+ * Note how the Lao versions are the same as Thai + 0x80.
+ */
+
+ /* We only get one script at a time, so a script-agnostic implementation
+ * is adequate here. */
+#define IS_SARA_AM(x) (((x) & ~0x0080) == 0x0E33)
+#define NIKHAHIT_FROM_SARA_AM(x) ((x) - 0xE33 + 0xE4D)
+#define SARA_AA_FROM_SARA_AM(x) ((x) - 1)
+#define IS_TONE_MARK(x) (hb_in_ranges<hb_codepoint_t> ((x) & ~0x0080, 0x0E34, 0x0E37, 0x0E47, 0x0E4E, 0x0E31, 0x0E31))
+
+ buffer->clear_output ();
+ unsigned int count = buffer->len;
+ for (buffer->idx = 0; buffer->idx < count;)
+ {
+ hb_codepoint_t u = buffer->cur().codepoint;
+ if (likely (!IS_SARA_AM (u))) {
+ buffer->next_glyph ();
+ continue;
+ }
+
+ /* Is SARA AM. Decompose and reorder. */
+ hb_codepoint_t decomposed[2] = {hb_codepoint_t (NIKHAHIT_FROM_SARA_AM (u)),
+ hb_codepoint_t (SARA_AA_FROM_SARA_AM (u))};
+ buffer->replace_glyphs (1, 2, decomposed);
+ if (unlikely (buffer->in_error))
+ return;
+
+ /* Ok, let's see... */
+ unsigned int end = buffer->out_len;
+ unsigned int start = end - 2;
+ while (start > 0 && IS_TONE_MARK (buffer->out_info[start - 1].codepoint))
+ start--;
+
+ if (start + 2 < end)
+ {
+ /* Move Nikhahit (end-2) to the beginning */
+ buffer->merge_out_clusters (start, end);
+ hb_glyph_info_t t = buffer->out_info[end - 2];
+ memmove (buffer->out_info + start + 1,
+ buffer->out_info + start,
+ sizeof (buffer->out_info[0]) * (end - start - 2));
+ buffer->out_info[start] = t;
+ }
+ else
+ {
+ /* Since we decomposed, and NIKHAHIT is combining, merge clusters with the
+ * previous cluster. */
+ if (start)
+ buffer->merge_out_clusters (start - 1, end);
+ }
+ }
+ buffer->swap_buffers ();
+}
+
+const hb_ot_complex_shaper_t _hb_ot_complex_shaper_thai =
+{
+ "thai",
+ NULL, /* collect_features */
+ NULL, /* override_features */
+ NULL, /* data_create */
+ NULL, /* data_destroy */
+ preprocess_text_thai,
+ NULL, /* normalization_preference */
+ NULL, /* decompose */
+ NULL, /* compose */
+ NULL, /* setup_masks */
+ true, /* zero_width_attached_marks */
+ false,/* fallback_position */
+};
commit ba82325b7a6311b787ae47f41a56964e2f2cba9f
Author: Behdad Esfahbod <behdad at behdad.org>
Date: Wed Nov 14 15:36:53 2012 -0800
Add note re 'Phags-pa letter U+A872, which is Joining_Type=L
diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc
index ea2a68f..e9db005 100644
--- a/src/hb-ot-shape-complex-arabic.cc
+++ b/src/hb-ot-shape-complex-arabic.cc
@@ -81,6 +81,7 @@ static unsigned int get_joining_type (hb_codepoint_t u, hb_unicode_general_categ
if (unlikely (hb_in_range<hb_codepoint_t> (u, 0xA840, 0xA872)))
{
if (unlikely (u == 0xA872))
+ /* XXX Looks like this should be TYPE_L, but we don't support that yet! */
return JOINING_TYPE_R;
return JOINING_TYPE_D;
More information about the HarfBuzz mailing list