[HarfBuzz] harfbuzz: Branch 'master' - 5 commits

Wed Nov 14 17:53:22 PST 2012

src/Makefile.am                                                         |    3 
 src/hb-ot-map-private.hh                                                |    8 
 src/hb-ot-map.cc                                                        |    6 
 src/hb-ot-shape-complex-arabic.cc                                       |    1 
 src/hb-ot-shape-complex-default.cc                                      |  225 +++++
 src/hb-ot-shape-complex-indic.cc                                        |    2 
 src/hb-ot-shape-complex-misc.cc                                         |  339 --------
 src/hb-ot-shape-complex-private.hh                                      |   13 
 src/hb-ot-shape-complex-thai.cc                                         |  378 ++++++++++
 test/shaping/texts/in-tree/shaper-thai/script-thai/misc/MANIFEST        |    1 
 test/shaping/texts/in-tree/shaper-thai/script-thai/misc/pua-shaping.txt |   11 
 11 files changed, 635 insertions(+), 352 deletions(-)

New commits:
commit 1eb3e94fe99a072ce422e60ac4d4d89ef489b08a
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Nov 14 17:25:03 2012 -0800

    [Thai] Implement PUA-based fallback shaping
    
    As explained here:
    
      http://linux.thai.net/~thep/th-otf/shaping.html
    
    Our output now matches Uniscribe for old fonts (eg. XP Tahoma) with no
    Thai GSUB table.

diff --git a/src/hb-ot-shape-complex-thai.cc b/src/hb-ot-shape-complex-thai.cc
index 0736eca..87fe521 100644
--- a/src/hb-ot-shape-complex-thai.cc
+++ b/src/hb-ot-shape-complex-thai.cc
@@ -29,13 +29,230 @@
 
 /* Thai / Lao shaper */
 
+
+/* PUA shaping */
+
+
+enum thai_consonant_type_t
+{
+  NC,
+  AC,
+  RC,
+  DC,
+  NOT_CONSONANT,
+  NUM_CONSONANT_TYPES = NOT_CONSONANT
+};
+
+static thai_consonant_type_t
+get_consonant_type (hb_codepoint_t u)
+{
+  if (u == 0x0E1B || u == 0x0E1D || u == 0x0E1F || u == 0x0E2C)
+    return AC;
+  if (u == 0x0E0D || u == 0x0E10)
+    return RC;
+  if (u == 0x0E0E || u == 0x0E0F)
+    return DC;
+  if (hb_in_range<hb_codepoint_t> (u, 0x0E01, 0x0E2E))
+    return NC;
+  return NOT_CONSONANT;
+}
+
+
+enum thai_mark_type_t
+{
+  AV,
+  BV,
+  T,
+  NOT_MARK,
+  NUM_MARK_TYPES = NOT_MARK
+};
+
+static thai_mark_type_t
+get_mark_type (hb_codepoint_t u)
+{
+  if (u == 0x0E31 || hb_in_range<hb_codepoint_t> (u, 0x0E34, 0x0E37) ||
+      u == 0x0E47 || hb_in_range<hb_codepoint_t> (u, 0x0E4D, 0x0E4E))
+    return AV;
+  if (hb_in_range<hb_codepoint_t> (u, 0x0E38, 0x0E3A))
+    return BV;
+  if (hb_in_range<hb_codepoint_t> (u, 0x0E48, 0x0E4C))
+    return T;
+  return NOT_MARK;
+}
+
+
+enum thai_action_t
+{
+  NOP,
+  SD,  /* Shift combining-mark down */
+  SL,  /* Shift combining-mark left */
+  SDL, /* Shift combining-mark down-left */
+  RD   /* Remove descender from base */
+};
+
+static hb_codepoint_t
+thai_pua_shape (hb_codepoint_t u, thai_action_t action, hb_font_t *font)
+{
+  struct thai_pua_mapping_t {
+    hb_codepoint_t u;
+    hb_codepoint_t win_pua;
+    hb_codepoint_t mac_pua;
+  } const *pua_mappings = NULL;
+  static const thai_pua_mapping_t SD_mappings[] = {
+    {0x0E48, 0xF70A, 0xF88B}, /* MAI EK */
+    {0x0E49, 0xF70B, 0xF88E}, /* MAI THO */
+    {0x0E4A, 0xF70C, 0xF891}, /* MAI TRI */
+    {0x0E4B, 0xF70D, 0xF894}, /* MAI CHATTAWA */
+    {0x0E4C, 0xF70E, 0xF897}, /* THANTHAKHAT */
+    {0x0E38, 0xF718, 0xF89B}, /* SARA U */
+    {0x0E39, 0xF719, 0xF89C}, /* SARA UU */
+    {0x0E3A, 0xF71A, 0xF89D}, /* PHINTHU */
+    {0x0000, 0x0000, 0x0000}
+  };
+  static const thai_pua_mapping_t SDL_mappings[] = {
+    {0x0E48, 0xF705, 0xF88C}, /* MAI EK */
+    {0x0E49, 0xF706, 0xF88F}, /* MAI THO */
+    {0x0E4A, 0xF707, 0xF892}, /* MAI TRI */
+    {0x0E4B, 0xF708, 0xF895}, /* MAI CHATTAWA */
+    {0x0E4C, 0xF709, 0xF898}, /* THANTHAKHAT */
+    {0x0000, 0x0000, 0x0000}
+  };
+  static const thai_pua_mapping_t SL_mappings[] = {
+    {0x0E48, 0xF713, 0xF88A}, /* MAI EK */
+    {0x0E49, 0xF714, 0xF88D}, /* MAI THO */
+    {0x0E4A, 0xF715, 0xF890}, /* MAI TRI */
+    {0x0E4B, 0xF716, 0xF893}, /* MAI CHATTAWA */
+    {0x0E4C, 0xF717, 0xF896}, /* THANTHAKHAT */
+    {0x0E31, 0xF710, 0xF884}, /* MAI HAN-AKAT */
+    {0x0E34, 0xF701, 0xF885}, /* SARA I */
+    {0x0E35, 0xF702, 0xF886}, /* SARA II */
+    {0x0E36, 0xF703, 0xF887}, /* SARA UE */
+    {0x0E37, 0xF704, 0xF888}, /* SARA UEE */
+    {0x0E47, 0xF712, 0xF889}, /* MAITAIKHU */
+    {0x0E4D, 0xF711, 0xF899}, /* NIKHAHIT */
+    {0x0000, 0x0000, 0x0000}
+  };
+  static const thai_pua_mapping_t RD_mappings[] = {
+    {0x0E0D, 0xF70F, 0xF89A}, /* YO YING */
+    {0x0E10, 0xF700, 0xF89E}, /* THO THAN */
+    {0x0000, 0x0000, 0x0000}
+  };
+
+  switch (action) {
+    default: assert (false); /* Fallthrough */
+    case NOP: return u;
+    case SD:  pua_mappings = SD_mappings; break;
+    case SDL: pua_mappings = SDL_mappings; break;
+    case SL:  pua_mappings = SL_mappings; break;
+    case RD:  pua_mappings = RD_mappings; break;
+  }
+  for (; pua_mappings->u; pua_mappings++)
+    if (pua_mappings->u == u)
+    {
+      hb_codepoint_t glyph;
+      if (hb_font_get_glyph (font, pua_mappings->win_pua, 0, &glyph))
+	return pua_mappings->win_pua;
+      if (hb_font_get_glyph (font, pua_mappings->mac_pua, 0, &glyph))
+	return pua_mappings->mac_pua;
+      break;
+    }
+  return u;
+}
+
+
+static enum thai_above_state_t
+{     /* Cluster above looks like: */
+  T0, /*  â£¤                      */
+  T1, /*     â£¼                   */
+  T2, /*        â£¾                */
+  T3, /*           â£¿             */
+  NUM_ABOVE_STATES
+} thai_above_start_state[NUM_CONSONANT_TYPES + 1/* For NOT_CONSONANT */] =
+{
+  T0, /* NC */
+  T1, /* AC */
+  T0, /* RC */
+  T0, /* DC */
+  T3, /* NOT_CONSONANT */
+};
+
+static const struct thai_above_state_machine_edge_t {
+  thai_action_t action;
+  thai_above_state_t next_state;
+} thai_above_state_machine[NUM_ABOVE_STATES][NUM_MARK_TYPES] =
+{        /*AV*/    /*BV*/    /*T*/
+/*T0*/ {{NOP,T3}, {NOP,T0}, {SD, T3}},
+/*T1*/ {{SL, T2}, {NOP,T1}, {SDL,T2}},
+/*T2*/ {{NOP,T3}, {NOP,T2}, {SL, T3}},
+/*T3*/ {{NOP,T3}, {NOP,T3}, {NOP,T3}},
+};
+
+
+static enum thai_below_state_t
+{
+  B0, /* No descender */
+  B1, /* Removable descender */
+  B2, /* Strict descender */
+  NUM_BELOW_STATES
+} thai_below_start_state[NUM_CONSONANT_TYPES + 1/* For NOT_CONSONANT */] =
+{
+  B0, /* NC */
+  B0, /* AC */
+  B1, /* RC */
+  B2, /* DC */
+  B2, /* NOT_CONSONANT */
+};
+
+static const struct thai_below_state_machine_edge_t {
+  thai_action_t action;
+  thai_below_state_t next_state;
+} thai_below_state_machine[NUM_BELOW_STATES][NUM_MARK_TYPES] =
+{        /*AV*/    /*BV*/    /*T*/
+/*B0*/ {{NOP,B0}, {NOP,B2}, {NOP, B0}},
+/*B1*/ {{NOP,B1}, {RD, B2}, {NOP, B1}},
+/*B2*/ {{NOP,B2}, {SD, B2}, {NOP, B2}},
+};
+
+
 static void
 do_thai_pua_shaping (const hb_ot_shape_plan_t *plan,
 		     hb_buffer_t              *buffer,
 		     hb_font_t                *font)
 {
+  thai_above_state_t above_state = thai_above_start_state[NOT_CONSONANT];
+  thai_below_state_t below_state = thai_below_start_state[NOT_CONSONANT];
+  unsigned int base = 0;
+
+  hb_glyph_info_t *info = buffer->info;
+  unsigned int count = buffer->len;
+  for (unsigned int i = 0; i < count; i++)
+  {
+    thai_mark_type_t mt = get_mark_type (info[i].codepoint);
+
+    if (mt == NOT_MARK) {
+      thai_consonant_type_t ct = get_consonant_type (info[i].codepoint);
+      above_state = thai_above_start_state[ct];
+      below_state = thai_below_start_state[ct];
+      base = i;
+      continue;
+    }
+
+    const thai_above_state_machine_edge_t &above_edge = thai_above_state_machine[above_state][mt];
+    const thai_below_state_machine_edge_t &below_edge = thai_below_state_machine[below_state][mt];
+    above_state = above_edge.next_state;
+    below_state = below_edge.next_state;
+
+    /* At least one of the above/below actions is NOP. */
+    thai_action_t action = above_edge.action != NOP ? above_edge.action : below_edge.action;
+
+    if (action == RD)
+      info[base].codepoint = thai_pua_shape (info[base].codepoint, action, font);
+    else
+      info[i].codepoint = thai_pua_shape (info[i].codepoint, action, font);
+  }
 }
 
+
 static void
 preprocess_text_thai (const hb_ot_shape_plan_t *plan,
 		      hb_buffer_t              *buffer,
commit 851784f8372004e0a40b698c0cdc2d7db8629aa2
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Nov 14 16:24:05 2012 -0800

    Improve shaper selection

diff --git a/src/hb-ot-map-private.hh b/src/hb-ot-map-private.hh
index 11d97e1..c7dbea5 100644
--- a/src/hb-ot-map-private.hh
+++ b/src/hb-ot-map-private.hh
@@ -115,9 +115,6 @@ struct hb_ot_map_t
     *lookup_count = end - start;
   }
 
-  inline hb_tag_t get_chosen_script (unsigned int table_index) const
-  { return chosen_script[table_index]; }
-
   HB_INTERNAL void substitute_closure (const struct hb_ot_shape_plan_t *plan, hb_face_t *face, hb_set_t *glyphs) const;
   HB_INTERNAL void substitute (const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer) const;
   HB_INTERNAL void position (const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer) const;
@@ -130,6 +127,9 @@ struct hb_ot_map_t
     pauses[1].finish ();
   }
 
+  public:
+  hb_tag_t chosen_script[2];
+  bool found_script[2];
 
   private:
 
@@ -140,7 +140,6 @@ struct hb_ot_map_t
 
   hb_mask_t global_mask;
 
-  hb_tag_t chosen_script[2];
   hb_prealloced_array_t<feature_map_t, 8> features;
   hb_prealloced_array_t<lookup_map_t, 32> lookups[2]; /* GSUB/GPOS */
   hb_prealloced_array_t<pause_map_t, 1> pauses[2]; /* GSUB/GPOS */
@@ -200,6 +199,7 @@ struct hb_ot_map_builder_t
   hb_segment_properties_t props;
 
   hb_tag_t chosen_script[2];
+  bool found_script[2];
   unsigned int script_index[2], language_index[2];
 
   private:
diff --git a/src/hb-ot-map.cc b/src/hb-ot-map.cc
index 046fa97..024b7df 100644
--- a/src/hb-ot-map.cc
+++ b/src/hb-ot-map.cc
@@ -79,7 +79,7 @@ hb_ot_map_builder_t::hb_ot_map_builder_t (hb_face_t *face_,
 
   for (unsigned int table_index = 0; table_index < 2; table_index++) {
     hb_tag_t table_tag = table_tags[table_index];
-    hb_ot_layout_table_choose_script (face, table_tag, script_tags, &script_index[table_index], &chosen_script[table_index]);
+    found_script[table_index] = hb_ot_layout_table_choose_script (face, table_tag, script_tags, &script_index[table_index], &chosen_script[table_index]);
     hb_ot_layout_script_find_language (face, table_tag, script_index[table_index], language_tag, &language_index[table_index]);
   }
 }
@@ -161,8 +161,10 @@ hb_ot_map_builder_t::compile (hb_ot_map_t &m)
 {
   m.global_mask = 1;
 
-  for (unsigned int table_index = 0; table_index < 2; table_index++)
+  for (unsigned int table_index = 0; table_index < 2; table_index++) {
     m.chosen_script[table_index] = chosen_script[table_index];
+    m.found_script[table_index] = found_script[table_index];
+  }
 
   if (!feature_infos.len)
     return;
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index a7f9b60..324a04b 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -329,7 +329,7 @@ data_create_indic (const hb_ot_shape_plan_t *plan)
       break;
     }
 
-  indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.get_chosen_script (0) & 0x000000FF) != '2');
+  indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FF) != '2');
   indic_plan->virama_glyph = (hb_codepoint_t) -1;
 
   indic_plan->rphf.init (&plan->map, HB_TAG('r','p','h','f'));
diff --git a/src/hb-ot-shape-complex-private.hh b/src/hb-ot-shape-complex-private.hh
index 043e8e3..9f8cecd 100644
--- a/src/hb-ot-shape-complex-private.hh
+++ b/src/hb-ot-shape-complex-private.hh
@@ -278,15 +278,18 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner)
     case HB_SCRIPT_TAKRI:
 
       /* Only use Indic shaper if the font has Indic tables. */
-      if (planner->map.chosen_script[0] == HB_OT_TAG_DEFAULT_SCRIPT)
-	return &_hb_ot_complex_shaper_default;
-      else
+      if (planner->map.found_script[0])
 	return &_hb_ot_complex_shaper_indic;
+      else
+	return &_hb_ot_complex_shaper_default;
 
     case HB_SCRIPT_KHMER:
       /* If the font has 'liga', let the generic shaper do it. */
-      if (planner->map.chosen_script[0] == HB_OT_TAG_DEFAULT_SCRIPT ||
-	  hb_ot_layout_language_find_feature (planner->face, HB_OT_TAG_GSUB, planner->map.script_index[0], planner->map.language_index[0], HB_TAG ('l','i','g','a'), NULL))
+      if (!planner->map.found_script[0] ||
+	  hb_ot_layout_language_find_feature (planner->face, HB_OT_TAG_GSUB,
+					      planner->map.script_index[0],
+					      planner->map.language_index[0],
+					      HB_TAG ('l','i','g','a'), NULL))
 	return &_hb_ot_complex_shaper_default;
       else
 	return &_hb_ot_complex_shaper_indic;
diff --git a/src/hb-ot-shape-complex-thai.cc b/src/hb-ot-shape-complex-thai.cc
index d6f4669..0736eca 100644
--- a/src/hb-ot-shape-complex-thai.cc
+++ b/src/hb-ot-shape-complex-thai.cc
@@ -30,10 +30,26 @@
 /* Thai / Lao shaper */
 
 static void
-preprocess_text_thai (const hb_ot_shape_plan_t *plan HB_UNUSED,
+do_thai_pua_shaping (const hb_ot_shape_plan_t *plan,
+		     hb_buffer_t              *buffer,
+		     hb_font_t                *font)
+{
+}
+
+static void
+preprocess_text_thai (const hb_ot_shape_plan_t *plan,
 		      hb_buffer_t              *buffer,
-		      hb_font_t                *font HB_UNUSED)
+		      hb_font_t                *font)
 {
+  /* This function implements the shaping logic documented here:
+   *
+   *   http://linux.thai.net/~thep/th-otf/shaping.html
+   *
+   * The first shaping rule listed there is needed even if the font has Thai
+   * OpenType tables.  The rest do fallback positioning based on PUA codepoints.
+   * We implement that only if there exist no Thai GSUB in the font.
+   */
+
   /* The following is NOT specified in the MS OT Thai spec, however, it seems
    * to be what Uniscribe and other engines implement.  According to Eric Muller:
    *
@@ -122,6 +138,10 @@ preprocess_text_thai (const hb_ot_shape_plan_t *plan HB_UNUSED,
     }
   }
   buffer->swap_buffers ();
+
+  /* If font has Thai GSUB, we are done. */
+  if (plan->props.script == HB_SCRIPT_THAI && !plan->map.found_script[0])
+    do_thai_pua_shaping (plan, buffer, font);
 }
 
 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_thai =
commit f3584d3a3a627e38dfd7769975a670db340d2a48
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Nov 14 15:55:17 2012 -0800

    Add test cases for Thai PUA shaping

diff --git a/test/shaping/texts/in-tree/shaper-thai/script-thai/misc/MANIFEST b/test/shaping/texts/in-tree/shaper-thai/script-thai/misc/MANIFEST
index 6aa865b..911099e 100644
--- a/test/shaping/texts/in-tree/shaper-thai/script-thai/misc/MANIFEST
+++ b/test/shaping/texts/in-tree/shaper-thai/script-thai/misc/MANIFEST
@@ -1,2 +1,3 @@
 phinthu.txt
+pua-shaping.txt
 sara-am.txt
diff --git a/test/shaping/texts/in-tree/shaper-thai/script-thai/misc/pua-shaping.txt b/test/shaping/texts/in-tree/shaper-thai/script-thai/misc/pua-shaping.txt
new file mode 100644
index 0000000..c17834b
--- /dev/null
+++ b/test/shaping/texts/in-tree/shaper-thai/script-thai/misc/pua-shaping.txt
@@ -0,0 +1,11 @@
+à¸™à¸µ
+à¸™à¹ˆ
+à¸™à¸µà¹ˆ
+à¸™à¹ˆà¸µ
+à¸›à¹‡
+à¸
+à¸à¸¸
+à¸à¸´
+à¸à¸´à¹ˆ
+à¸à¹ˆ
+à¸Žà¸¹
commit 43f04a7456419153cb03e610a825056a47824780
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Nov 14 15:51:54 2012 -0800

    Move Thai shaper into a separate file

diff --git a/src/Makefile.am b/src/Makefile.am
index 372c10f..aca8abf 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -81,11 +81,12 @@ HBSOURCES += \
 	hb-ot-shape-complex-arabic.cc \
 	hb-ot-shape-complex-arabic-fallback.hh \
 	hb-ot-shape-complex-arabic-table.hh \
+	hb-ot-shape-complex-default.cc \
 	hb-ot-shape-complex-indic.cc \
 	hb-ot-shape-complex-indic-machine.hh \
 	hb-ot-shape-complex-indic-private.hh \
 	hb-ot-shape-complex-indic-table.hh \
-	hb-ot-shape-complex-misc.cc \
+	hb-ot-shape-complex-thai.cc \
 	hb-ot-shape-complex-private.hh \
 	hb-ot-shape-normalize-private.hh \
 	hb-ot-shape-normalize.cc \
diff --git a/src/hb-ot-shape-complex-default.cc b/src/hb-ot-shape-complex-default.cc
new file mode 100644
index 0000000..7645e22
--- /dev/null
+++ b/src/hb-ot-shape-complex-default.cc
@@ -0,0 +1,225 @@
+/*
+ * Copyright Â© 2010,2012  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb-ot-shape-complex-private.hh"
+
+
+/* TODO Add kana, and other small shapers here */
+
+
+/* The default shaper *only* adds additional per-script features.*/
+
+static const hb_tag_t hangul_features[] =
+{
+  HB_TAG('l','j','m','o'),
+  HB_TAG('v','j','m','o'),
+  HB_TAG('t','j','m','o'),
+  HB_TAG_NONE
+};
+
+static const hb_tag_t tibetan_features[] =
+{
+  HB_TAG('a','b','v','s'),
+  HB_TAG('b','l','w','s'),
+  HB_TAG('a','b','v','m'),
+  HB_TAG('b','l','w','m'),
+  HB_TAG_NONE
+};
+
+static void
+collect_features_default (hb_ot_shape_planner_t *plan)
+{
+  const hb_tag_t *script_features = NULL;
+
+  switch ((hb_tag_t) plan->props.script)
+  {
+    /* Unicode-1.1 additions */
+    case HB_SCRIPT_HANGUL:
+      script_features = hangul_features;
+      break;
+
+    /* Unicode-2.0 additions */
+    case HB_SCRIPT_TIBETAN:
+      script_features = tibetan_features;
+      break;
+  }
+
+  for (; script_features && *script_features; script_features++)
+    plan->map.add_bool_feature (*script_features);
+}
+
+static hb_ot_shape_normalization_mode_t
+normalization_preference_default (const hb_segment_properties_t *props)
+{
+  switch ((hb_tag_t) props->script)
+  {
+    /* Unicode-1.1 additions */
+    case HB_SCRIPT_HANGUL:
+      return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL;
+  }
+  return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
+}
+
+static hb_bool_t
+compose_default (hb_unicode_funcs_t *unicode,
+		 hb_codepoint_t  a,
+		 hb_codepoint_t  b,
+		 hb_codepoint_t *ab)
+{
+  /* Hebrew presentation-form shaping.
+   * https://bugzilla.mozilla.org/show_bug.cgi?id=728866 */
+  // Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA;
+  // note that some letters do not have a dagesh presForm encoded
+  static const hb_codepoint_t sDageshForms[0x05EA - 0x05D0 + 1] = {
+    0xFB30, // ALEF
+    0xFB31, // BET
+    0xFB32, // GIMEL
+    0xFB33, // DALET
+    0xFB34, // HE
+    0xFB35, // VAV
+    0xFB36, // ZAYIN
+    0, // HET
+    0xFB38, // TET
+    0xFB39, // YOD
+    0xFB3A, // FINAL KAF
+    0xFB3B, // KAF
+    0xFB3C, // LAMED
+    0, // FINAL MEM
+    0xFB3E, // MEM
+    0, // FINAL NUN
+    0xFB40, // NUN
+    0xFB41, // SAMEKH
+    0, // AYIN
+    0xFB43, // FINAL PE
+    0xFB44, // PE
+    0, // FINAL TSADI
+    0xFB46, // TSADI
+    0xFB47, // QOF
+    0xFB48, // RESH
+    0xFB49, // SHIN
+    0xFB4A // TAV
+  };
+
+  hb_bool_t found = unicode->compose (a, b, ab);
+
+  if (!found && (b & ~0x7F) == 0x0580) {
+      // special-case Hebrew presentation forms that are excluded from
+      // standard normalization, but wanted for old fonts
+      switch (b) {
+      case 0x05B4: // HIRIQ
+	  if (a == 0x05D9) { // YOD
+	      *ab = 0xFB1D;
+	      found = true;
+	  }
+	  break;
+      case 0x05B7: // patah
+	  if (a == 0x05F2) { // YIDDISH YOD YOD
+	      *ab = 0xFB1F;
+	      found = true;
+	  } else if (a == 0x05D0) { // ALEF
+	      *ab = 0xFB2E;
+	      found = true;
+	  }
+	  break;
+      case 0x05B8: // QAMATS
+	  if (a == 0x05D0) { // ALEF
+	      *ab = 0xFB2F;
+	      found = true;
+	  }
+	  break;
+      case 0x05B9: // HOLAM
+	  if (a == 0x05D5) { // VAV
+	      *ab = 0xFB4B;
+	      found = true;
+	  }
+	  break;
+      case 0x05BC: // DAGESH
+	  if (a >= 0x05D0 && a <= 0x05EA) {
+	      *ab = sDageshForms[a - 0x05D0];
+	      found = (*ab != 0);
+	  } else if (a == 0xFB2A) { // SHIN WITH SHIN DOT
+	      *ab = 0xFB2C;
+	      found = true;
+	  } else if (a == 0xFB2B) { // SHIN WITH SIN DOT
+	      *ab = 0xFB2D;
+	      found = true;
+	  }
+	  break;
+      case 0x05BF: // RAFE
+	  switch (a) {
+	  case 0x05D1: // BET
+	      *ab = 0xFB4C;
+	      found = true;
+	      break;
+	  case 0x05DB: // KAF
+	      *ab = 0xFB4D;
+	      found = true;
+	      break;
+	  case 0x05E4: // PE
+	      *ab = 0xFB4E;
+	      found = true;
+	      break;
+	  }
+	  break;
+      case 0x05C1: // SHIN DOT
+	  if (a == 0x05E9) { // SHIN
+	      *ab = 0xFB2A;
+	      found = true;
+	  } else if (a == 0xFB49) { // SHIN WITH DAGESH
+	      *ab = 0xFB2C;
+	      found = true;
+	  }
+	  break;
+      case 0x05C2: // SIN DOT
+	  if (a == 0x05E9) { // SHIN
+	      *ab = 0xFB2B;
+	      found = true;
+	  } else if (a == 0xFB49) { // SHIN WITH DAGESH
+	      *ab = 0xFB2D;
+	      found = true;
+	  }
+	  break;
+      }
+  }
+
+  return found;
+}
+
+const hb_ot_complex_shaper_t _hb_ot_complex_shaper_default =
+{
+  "default",
+  collect_features_default,
+  NULL, /* override_features */
+  NULL, /* data_create */
+  NULL, /* data_destroy */
+  NULL, /* preprocess_text */
+  normalization_preference_default,
+  NULL, /* decompose */
+  compose_default,
+  NULL, /* setup_masks */
+  true, /* zero_width_attached_marks */
+  true, /* fallback_position */
+};
diff --git a/src/hb-ot-shape-complex-misc.cc b/src/hb-ot-shape-complex-misc.cc
deleted file mode 100644
index afe5f70..0000000
--- a/src/hb-ot-shape-complex-misc.cc
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
- * Copyright Â© 2010,2012  Google, Inc.
- *
- *  This is part of HarfBuzz, a text shaping library.
- *
- * Permission is hereby granted, without written agreement and without
- * license or royalty fees, to use, copy, modify, and distribute this
- * software and its documentation for any purpose, provided that the
- * above copyright notice and the following two paragraphs appear in
- * all copies of this software.
- *
- * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
- * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
- * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
- * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- *
- * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
- * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
- * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
- * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
- *
- * Google Author(s): Behdad Esfahbod
- */
-
-#include "hb-ot-shape-complex-private.hh"
-
-
-/* TODO Add kana, and other small shapers here */
-
-
-/* The default shaper *only* adds additional per-script features.*/
-
-static const hb_tag_t hangul_features[] =
-{
-  HB_TAG('l','j','m','o'),
-  HB_TAG('v','j','m','o'),
-  HB_TAG('t','j','m','o'),
-  HB_TAG_NONE
-};
-
-static const hb_tag_t tibetan_features[] =
-{
-  HB_TAG('a','b','v','s'),
-  HB_TAG('b','l','w','s'),
-  HB_TAG('a','b','v','m'),
-  HB_TAG('b','l','w','m'),
-  HB_TAG_NONE
-};
-
-static void
-collect_features_default (hb_ot_shape_planner_t *plan)
-{
-  const hb_tag_t *script_features = NULL;
-
-  switch ((hb_tag_t) plan->props.script)
-  {
-    /* Unicode-1.1 additions */
-    case HB_SCRIPT_HANGUL:
-      script_features = hangul_features;
-      break;
-
-    /* Unicode-2.0 additions */
-    case HB_SCRIPT_TIBETAN:
-      script_features = tibetan_features;
-      break;
-  }
-
-  for (; script_features && *script_features; script_features++)
-    plan->map.add_bool_feature (*script_features);
-}
-
-static hb_ot_shape_normalization_mode_t
-normalization_preference_default (const hb_segment_properties_t *props)
-{
-  switch ((hb_tag_t) props->script)
-  {
-    /* Unicode-1.1 additions */
-    case HB_SCRIPT_HANGUL:
-      return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL;
-  }
-  return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
-}
-
-static hb_bool_t
-compose_default (hb_unicode_funcs_t *unicode,
-		 hb_codepoint_t  a,
-		 hb_codepoint_t  b,
-		 hb_codepoint_t *ab)
-{
-  /* Hebrew presentation-form shaping.
-   * https://bugzilla.mozilla.org/show_bug.cgi?id=728866 */
-  // Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA;
-  // note that some letters do not have a dagesh presForm encoded
-  static const hb_codepoint_t sDageshForms[0x05EA - 0x05D0 + 1] = {
-    0xFB30, // ALEF
-    0xFB31, // BET
-    0xFB32, // GIMEL
-    0xFB33, // DALET
-    0xFB34, // HE
-    0xFB35, // VAV
-    0xFB36, // ZAYIN
-    0, // HET
-    0xFB38, // TET
-    0xFB39, // YOD
-    0xFB3A, // FINAL KAF
-    0xFB3B, // KAF
-    0xFB3C, // LAMED
-    0, // FINAL MEM
-    0xFB3E, // MEM
-    0, // FINAL NUN
-    0xFB40, // NUN
-    0xFB41, // SAMEKH
-    0, // AYIN
-    0xFB43, // FINAL PE
-    0xFB44, // PE
-    0, // FINAL TSADI
-    0xFB46, // TSADI
-    0xFB47, // QOF
-    0xFB48, // RESH
-    0xFB49, // SHIN
-    0xFB4A // TAV
-  };
-
-  hb_bool_t found = unicode->compose (a, b, ab);
-
-  if (!found && (b & ~0x7F) == 0x0580) {
-      // special-case Hebrew presentation forms that are excluded from
-      // standard normalization, but wanted for old fonts
-      switch (b) {
-      case 0x05B4: // HIRIQ
-	  if (a == 0x05D9) { // YOD
-	      *ab = 0xFB1D;
-	      found = true;
-	  }
-	  break;
-      case 0x05B7: // patah
-	  if (a == 0x05F2) { // YIDDISH YOD YOD
-	      *ab = 0xFB1F;
-	      found = true;
-	  } else if (a == 0x05D0) { // ALEF
-	      *ab = 0xFB2E;
-	      found = true;
-	  }
-	  break;
-      case 0x05B8: // QAMATS
-	  if (a == 0x05D0) { // ALEF
-	      *ab = 0xFB2F;
-	      found = true;
-	  }
-	  break;
-      case 0x05B9: // HOLAM
-	  if (a == 0x05D5) { // VAV
-	      *ab = 0xFB4B;
-	      found = true;
-	  }
-	  break;
-      case 0x05BC: // DAGESH
-	  if (a >= 0x05D0 && a <= 0x05EA) {
-	      *ab = sDageshForms[a - 0x05D0];
-	      found = (*ab != 0);
-	  } else if (a == 0xFB2A) { // SHIN WITH SHIN DOT
-	      *ab = 0xFB2C;
-	      found = true;
-	  } else if (a == 0xFB2B) { // SHIN WITH SIN DOT
-	      *ab = 0xFB2D;
-	      found = true;
-	  }
-	  break;
-      case 0x05BF: // RAFE
-	  switch (a) {
-	  case 0x05D1: // BET
-	      *ab = 0xFB4C;
-	      found = true;
-	      break;
-	  case 0x05DB: // KAF
-	      *ab = 0xFB4D;
-	      found = true;
-	      break;
-	  case 0x05E4: // PE
-	      *ab = 0xFB4E;
-	      found = true;
-	      break;
-	  }
-	  break;
-      case 0x05C1: // SHIN DOT
-	  if (a == 0x05E9) { // SHIN
-	      *ab = 0xFB2A;
-	      found = true;
-	  } else if (a == 0xFB49) { // SHIN WITH DAGESH
-	      *ab = 0xFB2C;
-	      found = true;
-	  }
-	  break;
-      case 0x05C2: // SIN DOT
-	  if (a == 0x05E9) { // SHIN
-	      *ab = 0xFB2B;
-	      found = true;
-	  } else if (a == 0xFB49) { // SHIN WITH DAGESH
-	      *ab = 0xFB2D;
-	      found = true;
-	  }
-	  break;
-      }
-  }
-
-  return found;
-}
-
-const hb_ot_complex_shaper_t _hb_ot_complex_shaper_default =
-{
-  "default",
-  collect_features_default,
-  NULL, /* override_features */
-  NULL, /* data_create */
-  NULL, /* data_destroy */
-  NULL, /* preprocess_text */
-  normalization_preference_default,
-  NULL, /* decompose */
-  compose_default,
-  NULL, /* setup_masks */
-  true, /* zero_width_attached_marks */
-  true, /* fallback_position */
-};
-
-
-/* Thai / Lao shaper */
-
-static void
-preprocess_text_thai (const hb_ot_shape_plan_t *plan HB_UNUSED,
-		      hb_buffer_t              *buffer,
-		      hb_font_t                *font HB_UNUSED)
-{
-  /* The following is NOT specified in the MS OT Thai spec, however, it seems
-   * to be what Uniscribe and other engines implement.  According to Eric Muller:
-   *
-   * When you have a SARA AM, decompose it in NIKHAHIT + SARA AA, *and* move the
-   * NIKHAHIT backwards over any tone mark (0E48-0E4B).
-   *
-   * <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32>
-   *
-   * This reordering is legit only when the NIKHAHIT comes from a SARA AM, not
-   * when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably
-   * not what a user wanted, but the rendering is nevertheless nikhahit above
-   * chattawa.
-   *
-   * Same for Lao.
-   *
-   * Note:
-   *
-   * Uniscribe also does so below-marks reordering.  Namely, it positions U+0E3A
-   * after U+0E38 and U+0E39.  We do that by modifying the ccc for U+0E3A.
-   * See unicode->modified_combining_class ().  Lao does NOT have a U+0E3A
-   * equivalent.
-   */
-
-
-  /*
-   * Here are the characters of significance:
-   *
-   *			Thai	Lao
-   * SARA AM:		U+0E33	U+0EB3
-   * SARA AA:		U+0E32	U+0EB2
-   * Nikhahit:		U+0E4D	U+0ECD
-   *
-   * Testing shows that Uniscribe reorder the following marks:
-   * Thai:	<0E31,0E34..0E37,0E47..0E4E>
-   * Lao:	<0EB1,0EB4..0EB7,0EC7..0ECE>
-   *
-   * Note how the Lao versions are the same as Thai + 0x80.
-   */
-
-  /* We only get one script at a time, so a script-agnostic implementation
-   * is adequate here. */
-#define IS_SARA_AM(x) (((x) & ~0x0080) == 0x0E33)
-#define NIKHAHIT_FROM_SARA_AM(x) ((x) - 0xE33 + 0xE4D)
-#define SARA_AA_FROM_SARA_AM(x) ((x) - 1)
-#define IS_TONE_MARK(x) (hb_in_ranges<hb_codepoint_t> ((x) & ~0x0080, 0x0E34, 0x0E37, 0x0E47, 0x0E4E, 0x0E31, 0x0E31))
-
-  buffer->clear_output ();
-  unsigned int count = buffer->len;
-  for (buffer->idx = 0; buffer->idx < count;)
-  {
-    hb_codepoint_t u = buffer->cur().codepoint;
-    if (likely (!IS_SARA_AM (u))) {
-      buffer->next_glyph ();
-      continue;
-    }
-
-    /* Is SARA AM. Decompose and reorder. */
-    hb_codepoint_t decomposed[2] = {hb_codepoint_t (NIKHAHIT_FROM_SARA_AM (u)),
-				    hb_codepoint_t (SARA_AA_FROM_SARA_AM (u))};
-    buffer->replace_glyphs (1, 2, decomposed);
-    if (unlikely (buffer->in_error))
-      return;
-
-    /* Ok, let's see... */
-    unsigned int end = buffer->out_len;
-    unsigned int start = end - 2;
-    while (start > 0 && IS_TONE_MARK (buffer->out_info[start - 1].codepoint))
-      start--;
-
-    if (start + 2 < end)
-    {
-      /* Move Nikhahit (end-2) to the beginning */
-      buffer->merge_out_clusters (start, end);
-      hb_glyph_info_t t = buffer->out_info[end - 2];
-      memmove (buffer->out_info + start + 1,
-	       buffer->out_info + start,
-	       sizeof (buffer->out_info[0]) * (end - start - 2));
-      buffer->out_info[start] = t;
-    }
-    else
-    {
-      /* Since we decomposed, and NIKHAHIT is combining, merge clusters with the
-       * previous cluster. */
-      if (start)
-	buffer->merge_out_clusters (start - 1, end);
-    }
-  }
-  buffer->swap_buffers ();
-}
-
-const hb_ot_complex_shaper_t _hb_ot_complex_shaper_thai =
-{
-  "thai",
-  NULL, /* collect_features */
-  NULL, /* override_features */
-  NULL, /* data_create */
-  NULL, /* data_destroy */
-  preprocess_text_thai,
-  NULL, /* normalization_preference */
-  NULL, /* decompose */
-  NULL, /* compose */
-  NULL, /* setup_masks */
-  true, /* zero_width_attached_marks */
-  false,/* fallback_position */
-};
diff --git a/src/hb-ot-shape-complex-thai.cc b/src/hb-ot-shape-complex-thai.cc
new file mode 100644
index 0000000..d6f4669
--- /dev/null
+++ b/src/hb-ot-shape-complex-thai.cc
@@ -0,0 +1,141 @@
+/*
+ * Copyright Â© 2010,2012  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb-ot-shape-complex-private.hh"
+
+
+/* Thai / Lao shaper */
+
+static void
+preprocess_text_thai (const hb_ot_shape_plan_t *plan HB_UNUSED,
+		      hb_buffer_t              *buffer,
+		      hb_font_t                *font HB_UNUSED)
+{
+  /* The following is NOT specified in the MS OT Thai spec, however, it seems
+   * to be what Uniscribe and other engines implement.  According to Eric Muller:
+   *
+   * When you have a SARA AM, decompose it in NIKHAHIT + SARA AA, *and* move the
+   * NIKHAHIT backwards over any tone mark (0E48-0E4B).
+   *
+   * <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32>
+   *
+   * This reordering is legit only when the NIKHAHIT comes from a SARA AM, not
+   * when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably
+   * not what a user wanted, but the rendering is nevertheless nikhahit above
+   * chattawa.
+   *
+   * Same for Lao.
+   *
+   * Note:
+   *
+   * Uniscribe also does some below-marks reordering.  Namely, it positions U+0E3A
+   * after U+0E38 and U+0E39.  We do that by modifying the ccc for U+0E3A.
+   * See unicode->modified_combining_class ().  Lao does NOT have a U+0E3A
+   * equivalent.
+   */
+
+
+  /*
+   * Here are the characters of significance:
+   *
+   *			Thai	Lao
+   * SARA AM:		U+0E33	U+0EB3
+   * SARA AA:		U+0E32	U+0EB2
+   * Nikhahit:		U+0E4D	U+0ECD
+   *
+   * Testing shows that Uniscribe reorder the following marks:
+   * Thai:	<0E31,0E34..0E37,0E47..0E4E>
+   * Lao:	<0EB1,0EB4..0EB7,0EC7..0ECE>
+   *
+   * Note how the Lao versions are the same as Thai + 0x80.
+   */
+
+  /* We only get one script at a time, so a script-agnostic implementation
+   * is adequate here. */
+#define IS_SARA_AM(x) (((x) & ~0x0080) == 0x0E33)
+#define NIKHAHIT_FROM_SARA_AM(x) ((x) - 0xE33 + 0xE4D)
+#define SARA_AA_FROM_SARA_AM(x) ((x) - 1)
+#define IS_TONE_MARK(x) (hb_in_ranges<hb_codepoint_t> ((x) & ~0x0080, 0x0E34, 0x0E37, 0x0E47, 0x0E4E, 0x0E31, 0x0E31))
+
+  buffer->clear_output ();
+  unsigned int count = buffer->len;
+  for (buffer->idx = 0; buffer->idx < count;)
+  {
+    hb_codepoint_t u = buffer->cur().codepoint;
+    if (likely (!IS_SARA_AM (u))) {
+      buffer->next_glyph ();
+      continue;
+    }
+
+    /* Is SARA AM. Decompose and reorder. */
+    hb_codepoint_t decomposed[2] = {hb_codepoint_t (NIKHAHIT_FROM_SARA_AM (u)),
+				    hb_codepoint_t (SARA_AA_FROM_SARA_AM (u))};
+    buffer->replace_glyphs (1, 2, decomposed);
+    if (unlikely (buffer->in_error))
+      return;
+
+    /* Ok, let's see... */
+    unsigned int end = buffer->out_len;
+    unsigned int start = end - 2;
+    while (start > 0 && IS_TONE_MARK (buffer->out_info[start - 1].codepoint))
+      start--;
+
+    if (start + 2 < end)
+    {
+      /* Move Nikhahit (end-2) to the beginning */
+      buffer->merge_out_clusters (start, end);
+      hb_glyph_info_t t = buffer->out_info[end - 2];
+      memmove (buffer->out_info + start + 1,
+	       buffer->out_info + start,
+	       sizeof (buffer->out_info[0]) * (end - start - 2));
+      buffer->out_info[start] = t;
+    }
+    else
+    {
+      /* Since we decomposed, and NIKHAHIT is combining, merge clusters with the
+       * previous cluster. */
+      if (start)
+	buffer->merge_out_clusters (start - 1, end);
+    }
+  }
+  buffer->swap_buffers ();
+}
+
+const hb_ot_complex_shaper_t _hb_ot_complex_shaper_thai =
+{
+  "thai",
+  NULL, /* collect_features */
+  NULL, /* override_features */
+  NULL, /* data_create */
+  NULL, /* data_destroy */
+  preprocess_text_thai,
+  NULL, /* normalization_preference */
+  NULL, /* decompose */
+  NULL, /* compose */
+  NULL, /* setup_masks */
+  true, /* zero_width_attached_marks */
+  false,/* fallback_position */
+};
commit ba82325b7a6311b787ae47f41a56964e2f2cba9f
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Nov 14 15:36:53 2012 -0800

    Add note re 'Phags-pa letter U+A872, which is Joining_Type=L

diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc
index ea2a68f..e9db005 100644
--- a/src/hb-ot-shape-complex-arabic.cc
+++ b/src/hb-ot-shape-complex-arabic.cc
@@ -81,6 +81,7 @@ static unsigned int get_joining_type (hb_codepoint_t u, hb_unicode_general_categ
   if (unlikely (hb_in_range<hb_codepoint_t> (u, 0xA840, 0xA872)))
   {
       if (unlikely (u == 0xA872))
+	/* XXX Looks like this should be TYPE_L, but we don't support that yet! */
 	return JOINING_TYPE_R;
 
       return JOINING_TYPE_D;