[HarfBuzz] harfbuzz: Branch 'master' - 4 commits

Behdad Esfahbod behdad at kemper.freedesktop.org
Tue Dec 31 01:42:25 PST 2013


 src/Makefile.am                    |    3 
 src/hb-ot-shape-complex-arabic.cc  |    2 
 src/hb-ot-shape-complex-default.cc |  184 -----------------------------
 src/hb-ot-shape-complex-hangul.cc  |  232 +++++++++++++++++++++++++++++++++++++
 src/hb-ot-shape-complex-hebrew.cc  |  172 +++++++++++++++++++++++++++
 src/hb-ot-shape-complex-indic.cc   |    8 -
 src/hb-ot-shape-complex-myanmar.cc |    9 -
 src/hb-ot-shape-complex-private.hh |   38 +++---
 src/hb-ot-shape-complex-sea.cc     |    9 -
 src/hb-ot-shape-complex-thai.cc    |    4 
 src/hb-ot-shape-complex-tibetan.cc |   61 +++++++++
 src/hb-ot-shape-normalize.cc       |    4 
 src/hb-private.hh                  |    6 
 13 files changed, 504 insertions(+), 228 deletions(-)

New commits:
commit f14bb7de631b20e2868fb62e5311cd0d9e24bb49
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Dec 31 16:49:15 2013 +0800

    [ot] Separate out hebrew and tibetan shapers from default
    
    Now default shaper is truly no-op.

diff --git a/src/Makefile.am b/src/Makefile.am
index 62544db..c650b11 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -93,6 +93,7 @@ HBSOURCES += \
 	hb-ot-shape-complex-arabic-table.hh \
 	hb-ot-shape-complex-default.cc \
 	hb-ot-shape-complex-hangul.cc \
+	hb-ot-shape-complex-hebrew.cc \
 	hb-ot-shape-complex-indic.cc \
 	hb-ot-shape-complex-indic-machine.hh \
 	hb-ot-shape-complex-indic-private.hh \
@@ -102,6 +103,7 @@ HBSOURCES += \
 	hb-ot-shape-complex-sea.cc \
 	hb-ot-shape-complex-sea-machine.hh \
 	hb-ot-shape-complex-thai.cc \
+	hb-ot-shape-complex-tibetan.cc \
 	hb-ot-shape-complex-private.hh \
 	hb-ot-shape-normalize-private.hh \
 	hb-ot-shape-normalize.cc \
diff --git a/src/hb-ot-shape-complex-default.cc b/src/hb-ot-shape-complex-default.cc
index aca01bd..f7f097e 100644
--- a/src/hb-ot-shape-complex-default.cc
+++ b/src/hb-ot-shape-complex-default.cc
@@ -27,174 +27,17 @@
 #include "hb-ot-shape-complex-private.hh"
 
 
-/* TODO Add kana, and other small shapers here */
-
-
-/* The default shaper *only* adds additional per-script features.*/
-
-static const hb_tag_t tibetan_features[] =
-{
-  HB_TAG('a','b','v','s'),
-  HB_TAG('b','l','w','s'),
-  HB_TAG('a','b','v','m'),
-  HB_TAG('b','l','w','m'),
-  HB_TAG_NONE
-};
-
-static void
-collect_features_default (hb_ot_shape_planner_t *plan)
-{
-  const hb_tag_t *script_features = NULL;
-
-  switch ((hb_tag_t) plan->props.script)
-  {
-    /* Unicode-2.0 additions */
-    case HB_SCRIPT_TIBETAN:
-      script_features = tibetan_features;
-      break;
-  }
-
-  for (; script_features && *script_features; script_features++)
-    plan->map.add_global_bool_feature (*script_features);
-}
-
-static bool
-compose_default (const hb_ot_shape_normalize_context_t *c,
-		 hb_codepoint_t  a,
-		 hb_codepoint_t  b,
-		 hb_codepoint_t *ab)
-{
-  /* Hebrew presentation-form shaping.
-   * https://bugzilla.mozilla.org/show_bug.cgi?id=728866
-   * Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA;
-   * Note that some letters do not have a dagesh presForm encoded.
-   */
-  static const hb_codepoint_t sDageshForms[0x05EA - 0x05D0 + 1] = {
-    0xFB30, /* ALEF */
-    0xFB31, /* BET */
-    0xFB32, /* GIMEL */
-    0xFB33, /* DALET */
-    0xFB34, /* HE */
-    0xFB35, /* VAV */
-    0xFB36, /* ZAYIN */
-    0x0000, /* HET */
-    0xFB38, /* TET */
-    0xFB39, /* YOD */
-    0xFB3A, /* FINAL KAF */
-    0xFB3B, /* KAF */
-    0xFB3C, /* LAMED */
-    0x0000, /* FINAL MEM */
-    0xFB3E, /* MEM */
-    0x0000, /* FINAL NUN */
-    0xFB40, /* NUN */
-    0xFB41, /* SAMEKH */
-    0x0000, /* AYIN */
-    0xFB43, /* FINAL PE */
-    0xFB44, /* PE */
-    0x0000, /* FINAL TSADI */
-    0xFB46, /* TSADI */
-    0xFB47, /* QOF */
-    0xFB48, /* RESH */
-    0xFB49, /* SHIN */
-    0xFB4A /* TAV */
-  };
-
-  bool found = c->unicode->compose (a, b, ab);
-
-  if (!found && (b & ~0x7F) == 0x0580) {
-      /* Special-case Hebrew presentation forms that are excluded from
-       * standard normalization, but wanted for old fonts. */
-      switch (b) {
-      case 0x05B4: /* HIRIQ */
-	  if (a == 0x05D9) { /* YOD */
-	      *ab = 0xFB1D;
-	      found = true;
-	  }
-	  break;
-      case 0x05B7: /* patah */
-	  if (a == 0x05F2) { /* YIDDISH YOD YOD */
-	      *ab = 0xFB1F;
-	      found = true;
-	  } else if (a == 0x05D0) { /* ALEF */
-	      *ab = 0xFB2E;
-	      found = true;
-	  }
-	  break;
-      case 0x05B8: /* QAMATS */
-	  if (a == 0x05D0) { /* ALEF */
-	      *ab = 0xFB2F;
-	      found = true;
-	  }
-	  break;
-      case 0x05B9: /* HOLAM */
-	  if (a == 0x05D5) { /* VAV */
-	      *ab = 0xFB4B;
-	      found = true;
-	  }
-	  break;
-      case 0x05BC: /* DAGESH */
-	  if (a >= 0x05D0 && a <= 0x05EA) {
-	      *ab = sDageshForms[a - 0x05D0];
-	      found = (*ab != 0);
-	  } else if (a == 0xFB2A) { /* SHIN WITH SHIN DOT */
-	      *ab = 0xFB2C;
-	      found = true;
-	  } else if (a == 0xFB2B) { /* SHIN WITH SIN DOT */
-	      *ab = 0xFB2D;
-	      found = true;
-	  }
-	  break;
-      case 0x05BF: /* RAFE */
-	  switch (a) {
-	  case 0x05D1: /* BET */
-	      *ab = 0xFB4C;
-	      found = true;
-	      break;
-	  case 0x05DB: /* KAF */
-	      *ab = 0xFB4D;
-	      found = true;
-	      break;
-	  case 0x05E4: /* PE */
-	      *ab = 0xFB4E;
-	      found = true;
-	      break;
-	  }
-	  break;
-      case 0x05C1: /* SHIN DOT */
-	  if (a == 0x05E9) { /* SHIN */
-	      *ab = 0xFB2A;
-	      found = true;
-	  } else if (a == 0xFB49) { /* SHIN WITH DAGESH */
-	      *ab = 0xFB2C;
-	      found = true;
-	  }
-	  break;
-      case 0x05C2: /* SIN DOT */
-	  if (a == 0x05E9) { /* SHIN */
-	      *ab = 0xFB2B;
-	      found = true;
-	  } else if (a == 0xFB49) { /* SHIN WITH DAGESH */
-	      *ab = 0xFB2D;
-	      found = true;
-	  }
-	  break;
-      }
-  }
-
-  return found;
-}
-
 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_default =
 {
   "default",
-  collect_features_default,
+  NULL, /* collect_features */
   NULL, /* override_features */
   NULL, /* data_create */
   NULL, /* data_destroy */
   NULL, /* preprocess_text */
   HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
   NULL, /* decompose */
-  compose_default,
+  NULL, /* compose */
   NULL, /* setup_masks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT,
   true, /* fallback_position */
diff --git a/src/hb-ot-shape-complex-hebrew.cc b/src/hb-ot-shape-complex-hebrew.cc
new file mode 100644
index 0000000..efef8c1
--- /dev/null
+++ b/src/hb-ot-shape-complex-hebrew.cc
@@ -0,0 +1,172 @@
+/*
+ * Copyright © 2010,2012  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb-ot-shape-complex-private.hh"
+
+
+static bool
+compose_hebrew (const hb_ot_shape_normalize_context_t *c,
+		hb_codepoint_t  a,
+		hb_codepoint_t  b,
+		hb_codepoint_t *ab)
+{
+  /* Hebrew presentation-form shaping.
+   * https://bugzilla.mozilla.org/show_bug.cgi?id=728866
+   * Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA;
+   * Note that some letters do not have a dagesh presForm encoded.
+   */
+  static const hb_codepoint_t sDageshForms[0x05EA - 0x05D0 + 1] = {
+    0xFB30, /* ALEF */
+    0xFB31, /* BET */
+    0xFB32, /* GIMEL */
+    0xFB33, /* DALET */
+    0xFB34, /* HE */
+    0xFB35, /* VAV */
+    0xFB36, /* ZAYIN */
+    0x0000, /* HET */
+    0xFB38, /* TET */
+    0xFB39, /* YOD */
+    0xFB3A, /* FINAL KAF */
+    0xFB3B, /* KAF */
+    0xFB3C, /* LAMED */
+    0x0000, /* FINAL MEM */
+    0xFB3E, /* MEM */
+    0x0000, /* FINAL NUN */
+    0xFB40, /* NUN */
+    0xFB41, /* SAMEKH */
+    0x0000, /* AYIN */
+    0xFB43, /* FINAL PE */
+    0xFB44, /* PE */
+    0x0000, /* FINAL TSADI */
+    0xFB46, /* TSADI */
+    0xFB47, /* QOF */
+    0xFB48, /* RESH */
+    0xFB49, /* SHIN */
+    0xFB4A /* TAV */
+  };
+
+  bool found = c->unicode->compose (a, b, ab);
+
+  if (!found)
+  {
+      /* Special-case Hebrew presentation forms that are excluded from
+       * standard normalization, but wanted for old fonts. */
+      switch (b) {
+      case 0x05B4: /* HIRIQ */
+	  if (a == 0x05D9) { /* YOD */
+	      *ab = 0xFB1D;
+	      found = true;
+	  }
+	  break;
+      case 0x05B7: /* patah */
+	  if (a == 0x05F2) { /* YIDDISH YOD YOD */
+	      *ab = 0xFB1F;
+	      found = true;
+	  } else if (a == 0x05D0) { /* ALEF */
+	      *ab = 0xFB2E;
+	      found = true;
+	  }
+	  break;
+      case 0x05B8: /* QAMATS */
+	  if (a == 0x05D0) { /* ALEF */
+	      *ab = 0xFB2F;
+	      found = true;
+	  }
+	  break;
+      case 0x05B9: /* HOLAM */
+	  if (a == 0x05D5) { /* VAV */
+	      *ab = 0xFB4B;
+	      found = true;
+	  }
+	  break;
+      case 0x05BC: /* DAGESH */
+	  if (a >= 0x05D0 && a <= 0x05EA) {
+	      *ab = sDageshForms[a - 0x05D0];
+	      found = (*ab != 0);
+	  } else if (a == 0xFB2A) { /* SHIN WITH SHIN DOT */
+	      *ab = 0xFB2C;
+	      found = true;
+	  } else if (a == 0xFB2B) { /* SHIN WITH SIN DOT */
+	      *ab = 0xFB2D;
+	      found = true;
+	  }
+	  break;
+      case 0x05BF: /* RAFE */
+	  switch (a) {
+	  case 0x05D1: /* BET */
+	      *ab = 0xFB4C;
+	      found = true;
+	      break;
+	  case 0x05DB: /* KAF */
+	      *ab = 0xFB4D;
+	      found = true;
+	      break;
+	  case 0x05E4: /* PE */
+	      *ab = 0xFB4E;
+	      found = true;
+	      break;
+	  }
+	  break;
+      case 0x05C1: /* SHIN DOT */
+	  if (a == 0x05E9) { /* SHIN */
+	      *ab = 0xFB2A;
+	      found = true;
+	  } else if (a == 0xFB49) { /* SHIN WITH DAGESH */
+	      *ab = 0xFB2C;
+	      found = true;
+	  }
+	  break;
+      case 0x05C2: /* SIN DOT */
+	  if (a == 0x05E9) { /* SHIN */
+	      *ab = 0xFB2B;
+	      found = true;
+	  } else if (a == 0xFB49) { /* SHIN WITH DAGESH */
+	      *ab = 0xFB2D;
+	      found = true;
+	  }
+	  break;
+      }
+  }
+
+  return found;
+}
+
+
+const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hebrew =
+{
+  "hebrew",
+  NULL, /* collect_features */
+  NULL, /* override_features */
+  NULL, /* data_create */
+  NULL, /* data_destroy */
+  NULL, /* preprocess_text */
+  HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
+  NULL, /* decompose */
+  compose_hebrew,
+  NULL, /* setup_masks */
+  HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT,
+  true, /* fallback_position */
+};
diff --git a/src/hb-ot-shape-complex-private.hh b/src/hb-ot-shape-complex-private.hh
index 44387f5..104726e 100644
--- a/src/hb-ot-shape-complex-private.hh
+++ b/src/hb-ot-shape-complex-private.hh
@@ -55,10 +55,12 @@ enum hb_ot_shape_zero_width_marks_type_t {
   HB_COMPLEX_SHAPER_IMPLEMENT (default) /* should be first */ \
   HB_COMPLEX_SHAPER_IMPLEMENT (arabic) \
   HB_COMPLEX_SHAPER_IMPLEMENT (hangul) \
+  HB_COMPLEX_SHAPER_IMPLEMENT (hebrew) \
   HB_COMPLEX_SHAPER_IMPLEMENT (indic) \
   HB_COMPLEX_SHAPER_IMPLEMENT (myanmar) \
   HB_COMPLEX_SHAPER_IMPLEMENT (sea) \
   HB_COMPLEX_SHAPER_IMPLEMENT (thai) \
+  HB_COMPLEX_SHAPER_IMPLEMENT (tibetan) \
   /* ^--- Add new shapers here */
 
 
@@ -193,6 +195,18 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner)
       return &_hb_ot_complex_shaper_hangul;
 
 
+    /* Unicode-2.0 additions */
+    case HB_SCRIPT_TIBETAN:
+
+      return &_hb_ot_complex_shaper_tibetan;
+
+
+    /* Unicode-1.1 additions */
+    case HB_SCRIPT_HEBREW:
+
+      return &_hb_ot_complex_shaper_hebrew;
+
+
     /* ^--- Add new shapers here */
 
 
@@ -230,9 +244,6 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner)
     case HB_SCRIPT_LAO:
     case HB_SCRIPT_THAI:
 
-    /* Unicode-2.0 additions */
-    case HB_SCRIPT_TIBETAN:
-
     /* Unicode-3.2 additions */
     case HB_SCRIPT_TAGALOG:
     case HB_SCRIPT_TAGBANWA:
diff --git a/src/hb-ot-shape-complex-tibetan.cc b/src/hb-ot-shape-complex-tibetan.cc
new file mode 100644
index 0000000..01465a4
--- /dev/null
+++ b/src/hb-ot-shape-complex-tibetan.cc
@@ -0,0 +1,61 @@
+/*
+ * Copyright © 2010,2012  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb-ot-shape-complex-private.hh"
+
+
+static const hb_tag_t tibetan_features[] =
+{
+  HB_TAG('a','b','v','s'),
+  HB_TAG('b','l','w','s'),
+  HB_TAG('a','b','v','m'),
+  HB_TAG('b','l','w','m'),
+  HB_TAG_NONE
+};
+
+static void
+collect_features_tibetan (hb_ot_shape_planner_t *plan)
+{
+  for (const hb_tag_t *script_features = tibetan_features; script_features && *script_features; script_features++)
+    plan->map.add_global_bool_feature (*script_features);
+}
+
+
+const hb_ot_complex_shaper_t _hb_ot_complex_shaper_tibetan =
+{
+  "tibetan",
+  collect_features_tibetan,
+  NULL, /* override_features */
+  NULL, /* data_create */
+  NULL, /* data_destroy */
+  NULL, /* preprocess_text */
+  HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, /* normalization_preference */
+  NULL, /* decompose */
+  NULL, /* compose */
+  NULL, /* setup_masks */
+  HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT,
+  true, /* fallback_position */
+};
commit 6300cd72539284ca294ee8286bbbb7f9c72af320
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Dec 31 16:38:47 2013 +0800

    [ot] Define HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT

diff --git a/src/hb-ot-shape-complex-default.cc b/src/hb-ot-shape-complex-default.cc
index dfb28d8..aca01bd 100644
--- a/src/hb-ot-shape-complex-default.cc
+++ b/src/hb-ot-shape-complex-default.cc
@@ -196,6 +196,6 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_default =
   NULL, /* decompose */
   compose_default,
   NULL, /* setup_masks */
-  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_UNICODE_LATE,
+  HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT,
   true, /* fallback_position */
 };
diff --git a/src/hb-ot-shape-complex-hangul.cc b/src/hb-ot-shape-complex-hangul.cc
index 093f465..94cf5e5 100644
--- a/src/hb-ot-shape-complex-hangul.cc
+++ b/src/hb-ot-shape-complex-hangul.cc
@@ -227,6 +227,6 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hangul =
   NULL, /* decompose */
   NULL, /* compose */
   NULL, /* setup_masks */
-  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_UNICODE_LATE,
+  HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT,
   false, /* fallback_position */
 };
diff --git a/src/hb-ot-shape-complex-private.hh b/src/hb-ot-shape-complex-private.hh
index 320c4e6..44387f5 100644
--- a/src/hb-ot-shape-complex-private.hh
+++ b/src/hb-ot-shape-complex-private.hh
@@ -44,7 +44,9 @@ enum hb_ot_shape_zero_width_marks_type_t {
 //  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_UNICODE_EARLY,
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_UNICODE_LATE,
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
-  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE
+  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
+
+  HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT = HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_UNICODE_LATE
 };
 
 
diff --git a/src/hb-ot-shape-complex-thai.cc b/src/hb-ot-shape-complex-thai.cc
index 04ca7ac..8664eca 100644
--- a/src/hb-ot-shape-complex-thai.cc
+++ b/src/hb-ot-shape-complex-thai.cc
@@ -373,6 +373,6 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_thai =
   NULL, /* decompose */
   NULL, /* compose */
   NULL, /* setup_masks */
-  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_UNICODE_LATE,
+  HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT,
   false,/* fallback_position */
 };
commit 3d6ca0d32e5c6597acfcf59301cb1905586ddb52
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Dec 31 16:04:35 2013 +0800

    [ot] Simplify normalization_preference again
    
    No shaper has more than one behavior re this, so no need for a callback.

diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc
index 4f6c86e..f576720 100644
--- a/src/hb-ot-shape-complex-arabic.cc
+++ b/src/hb-ot-shape-complex-arabic.cc
@@ -366,7 +366,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic =
   data_create_arabic,
   data_destroy_arabic,
   NULL, /* preprocess_text_arabic */
-  NULL, /* normalization_preference */
+  HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
   NULL, /* decompose */
   NULL, /* compose */
   setup_masks_arabic,
diff --git a/src/hb-ot-shape-complex-default.cc b/src/hb-ot-shape-complex-default.cc
index 519790c..dfb28d8 100644
--- a/src/hb-ot-shape-complex-default.cc
+++ b/src/hb-ot-shape-complex-default.cc
@@ -58,12 +58,6 @@ collect_features_default (hb_ot_shape_planner_t *plan)
     plan->map.add_global_bool_feature (*script_features);
 }
 
-static hb_ot_shape_normalization_mode_t
-normalization_preference_default (const hb_segment_properties_t *props)
-{
-  return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
-}
-
 static bool
 compose_default (const hb_ot_shape_normalize_context_t *c,
 		 hb_codepoint_t  a,
@@ -198,7 +192,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_default =
   NULL, /* data_create */
   NULL, /* data_destroy */
   NULL, /* preprocess_text */
-  normalization_preference_default,
+  HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
   NULL, /* decompose */
   compose_default,
   NULL, /* setup_masks */
diff --git a/src/hb-ot-shape-complex-hangul.cc b/src/hb-ot-shape-complex-hangul.cc
index 1b89f20..093f465 100644
--- a/src/hb-ot-shape-complex-hangul.cc
+++ b/src/hb-ot-shape-complex-hangul.cc
@@ -223,7 +223,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hangul =
   NULL, /* data_create */
   NULL, /* data_destroy */
   preprocess_text_hangul,
-  NULL, /* normalization_preference */
+  HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
   NULL, /* decompose */
   NULL, /* compose */
   NULL, /* setup_masks */
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 9edefe3..1e07d33 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -1690,12 +1690,6 @@ clear_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
 }
 
 
-static hb_ot_shape_normalization_mode_t
-normalization_preference_indic (const hb_segment_properties_t *props HB_UNUSED)
-{
-  return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT;
-}
-
 static bool
 decompose_indic (const hb_ot_shape_normalize_context_t *c,
 		 hb_codepoint_t  ab,
@@ -1806,7 +1800,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =
   data_create_indic,
   data_destroy_indic,
   NULL, /* preprocess_text */
-  normalization_preference_indic,
+  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
   decompose_indic,
   compose_indic,
   setup_masks_indic,
diff --git a/src/hb-ot-shape-complex-myanmar.cc b/src/hb-ot-shape-complex-myanmar.cc
index 25ba726..50209ff 100644
--- a/src/hb-ot-shape-complex-myanmar.cc
+++ b/src/hb-ot-shape-complex-myanmar.cc
@@ -541,13 +541,6 @@ final_reordering (const hb_ot_shape_plan_t *plan,
 }
 
 
-static hb_ot_shape_normalization_mode_t
-normalization_preference_myanmar (const hb_segment_properties_t *props HB_UNUSED)
-{
-  return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT;
-}
-
-
 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar =
 {
   "myanmar",
@@ -556,7 +549,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar =
   NULL, /* data_create */
   NULL, /* data_destroy */
   NULL, /* preprocess_text */
-  normalization_preference_myanmar,
+  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
   NULL, /* decompose */
   NULL, /* compose */
   setup_masks_myanmar,
diff --git a/src/hb-ot-shape-complex-private.hh b/src/hb-ot-shape-complex-private.hh
index 17b95e2..320c4e6 100644
--- a/src/hb-ot-shape-complex-private.hh
+++ b/src/hb-ot-shape-complex-private.hh
@@ -106,12 +106,7 @@ struct hb_ot_complex_shaper_t
 			   hb_font_t                *font);
 
 
-  /* normalization_preference()
-   * Called during shape().
-   * May be NULL.
-   */
-  hb_ot_shape_normalization_mode_t
-  (*normalization_preference) (const hb_segment_properties_t *props);
+  hb_ot_shape_normalization_mode_t normalization_preference;
 
   /* decompose()
    * Called during shape()'s normalization.
diff --git a/src/hb-ot-shape-complex-sea.cc b/src/hb-ot-shape-complex-sea.cc
index da687ed..6288a90 100644
--- a/src/hb-ot-shape-complex-sea.cc
+++ b/src/hb-ot-shape-complex-sea.cc
@@ -360,13 +360,6 @@ final_reordering (const hb_ot_shape_plan_t *plan,
 }
 
 
-static hb_ot_shape_normalization_mode_t
-normalization_preference_sea (const hb_segment_properties_t *props HB_UNUSED)
-{
-  return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT;
-}
-
-
 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_sea =
 {
   "sea",
@@ -375,7 +368,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_sea =
   NULL, /* data_create */
   NULL, /* data_destroy */
   NULL, /* preprocess_text */
-  normalization_preference_sea,
+  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
   NULL, /* decompose */
   NULL, /* compose */
   setup_masks_sea,
diff --git a/src/hb-ot-shape-complex-thai.cc b/src/hb-ot-shape-complex-thai.cc
index 4594533..04ca7ac 100644
--- a/src/hb-ot-shape-complex-thai.cc
+++ b/src/hb-ot-shape-complex-thai.cc
@@ -369,7 +369,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_thai =
   NULL, /* data_create */
   NULL, /* data_destroy */
   preprocess_text_thai,
-  NULL, /* normalization_preference */
+  HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
   NULL, /* decompose */
   NULL, /* compose */
   NULL, /* setup_masks */
diff --git a/src/hb-ot-shape-normalize.cc b/src/hb-ot-shape-normalize.cc
index 6531e1b..9f3cd76 100644
--- a/src/hb-ot-shape-normalize.cc
+++ b/src/hb-ot-shape-normalize.cc
@@ -289,9 +289,7 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,
 			hb_buffer_t *buffer,
 			hb_font_t *font)
 {
-  hb_ot_shape_normalization_mode_t mode = plan->shaper->normalization_preference ?
-					  plan->shaper->normalization_preference (&buffer->props) :
-					  HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT;
+  hb_ot_shape_normalization_mode_t mode = plan->shaper->normalization_preference;
   const hb_ot_shape_normalize_context_t c = {
     plan,
     buffer,
commit c98b7183f7dc453d5bac1f2503017cded317a495
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Dec 31 15:55:40 2013 +0800

    [ot] Add Hangul shaper
    
    Not exhaustively tested, but I think I got the intended logic
    right.
    
    The logic can perhaps be simplified.  Maybe we should disable
    normalization with this shaper.  Then again, for now focusing on
    correctness.

diff --git a/src/Makefile.am b/src/Makefile.am
index 67a328c..62544db 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -92,6 +92,7 @@ HBSOURCES += \
 	hb-ot-shape-complex-arabic-fallback.hh \
 	hb-ot-shape-complex-arabic-table.hh \
 	hb-ot-shape-complex-default.cc \
+	hb-ot-shape-complex-hangul.cc \
 	hb-ot-shape-complex-indic.cc \
 	hb-ot-shape-complex-indic-machine.hh \
 	hb-ot-shape-complex-indic-private.hh \
diff --git a/src/hb-ot-shape-complex-default.cc b/src/hb-ot-shape-complex-default.cc
index d6afa0e..519790c 100644
--- a/src/hb-ot-shape-complex-default.cc
+++ b/src/hb-ot-shape-complex-default.cc
@@ -32,14 +32,6 @@
 
 /* The default shaper *only* adds additional per-script features.*/
 
-static const hb_tag_t hangul_features[] =
-{
-  HB_TAG('l','j','m','o'),
-  HB_TAG('v','j','m','o'),
-  HB_TAG('t','j','m','o'),
-  HB_TAG_NONE
-};
-
 static const hb_tag_t tibetan_features[] =
 {
   HB_TAG('a','b','v','s'),
@@ -56,11 +48,6 @@ collect_features_default (hb_ot_shape_planner_t *plan)
 
   switch ((hb_tag_t) plan->props.script)
   {
-    /* Unicode-1.1 additions */
-    case HB_SCRIPT_HANGUL:
-      script_features = hangul_features;
-      break;
-
     /* Unicode-2.0 additions */
     case HB_SCRIPT_TIBETAN:
       script_features = tibetan_features;
diff --git a/src/hb-ot-shape-complex-hangul.cc b/src/hb-ot-shape-complex-hangul.cc
new file mode 100644
index 0000000..1b89f20
--- /dev/null
+++ b/src/hb-ot-shape-complex-hangul.cc
@@ -0,0 +1,232 @@
+/*
+ * Copyright © 2013  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb-ot-shape-complex-private.hh"
+
+
+/* Hangul shaper */
+
+
+static const hb_tag_t hangul_features[] =
+{
+  HB_TAG('l','j','m','o'),
+  HB_TAG('v','j','m','o'),
+  HB_TAG('t','j','m','o'),
+  HB_TAG_NONE
+};
+
+static void
+collect_features_hangul (hb_ot_shape_planner_t *plan)
+{
+  for (const hb_tag_t *script_features = hangul_features; script_features && *script_features; script_features++)
+    plan->map.add_global_bool_feature (*script_features);
+}
+
+#define LBase 0x1100
+#define VBase 0x1161
+#define TBase 0x11A7
+#define LCount 19
+#define VCount 21
+#define TCount 28
+#define SBase 0xAC00
+#define NCount (VCount * TCount)
+#define SCount (LCount * NCount)
+
+#define isCombiningL(u) (hb_in_range<hb_codepoint_t> ((u), LBase, LBase+LCount-1))
+#define isCombiningV(u) (hb_in_range<hb_codepoint_t> ((u), VBase, VBase+VCount-1))
+#define isCombiningT(u) (hb_in_range<hb_codepoint_t> ((u), TBase+1, TBase+TCount-1))
+#define isCombinedS(u) (hb_in_range<hb_codepoint_t> ((u), SBase, SBase+SCount-1))
+
+#define isT(u) (hb_in_ranges<hb_codepoint_t> ((u),  0x11A8, 0x11FF, 0xD7C8, 0xD7FF))
+
+static void
+preprocess_text_hangul (const hb_ot_shape_plan_t *plan,
+			hb_buffer_t              *buffer,
+			hb_font_t                *font)
+{
+  /* Hangul syllables come in two shapes: LV, and LVT.  Of those:
+   *
+   *   - LV can be precomposed, or decomposed.  Let's call those
+   *     <LV> and <L,V>,
+   *   - LVT can be fully precomposed, partially precomposed, or
+   *     fully decomposed.  Ie. <LVT>, <LV,T>, or <L,V,T>.
+   *
+   * The composition / decomposition is mechanical.  However, not
+   * all <L,V> sequences compose, and not all <LV,T> sequences
+   * compose.
+   *
+   * Here are the specifics:
+   *
+   *   - <L>: U+1100..115F, U+A960..A97F
+   *   - <V>: U+1160..11A7, U+D7B0..D7C7
+   *   - <T>: U+11A8..11FF, U+D7C8..D7FF
+   *
+   *   - Only the <L,V> sequences for the 11xx ranges combine.
+   *   - Only <LV,T> sequences for T in U+11A8..11C3 combine.
+   *
+   * Here is what we want to accomplish in this shaper:
+   *
+   *   - If the whole syllable can be precomposed, do that,
+   *   - Otherwise, fully decompose.
+   *
+   * That is, of the different possible syllables:
+   *
+   *   <L>
+   *   <L,V>
+   *   <L,V,T>
+   *   <LV>
+   *   <LVT>
+   *   <LV, T>
+   *
+   * - <L> needs no work.
+   *
+   * - <LV> and <LVT> can stay the way they are if the font supports them, otherwise we
+   *   should fully decompose them if font supports.
+   *
+   * - <L,V> and <L,V,T> we should compose if the whole thing can be composed.
+   *
+   * - <LV,T> we should compose if the whole thing can be composed, otherwise we should
+   *   decompose.
+   */
+
+  buffer->clear_output ();
+  unsigned int count = buffer->len;
+  for (buffer->idx = 0; buffer->idx < count;)
+  {
+    hb_codepoint_t u = buffer->cur().codepoint;
+
+    if (isCombiningL(u) && buffer->idx + 1 < count)
+    {
+      hb_codepoint_t l = u;
+      hb_codepoint_t v = buffer->cur(+1).codepoint;
+      if (isCombiningV(v))
+      {
+        /* Have <L,V> or <L,V,T>. */
+        unsigned int len = 2;
+	unsigned int tindex = 0;
+	if (buffer->idx + 2 < count)
+	{
+	  hb_codepoint_t t = buffer->cur(+2).codepoint;
+	  if (isCombiningT(t))
+	  {
+	    len = 3;
+	    tindex = t - TBase;
+	  }
+	  else if (isT (t))
+	  {
+	    /* Old T jamo.  Doesn't combine.  Don't combine *anything*. */
+	    len = 0;
+	  }
+	}
+
+	if (len)
+	{
+	  hb_codepoint_t s = SBase + (l - LBase) * NCount + (v - VBase) * TCount + tindex;
+	  hb_codepoint_t glyph;
+	  if (font->get_glyph (s, 0, &glyph))
+	  {
+	    buffer->replace_glyphs (len, 1, &s);
+	    if (unlikely (buffer->in_error))
+	      return;
+	    continue;
+	  }
+	}
+      }
+    }
+
+    else if (isCombinedS(u))
+    {
+      /* Have <LV>, <LVT>, or <LV,T> */
+      hb_codepoint_t s = u;
+      hb_codepoint_t glyph;
+      bool has_glyph = font->get_glyph (s, 0, &glyph);
+      unsigned int lindex = (s - SBase) / NCount;
+      unsigned int nindex = (s - SBase) % NCount;
+      unsigned int vindex = nindex / TCount; /* NCount == VCount * TCount: V is the quotient by TCount... */
+      unsigned int tindex = nindex % TCount; /* ...and T the remainder; 0 means the syllable is <LV>. */
+
+      if (tindex && has_glyph)
+	goto next; /* <LVT> supported.  Nothing to do. */
+
+      if (!tindex &&
+	  buffer->idx + 1 < count &&
+	  isCombiningT (buffer->cur(+1).codepoint))
+      {
+	/* <LV,T>, try to combine. */
+	tindex = buffer->cur(+1).codepoint - TBase;
+	hb_codepoint_t new_s = s + tindex;
+        if (font->get_glyph (new_s, 0, &glyph))
+	{
+	  buffer->replace_glyphs (2, 1, &new_s);
+	  if (unlikely (buffer->in_error))
+	    return;
+	  continue;
+	}
+      }
+
+      /* Otherwise, decompose if font doesn't support <LV>,
+       * or if having non-combining <LV,T>.  Note that we
+       * already handled combining <LV,T> above. */
+      if (!has_glyph ||
+	  (buffer->idx + 1 < count &&
+	   isT (buffer->cur(+1).codepoint)))
+      {
+	hb_codepoint_t decomposed[3] = {LBase + lindex,
+					VBase + vindex,
+					TBase + tindex};
+        if (font->get_glyph (decomposed[0], 0, &glyph) &&
+	    font->get_glyph (decomposed[1], 0, &glyph) &&
+	    (!tindex || font->get_glyph (decomposed[2], 0, &glyph))) /* T glyph only required when there is a T. */
+	{
+	  buffer->replace_glyphs (1, tindex ? 3 : 2, decomposed);
+	  if (unlikely (buffer->in_error))
+	    return;
+	  continue;
+	}
+      }
+    }
+
+    next:
+    buffer->next_glyph ();
+  }
+  buffer->swap_buffers ();
+}
+
+const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hangul =
+{
+  "hangul",
+  collect_features_hangul,
+  NULL, /* override_features */
+  NULL, /* data_create */
+  NULL, /* data_destroy */
+  preprocess_text_hangul,
+  NULL, /* normalization_preference */
+  NULL, /* decompose */
+  NULL, /* compose */
+  NULL, /* setup_masks */
+  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_UNICODE_LATE,
+  false, /* fallback_position */
+};
diff --git a/src/hb-ot-shape-complex-private.hh b/src/hb-ot-shape-complex-private.hh
index ac0072b..17b95e2 100644
--- a/src/hb-ot-shape-complex-private.hh
+++ b/src/hb-ot-shape-complex-private.hh
@@ -52,6 +52,7 @@ enum hb_ot_shape_zero_width_marks_type_t {
 #define HB_COMPLEX_SHAPERS_IMPLEMENT_SHAPERS \
   HB_COMPLEX_SHAPER_IMPLEMENT (default) /* should be first */ \
   HB_COMPLEX_SHAPER_IMPLEMENT (arabic) \
+  HB_COMPLEX_SHAPER_IMPLEMENT (hangul) \
   HB_COMPLEX_SHAPER_IMPLEMENT (indic) \
   HB_COMPLEX_SHAPER_IMPLEMENT (myanmar) \
   HB_COMPLEX_SHAPER_IMPLEMENT (sea) \
@@ -189,19 +190,10 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner)
       return &_hb_ot_complex_shaper_thai;
 
 
-#if 0
-    /* Note:
-     * Currently we don't have a separate Hangul shaper.  The default shaper handles
-     * Hangul by enabling jamo features.  We may want to implement a separate shaper
-     * in the future.  See this thread for details of what such a shaper would do:
-     *
-     *   http://lists.freedesktop.org/archives/harfbuzz/2013-April/003070.html
-     */
     /* Unicode-1.1 additions */
     case HB_SCRIPT_HANGUL:
 
       return &_hb_ot_complex_shaper_hangul;
-#endif
 
 
     /* ^--- Add new shapers here */
diff --git a/src/hb-private.hh b/src/hb-private.hh
index 4b72260..680b21e 100644
--- a/src/hb-private.hh
+++ b/src/hb-private.hh
@@ -808,6 +808,12 @@ hb_in_range (T u, T lo, T hi)
 }
 
 template <typename T> static inline bool
+hb_in_ranges (T u, T lo1, T hi1, T lo2, T hi2)
+{
+  return hb_in_range (u, lo1, hi1) || hb_in_range (u, lo2, hi2);
+}
+
+template <typename T> static inline bool
 hb_in_ranges (T u, T lo1, T hi1, T lo2, T hi2, T lo3, T hi3)
 {
   return hb_in_range (u, lo1, hi1) || hb_in_range (u, lo2, hi2) || hb_in_range (u, lo3, hi3);


More information about the HarfBuzz mailing list