[HarfBuzz] harfbuzz-ng: Branch 'master' - 5 commits

Behdad Esfahbod behdad at kemper.freedesktop.org
Wed Jul 20 20:20:05 PDT 2011


 src/hb-glib.cc      |  149 +++++++++++++++++++++++++++++++++++++++++++---------
 src/hb-icu.cc       |  145 ++++++++++++++++++++++++++++++++++++++++++--------
 src/hb-unicode.cc   |    6 +-
 test/test-unicode.c |   67 +++++++++++++++++++++++
 4 files changed, 317 insertions(+), 50 deletions(-)

New commits:
commit 498e1a9be673bb02c00aac3f12bb4c6993a85910
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 20 23:19:49 2011 -0400

    [icu] Implement compose()/decompose()

diff --git a/src/hb-icu.cc b/src/hb-icu.cc
index 4aa56f6..384b77d 100644
--- a/src/hb-icu.cc
+++ b/src/hb-icu.cc
@@ -35,6 +35,8 @@
 
 #include <unicode/uversion.h>
 #include <unicode/uchar.h>
+#include <unicode/unorm.h>
+#include <unicode/unistr.h>
 
 HB_BEGIN_DECLS
 
@@ -170,7 +172,34 @@ hb_icu_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
 			hb_codepoint_t     *ab,
 			void               *user_data HB_UNUSED)
 {
-  return FALSE;
+  if (!a || !b)
+    return FALSE;
+
+  UChar utf16[4], normalized[5];
+  gint len;
+  hb_bool_t ret, err;
+  UErrorCode icu_err;
+
+  len = 0;
+  err = FALSE;
+  U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), a, err);
+  if (err) return FALSE;
+  U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), b, err);
+  if (err) return FALSE;
+
+  icu_err = U_ZERO_ERROR;
+  len = unorm_normalize (utf16, len, UNORM_NFC, 0, normalized, ARRAY_LENGTH (normalized), &icu_err);
+  if (icu_err)
+    return FALSE;
+  normalized[len] = 0;
+  if (u_strlen (normalized) == 1) {
+    U16_GET_UNSAFE (normalized, 0, *ab);
+    ret = TRUE;
+  } else {
+    ret = FALSE;
+  }
+
+  return ret;
 }
 
 static hb_bool_t
@@ -180,7 +209,61 @@ hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
 			  hb_codepoint_t     *b,
 			  void               *user_data HB_UNUSED)
 {
-  return FALSE;
+  UChar utf16[2], normalized[20];
+  gint len;
+  hb_bool_t ret, err;
+  UErrorCode icu_err;
+
+  len = 0;
+  err = FALSE;
+  U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), ab, err);
+  if (err) return FALSE;
+
+  icu_err = U_ZERO_ERROR;
+  len = unorm_normalize (utf16, len, UNORM_NFD, 0, normalized, ARRAY_LENGTH (normalized), &icu_err);
+  if (icu_err)
+    return FALSE;
+
+  normalized[len] = 0;
+  len = u_strlen (normalized);
+
+  if (len == 1) {
+    U16_GET_UNSAFE (normalized, 0, *a);
+    *b = 0;
+    ret = *a != ab;
+  } else if (len == 2) {
+    /* Here's the ugly part: if ab decomposes to a single character and
+     * that character decomposes again, we have to detect that and undo
+     * the second part :-(. */
+    UChar recomposed[20];
+    icu_err = U_ZERO_ERROR;
+    len = unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (recomposed), &icu_err);
+    if (icu_err)
+      return FALSE;
+    U16_GET_UNSAFE (recomposed, 0, *a);
+    if (*a != ab) {
+      *b = 0;
+    } else {
+      len =0;
+      U16_NEXT_UNSAFE (normalized, len, *a);
+      U16_GET_UNSAFE (normalized, len, *b);
+    }
+    ret = TRUE;
+  } else {
+    /* If decomposed to more than two characters, take the last one,
+     * and recompose the rest to get the first component. */
+    U16_PREV_UNSAFE (normalized, len, *b);
+    UChar recomposed[20];
+    icu_err = U_ZERO_ERROR;
+    len = unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (recomposed), &icu_err);
+    if (icu_err)
+      return FALSE;
+    /* We expect that recomposed has exactly one character now. */
+    U16_GET_UNSAFE (recomposed, 0, *a);
+    ret = TRUE;
+  }
+
+  return ret;
 }
 
 extern HB_INTERNAL hb_unicode_funcs_t _hb_unicode_funcs_icu;
diff --git a/test/test-unicode.c b/test/test-unicode.c
index 09dde54..dd57984 100644
--- a/test/test-unicode.c
+++ b/test/test-unicode.c
@@ -864,6 +864,7 @@ main (int argc, char **argv)
 #endif
 #ifdef HAVE_ICU
   hb_test_add_data_flavor (hb_icu_get_unicode_funcs (),              "icu",     test_unicode_properties);
+  hb_test_add_data_flavor (hb_icu_get_unicode_funcs (),              "icu",     test_unicode_normalization);
   hb_test_add_data_flavor ((gconstpointer) script_roundtrip_icu,     "icu",     test_unicode_script_roundtrip);
 #endif
 
commit ffd4a436f7baccb68a0c3602f94ea0246e32844f
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 20 22:30:29 2011 -0400

    Add tests for compose()/decompose()
    
    Adjust glib fallback implementation.
    
    The tests are not hooked up for ICU yet.

diff --git a/src/hb-glib.cc b/src/hb-glib.cc
index 6174498..fbf8cf5 100644
--- a/src/hb-glib.cc
+++ b/src/hb-glib.cc
@@ -244,6 +244,9 @@ hb_glib_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
   /* We don't ifdef-out the fallback code such that compiler always
    * sees it and makes sure it's compilable. */
 
+  if (!a || !b)
+    return FALSE;
+
   gchar utf8[12];
   gchar *normalized;
   gint len;
@@ -293,8 +296,18 @@ hb_glib_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
     *b = 0;
     ret = *a != ab;
   } else if (len == 2) {
-    *a = g_utf8_get_char (normalized);
-    *b = g_utf8_get_char (g_utf8_next_char (normalized));
+    /* Here's the ugly part: if ab decomposes to a single character and
+     * that character decomposes again, we have to detect that and undo
+     * the second part :-(. */
+    gchar *recomposed = g_utf8_normalize (normalized, -1, G_NORMALIZE_NFC);
+    if (g_utf8_get_char (recomposed) != ab) {
+      *a = g_utf8_get_char (recomposed);
+      *b = 0;
+    } else {
+      *a = g_utf8_get_char (normalized);
+      *b = g_utf8_get_char (g_utf8_next_char (normalized));
+    }
+    g_free (recomposed);
     ret = TRUE;
   } else {
     /* If decomposed to more than two characters, take the last one,
diff --git a/src/hb-unicode.cc b/src/hb-unicode.cc
index 63e83b7..aba2cd3 100644
--- a/src/hb-unicode.cc
+++ b/src/hb-unicode.cc
@@ -267,7 +267,7 @@ hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
 		      hb_codepoint_t     *a,
 		      hb_codepoint_t     *b)
 {
-  *a = *b = 0;
+  *a = ab; *b = 0;
   return ufuncs->func.decompose (ufuncs, ab, a, b, ufuncs->user_data.decompose);
 }
 
diff --git a/test/test-unicode.c b/test/test-unicode.c
index a691cb4..09dde54 100644
--- a/test/test-unicode.c
+++ b/test/test-unicode.c
@@ -780,6 +780,70 @@ test_unicode_script_roundtrip (gconstpointer user_data)
 
 
 /* TODO test compose() and decompose() */
+static void
+test_unicode_normalization (gconstpointer user_data)
+{
+  hb_unicode_funcs_t *uf = (hb_unicode_funcs_t *) user_data;
+  gunichar a, b, ab;
+
+
+  /* Test compose() */
+
+  /* Not composable */
+  g_assert (!hb_unicode_compose (uf, 0x0041, 0x0042, &ab) && ab == 0);
+  g_assert (!hb_unicode_compose (uf, 0x0041, 0, &ab) && ab == 0);
+  g_assert (!hb_unicode_compose (uf, 0x0066, 0x0069, &ab) && ab == 0);
+
+  /* Singletons should not compose */
+  g_assert (!hb_unicode_compose (uf, 0x212B, 0, &ab) && ab == 0);
+  g_assert (!hb_unicode_compose (uf, 0x00C5, 0, &ab) && ab == 0);
+  g_assert (!hb_unicode_compose (uf, 0x2126, 0, &ab) && ab == 0);
+  g_assert (!hb_unicode_compose (uf, 0x03A9, 0, &ab) && ab == 0);
+
+  /* Pairs */
+  g_assert (hb_unicode_compose (uf, 0x0041, 0x030A, &ab) && ab == 0x00C5);
+  g_assert (hb_unicode_compose (uf, 0x006F, 0x0302, &ab) && ab == 0x00F4);
+  g_assert (hb_unicode_compose (uf, 0x1E63, 0x0307, &ab) && ab == 0x1E69);
+  g_assert (hb_unicode_compose (uf, 0x0073, 0x0323, &ab) && ab == 0x1E63);
+  g_assert (hb_unicode_compose (uf, 0x0064, 0x0307, &ab) && ab == 0x1E0B);
+  g_assert (hb_unicode_compose (uf, 0x0064, 0x0323, &ab) && ab == 0x1E0D);
+
+  /* Hangul */
+  g_assert (hb_unicode_compose (uf, 0xD4CC, 0x11B6, &ab) && ab == 0xD4DB);
+  g_assert (hb_unicode_compose (uf, 0x1111, 0x1171, &ab) && ab == 0xD4CC);
+  g_assert (hb_unicode_compose (uf, 0xCE20, 0x11B8, &ab) && ab == 0xCE31);
+  g_assert (hb_unicode_compose (uf, 0x110E, 0x1173, &ab) && ab == 0xCE20);
+
+
+  /* Test decompose() */
+
+  /* Not decomposable */
+  g_assert (!hb_unicode_decompose (uf, 0x0041, &a, &b) && a == 0x0041 && b == 0);
+  g_assert (!hb_unicode_decompose (uf, 0xFB01, &a, &b) && a == 0xFB01 && b == 0);
+
+  /* Singletons */
+  g_assert (hb_unicode_decompose (uf, 0x212B, &a, &b));
+  g_assert_cmphex (a, ==, 0x00C5);
+  g_assert_cmphex (b, ==, 0);
+  g_assert (hb_unicode_decompose (uf, 0x212B, &a, &b) && a == 0x00C5 && b == 0);
+  g_assert (hb_unicode_decompose (uf, 0x2126, &a, &b) && a == 0x03A9 && b == 0);
+
+  /* Pairs */
+  g_assert (hb_unicode_decompose (uf, 0x00C5, &a, &b) && a == 0x0041 && b == 0x030A);
+  g_assert (hb_unicode_decompose (uf, 0x00F4, &a, &b) && a == 0x006F && b == 0x0302);
+  g_assert (hb_unicode_decompose (uf, 0x1E69, &a, &b) && a == 0x1E63 && b == 0x0307);
+  g_assert (hb_unicode_decompose (uf, 0x1E63, &a, &b) && a == 0x0073 && b == 0x0323);
+  g_assert (hb_unicode_decompose (uf, 0x1E0B, &a, &b) && a == 0x0064 && b == 0x0307);
+  g_assert (hb_unicode_decompose (uf, 0x1E0D, &a, &b) && a == 0x0064 && b == 0x0323);
+
+  /* Hangul */
+  g_assert (hb_unicode_decompose (uf, 0xD4DB, &a, &b) && a == 0xD4CC && b == 0x11B6);
+  g_assert (hb_unicode_decompose (uf, 0xD4CC, &a, &b) && a == 0x1111 && b == 0x1171);
+  g_assert (hb_unicode_decompose (uf, 0xCE31, &a, &b) && a == 0xCE20 && b == 0x11B8);
+  g_assert (hb_unicode_decompose (uf, 0xCE20, &a, &b) && a == 0x110E && b == 0x1173);
+
+}
+
 
 
 int
@@ -791,9 +855,11 @@ main (int argc, char **argv)
   hb_test_add (test_unicode_properties_empty);
 
   hb_test_add_data_flavor (hb_unicode_funcs_get_default (),          "default", test_unicode_properties);
+  hb_test_add_data_flavor (hb_unicode_funcs_get_default (),          "default", test_unicode_normalization);
   hb_test_add_data_flavor ((gconstpointer) script_roundtrip_default, "default", test_unicode_script_roundtrip);
 #ifdef HAVE_GLIB
   hb_test_add_data_flavor (hb_glib_get_unicode_funcs (),             "glib",    test_unicode_properties);
+  hb_test_add_data_flavor (hb_glib_get_unicode_funcs (),             "glib",    test_unicode_normalization);
   hb_test_add_data_flavor ((gconstpointer) script_roundtrip_glib,    "glib",    test_unicode_script_roundtrip);
 #endif
 #ifdef HAVE_ICU
commit fca0923b04aeff9369849da97d247a647611f346
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 20 22:16:13 2011 -0400

    Minor

diff --git a/src/hb-icu.cc b/src/hb-icu.cc
index 4797cc5..4aa56f6 100644
--- a/src/hb-icu.cc
+++ b/src/hb-icu.cc
@@ -63,18 +63,18 @@ hb_icu_script_from_script (hb_script_t script)
 
 
 static unsigned int
-hb_icu_get_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
-			    hb_codepoint_t      unicode,
-			    void               *user_data HB_UNUSED)
+hb_icu_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+				hb_codepoint_t      unicode,
+				void               *user_data HB_UNUSED)
 
 {
   return u_getCombiningClass (unicode);
 }
 
 static unsigned int
-hb_icu_get_eastasian_width (hb_unicode_funcs_t *ufuncs HB_UNUSED,
-			    hb_codepoint_t      unicode,
-			    void               *user_data HB_UNUSED)
+hb_icu_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+				hb_codepoint_t      unicode,
+				void               *user_data HB_UNUSED)
 {
   switch (u_getIntPropertyValue(unicode, UCHAR_EAST_ASIAN_WIDTH))
   {
@@ -91,9 +91,9 @@ hb_icu_get_eastasian_width (hb_unicode_funcs_t *ufuncs HB_UNUSED,
 }
 
 static hb_unicode_general_category_t
-hb_icu_get_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
-			     hb_codepoint_t      unicode,
-			     void               *user_data HB_UNUSED)
+hb_icu_unicode_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+				 hb_codepoint_t      unicode,
+				 void               *user_data HB_UNUSED)
 {
   switch (u_getIntPropertyValue(unicode, UCHAR_GENERAL_CATEGORY))
   {
@@ -142,17 +142,17 @@ hb_icu_get_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
 }
 
 static hb_codepoint_t
-hb_icu_get_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
-		      hb_codepoint_t      unicode,
-		      void               *user_data HB_UNUSED)
+hb_icu_unicode_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+			  hb_codepoint_t      unicode,
+			  void               *user_data HB_UNUSED)
 {
   return u_charMirror(unicode);
 }
 
 static hb_script_t
-hb_icu_get_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
-		   hb_codepoint_t      unicode,
-		   void               *user_data HB_UNUSED)
+hb_icu_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+		       hb_codepoint_t      unicode,
+		       void               *user_data HB_UNUSED)
 {
   UErrorCode status = U_ZERO_ERROR;
   UScriptCode scriptCode = uscript_getScript(unicode, &status);
@@ -163,6 +163,26 @@ hb_icu_get_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
   return hb_icu_script_to_script (scriptCode);
 }
 
+static hb_bool_t
+hb_icu_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+			hb_codepoint_t      a,
+			hb_codepoint_t      b,
+			hb_codepoint_t     *ab,
+			void               *user_data HB_UNUSED)
+{
+  return FALSE;
+}
+
+static hb_bool_t
+hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+			  hb_codepoint_t      ab,
+			  hb_codepoint_t     *a,
+			  hb_codepoint_t     *b,
+			  void               *user_data HB_UNUSED)
+{
+  return FALSE;
+}
+
 extern HB_INTERNAL hb_unicode_funcs_t _hb_unicode_funcs_icu;
 hb_unicode_funcs_t _hb_icu_unicode_funcs = {
   HB_OBJECT_HEADER_STATIC,
@@ -170,15 +190,9 @@ hb_unicode_funcs_t _hb_icu_unicode_funcs = {
   NULL, /* parent */
   TRUE, /* immutable */
   {
-    hb_icu_get_combining_class,
-    hb_icu_get_eastasian_width,
-    hb_icu_get_general_category,
-    hb_icu_get_mirroring,
-    hb_icu_get_script,
-    /* TODO
-    hb_icu_compose,
-    hb_icu_decompose,
-    */
+#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_icu_unicode_##name,
+    HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_UNICODE_FUNC_IMPLEMENT
   }
 };
 
commit 26b6024962b254b624d4f22088b6c87745074743
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 20 21:58:14 2011 -0400

    [glib] Use g_unicode_script_to/from_iso15924() if available

diff --git a/src/hb-glib.cc b/src/hb-glib.cc
index 7a177e4..6174498 100644
--- a/src/hb-glib.cc
+++ b/src/hb-glib.cc
@@ -36,6 +36,7 @@
 
 HB_BEGIN_DECLS
 
+#if !GLIB_CHECK_VERSION(2,29,14)
 static const hb_script_t
 glib_script_to_script[] =
 {
@@ -148,10 +149,14 @@ glib_script_to_script[] =
   HB_SCRIPT_BRAHMI,
   HB_SCRIPT_MANDAIC
 };
+#endif
 
 hb_script_t
 hb_glib_script_to_script (GUnicodeScript script)
 {
+#if GLIB_CHECK_VERSION(2,29,14)
+  return (hb_script_t) g_unicode_script_to_iso15924 (script);
+#else
   if (likely ((unsigned int) script < ARRAY_LENGTH (glib_script_to_script)))
     return glib_script_to_script[script];
 
@@ -159,11 +164,15 @@ hb_glib_script_to_script (GUnicodeScript script)
     return HB_SCRIPT_INVALID;
 
   return HB_SCRIPT_UNKNOWN;
+#endif
 }
 
 GUnicodeScript
 hb_glib_script_from_script (hb_script_t script)
 {
+#if GLIB_CHECK_VERSION(2,29,14)
+  return g_unicode_script_from_iso15924 (script);
+#else
   unsigned int count = ARRAY_LENGTH (glib_script_to_script);
   for (unsigned int i = 0; i < count; i++)
     if (glib_script_to_script[i] == script)
@@ -173,6 +182,7 @@ hb_glib_script_from_script (hb_script_t script)
     return G_UNICODE_SCRIPT_INVALID_CODE;
 
   return G_UNICODE_SCRIPT_UNKNOWN;
+#endif
 }
 
 
commit 22fdc66712464bdb02e45eed49e4be57e79b442f
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Jul 20 21:51:37 2011 -0400

    [glib] Add compose() and decompose() implementations with fallback

diff --git a/src/hb-glib.cc b/src/hb-glib.cc
index de90972..7a177e4 100644
--- a/src/hb-glib.cc
+++ b/src/hb-glib.cc
@@ -177,26 +177,26 @@ hb_glib_script_from_script (hb_script_t script)
 
 
 static unsigned int
-hb_glib_get_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
-                             hb_codepoint_t      unicode,
-                             void               *user_data HB_UNUSED)
+hb_glib_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+				 hb_codepoint_t      unicode,
+				 void               *user_data HB_UNUSED)
 
 {
   return g_unichar_combining_class (unicode);
 }
 
 static unsigned int
-hb_glib_get_eastasian_width (hb_unicode_funcs_t *ufuncs HB_UNUSED,
-                             hb_codepoint_t      unicode,
-                             void               *user_data HB_UNUSED)
+hb_glib_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+				 hb_codepoint_t      unicode,
+				 void               *user_data HB_UNUSED)
 {
   return g_unichar_iswide (unicode) ? 2 : 1;
 }
 
 static hb_unicode_general_category_t
-hb_glib_get_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
-                              hb_codepoint_t      unicode,
-                              void               *user_data HB_UNUSED)
+hb_glib_unicode_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+				  hb_codepoint_t      unicode,
+				  void               *user_data HB_UNUSED)
 
 {
   /* hb_unicode_general_category_t and GUnicodeType are identical */
@@ -204,22 +204,106 @@ hb_glib_get_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
 }
 
 static hb_codepoint_t
-hb_glib_get_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
-                       hb_codepoint_t      unicode,
-                       void               *user_data HB_UNUSED)
+hb_glib_unicode_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+			   hb_codepoint_t      unicode,
+			   void               *user_data HB_UNUSED)
 {
   g_unichar_get_mirror_char (unicode, &unicode);
   return unicode;
 }
 
 static hb_script_t
-hb_glib_get_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
-                    hb_codepoint_t      unicode,
-                    void               *user_data HB_UNUSED)
+hb_glib_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+			hb_codepoint_t      unicode,
+			void               *user_data HB_UNUSED)
 {
   return hb_glib_script_to_script (g_unichar_get_script (unicode));
 }
 
+static hb_bool_t
+hb_glib_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+			 hb_codepoint_t      a,
+			 hb_codepoint_t      b,
+			 hb_codepoint_t     *ab,
+			 void               *user_data HB_UNUSED)
+{
+#if GLIB_CHECK_VERSION(2,29,12)
+  return g_unichar_compose (a, b, ab);
+#endif
+
+  /* We don't ifdef-out the fallback code such that compiler always
+   * sees it and makes sure it's compilable. */
+
+  gchar utf8[12];
+  gchar *normalized;
+  gint len;
+  hb_bool_t ret;
+
+  len = g_unichar_to_utf8 (a, utf8);
+  len += g_unichar_to_utf8 (b, utf8 + len);
+  normalized = g_utf8_normalize (utf8, len, G_NORMALIZE_NFC);
+
+  len = g_utf8_strlen (normalized, -1);
+  if (len == 1) {
+    *ab = g_utf8_get_char (normalized);
+    ret = TRUE;
+  } else {
+    ret = FALSE;
+  }
+
+  g_free (normalized);
+  return ret;
+}
+
+static hb_bool_t
+hb_glib_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
+			   hb_codepoint_t      ab,
+			   hb_codepoint_t     *a,
+			   hb_codepoint_t     *b,
+			   void               *user_data HB_UNUSED)
+{
+#if GLIB_CHECK_VERSION(2,29,12)
+  return g_unichar_decompose (ab, a, b);
+#endif
+
+  /* We don't ifdef-out the fallback code such that compiler always
+   * sees it and makes sure it's compilable. */
+
+  gchar utf8[6];
+  gchar *normalized;
+  gint len;
+  hb_bool_t ret;
+
+  len = g_unichar_to_utf8 (ab, utf8);
+  normalized = g_utf8_normalize (utf8, len, G_NORMALIZE_NFD);
+
+  len = g_utf8_strlen (normalized, -1);
+  if (len == 1) {
+    *a = g_utf8_get_char (normalized);
+    *b = 0;
+    ret = *a != ab;
+  } else if (len == 2) {
+    *a = g_utf8_get_char (normalized);
+    *b = g_utf8_get_char (g_utf8_next_char (normalized));
+    ret = TRUE;
+  } else {
+    /* If decomposed to more than two characters, take the last one,
+     * and recompose the rest to get the first component. */
+    gchar *end = g_utf8_offset_to_pointer (normalized, len - 1);
+    gchar *recomposed;
+    *b = g_utf8_get_char (end);
+    recomposed = g_utf8_normalize (normalized, end - normalized, G_NORMALIZE_NFC);
+    /* We expect that recomposed has exactly one character now. */
+    *a = g_utf8_get_char (recomposed);
+    g_free (recomposed);
+    ret = TRUE;
+  }
+
+  g_free (normalized);
+  return ret;
+}
+
+
 extern HB_INTERNAL hb_unicode_funcs_t _hb_unicode_funcs_glib;
 hb_unicode_funcs_t _hb_glib_unicode_funcs = {
   HB_OBJECT_HEADER_STATIC,
@@ -227,15 +311,9 @@ hb_unicode_funcs_t _hb_glib_unicode_funcs = {
   NULL, /* parent */
   TRUE, /* immutable */
   {
-    hb_glib_get_combining_class,
-    hb_glib_get_eastasian_width,
-    hb_glib_get_general_category,
-    hb_glib_get_mirroring,
-    hb_glib_get_script,
-    /* TODO
-    hb_glib_compose,
-    hb_glib_decompose,
-    */
+#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_glib_unicode_##name,
+    HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
+#undef HB_UNICODE_FUNC_IMPLEMENT
   }
 };
 
diff --git a/src/hb-unicode.cc b/src/hb-unicode.cc
index e2043c2..63e83b7 100644
--- a/src/hb-unicode.cc
+++ b/src/hb-unicode.cc
@@ -80,7 +80,7 @@ hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
 }
 
 static hb_bool_t
-hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs,
+hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
 			hb_codepoint_t      a         HB_UNUSED,
 			hb_codepoint_t      b         HB_UNUSED,
 			hb_codepoint_t     *ab        HB_UNUSED,
@@ -91,7 +91,7 @@ hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs,
 }
 
 static hb_bool_t
-hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs,
+hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
 			  hb_codepoint_t      ab        HB_UNUSED,
 			  hb_codepoint_t     *a         HB_UNUSED,
 			  hb_codepoint_t     *b         HB_UNUSED,
@@ -257,6 +257,7 @@ hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
 		    hb_codepoint_t      b,
 		    hb_codepoint_t     *ab)
 {
+  *ab = 0;
   return ufuncs->func.compose (ufuncs, a, b, ab, ufuncs->user_data.compose);
 }
 
@@ -266,6 +267,7 @@ hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
 		      hb_codepoint_t     *a,
 		      hb_codepoint_t     *b)
 {
+  *a = *b = 0;
   return ufuncs->func.decompose (ufuncs, ab, a, b, ufuncs->user_data.decompose);
 }
 



More information about the HarfBuzz mailing list