[HarfBuzz] harfbuzz: Branch 'master' - 9 commits

Behdad Esfahbod behdad at kemper.freedesktop.org
Wed Nov 4 15:52:00 PST 2015


 src/hb-buffer-private.hh                                                |    4 
 src/hb-ot-layout-gsubgpos-private.hh                                    |    1 
 src/hb-ot-layout-private.hh                                             |   66 +++++++++-
 src/hb-ot-shape-normalize.cc                                            |   12 +
 src/hb-unicode-private.hh                                               |   45 ++++++
 test/shaping/Makefile.am                                                |    1 
 test/shaping/fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf |binary
 test/shaping/fonts/sha1sum/MANIFEST                                     |    1 
 test/shaping/record-test.sh                                             |   15 +-
 test/shaping/tests/MANIFEST                                             |    1 
 test/shaping/tests/spaces.tests                                         |   17 ++
 11 files changed, 152 insertions(+), 11 deletions(-)

New commits:
commit 7793aad946e09b53523b30d57de85abd1d15f8b6
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Nov 4 14:48:46 2015 -0800

    Normalize various spaces to space if font doesn't support them
    
    This resurrects the space fallback feature, after I disabled
    the compatibility decomposition.  Now I can release HarfBuzz
    again without breaking Pango!
    
    It also remembers which space character it was, such that later
    on we can approximate the width of this particular space
    character.  That part is not implemented yet.
    
    We normalize all GC=Zs chars except for U+1680 OGHAM SPACE MARK,
    which is better left alone.

diff --git a/src/hb-ot-layout-private.hh b/src/hb-ot-layout-private.hh
index 9020c89..1759520 100644
--- a/src/hb-ot-layout-private.hh
+++ b/src/hb-ot-layout-private.hh
@@ -237,23 +237,6 @@ enum {
   UPROPS_MASK_GEN_CAT   = 0x1Fu
 };
 
-enum space_t {
-  SPACE = 0,
-  SPACE_NBSP,
-  SPACE_EN,
-  SPACE_EM,
-  SPACE_EM_3,
-  SPACE_EM_4,
-  SPACE_EM_6,
-  SPACE_FIGURE,
-  SPACE_PUNCTUATION,
-  SPACE_THIN,
-  SPACE_HAIR,
-  SPACE_NARROW,
-  SPACE_MEDIUM,
-  SPACE_IDEOGRAPHIC,
-};
-
 static inline void
 _hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode)
 {
@@ -331,36 +314,18 @@ _hb_glyph_info_is_unicode_space (const hb_glyph_info_t *info)
 	 HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR;
 }
 static inline void
-_hb_glyph_info_set_unicode_space_for_char (hb_glyph_info_t *info, hb_codepoint_t u)
+_hb_glyph_info_set_unicode_space_fallback_type (hb_glyph_info_t *info, hb_unicode_funcs_t::space_t s)
 {
   if (unlikely (!_hb_glyph_info_is_unicode_space (info)))
     return;
-
-  space_t s;
-  switch (u)
-  {
-    default:      s = SPACE;		break; /* Shouldn't happen. */
-    case 0x00A0u: s = SPACE_NBSP;	break;
-    case 0x2002u: s = SPACE_EN;		break;
-    case 0x2003u: s = SPACE_EM;		break;
-    case 0x2004u: s = SPACE_EM_3;	break;
-    case 0x2005u: s = SPACE_EM_4;	break;
-    case 0x2006u: s = SPACE_EM_6;	break;
-    case 0x2007u: s = SPACE_FIGURE;	break;
-    case 0x2008u: s = SPACE_PUNCTUATION;break;
-    case 0x2009u: s = SPACE_THIN;	break;
-    case 0x200Au: s = SPACE_HAIR;	break;
-    case 0x202Fu: s = SPACE_NARROW;	break;
-    case 0x205Fu: s = SPACE_MEDIUM;	break;
-    case 0x3000u: s = SPACE_IDEOGRAPHIC;break;
-  }
-
   info->unicode_props() = (((unsigned int) s)<<8) | (info->unicode_props() & 0xFF);
 }
-static inline space_t
-_hb_glyph_info_get_unicode_space (const hb_glyph_info_t *info)
+static inline hb_unicode_funcs_t::space_t
+_hb_glyph_info_get_unicode_space_fallback_type (const hb_glyph_info_t *info)
 {
-  return _hb_glyph_info_is_unicode_space (info) ? (space_t) (info->unicode_props()>>8) : SPACE;
+  return _hb_glyph_info_is_unicode_space (info) ?
+	 (hb_unicode_funcs_t::space_t) (info->unicode_props()>>8) :
+	 hb_unicode_funcs_t::NOT_SPACE;
 }
 
 static inline bool _hb_glyph_info_ligated (const hb_glyph_info_t *info);
diff --git a/src/hb-ot-shape-normalize.cc b/src/hb-ot-shape-normalize.cc
index 0a4d404..c86c634 100644
--- a/src/hb-ot-shape-normalize.cc
+++ b/src/hb-ot-shape-normalize.cc
@@ -98,7 +98,7 @@ static inline void
 output_char (hb_buffer_t *buffer, hb_codepoint_t unichar, hb_codepoint_t glyph)
 {
   buffer->cur().glyph_index() = glyph;
-  buffer->output_glyph (unichar);
+  buffer->output_glyph (unichar); /* This is very confusing indeed. */
   _hb_glyph_info_set_unicode_props (&buffer->prev(), buffer->unicode);
 }
 
@@ -164,7 +164,8 @@ decompose_current_character (const hb_ot_shape_normalize_context_t *c, bool shor
 {
   hb_buffer_t * const buffer = c->buffer;
   hb_codepoint_t u = buffer->cur().codepoint;
-  hb_codepoint_t glyph;
+  hb_codepoint_t glyph, space_glyph;
+  hb_unicode_funcs_t::space_t space_type;
 
   /* Kind of a cute waterfall here... */
   if (shortest && c->font->get_glyph (u, 0, &glyph))
@@ -173,6 +174,13 @@ decompose_current_character (const hb_ot_shape_normalize_context_t *c, bool shor
     skip_char (buffer);
   else if (!shortest && c->font->get_glyph (u, 0, &glyph))
     next_char (buffer, glyph);
+  else if (_hb_glyph_info_is_unicode_space (&buffer->cur()) &&
+	   (space_type = buffer->unicode->space_fallback_type (u)) != hb_unicode_funcs_t::NOT_SPACE &&
+	   c->font->get_glyph (0x0020u, 0, &space_glyph))
+  {
+    _hb_glyph_info_set_unicode_space_fallback_type (&buffer->cur(), space_type);
+    next_char (buffer, space_glyph);
+  }
   else
     next_char (buffer, glyph); /* glyph is initialized in earlier branches. */
 }
diff --git a/src/hb-unicode-private.hh b/src/hb-unicode-private.hh
index e729826..43bbed6 100644
--- a/src/hb-unicode-private.hh
+++ b/src/hb-unicode-private.hh
@@ -199,6 +199,46 @@ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
     }
   }
 
+  enum space_t {
+    NOT_SPACE = 0,
+    SPACE_NBSP,
+    SPACE_EN,
+    SPACE_EM,
+    SPACE_EM_3,
+    SPACE_EM_4,
+    SPACE_EM_6,
+    SPACE_FIGURE,
+    SPACE_PUNCTUATION,
+    SPACE_THIN,
+    SPACE_HAIR,
+    SPACE_NARROW,
+    SPACE_MEDIUM,
+    SPACE_IDEOGRAPHIC,
+  };
+  static inline space_t
+  space_fallback_type (hb_codepoint_t u)
+  {
+    switch (u)
+    {
+      /* All GC=Zs chars that can use a fallback. */
+      default:	  return NOT_SPACE; /* Shouldn't happen. */
+      case 0x00A0u: return SPACE_NBSP;
+      case 0x2000u: return SPACE_EN;
+      case 0x2001u: return SPACE_EM;
+      case 0x2002u: return SPACE_EN;
+      case 0x2003u: return SPACE_EM;
+      case 0x2004u: return SPACE_EM_3;
+      case 0x2005u: return SPACE_EM_4;
+      case 0x2006u: return SPACE_EM_6;
+      case 0x2007u: return SPACE_FIGURE;
+      case 0x2008u: return SPACE_PUNCTUATION;
+      case 0x2009u: return SPACE_THIN;
+      case 0x200Au: return SPACE_HAIR;
+      case 0x202Fu: return SPACE_NARROW;
+      case 0x205Fu: return SPACE_MEDIUM;
+      case 0x3000u: return SPACE_IDEOGRAPHIC;
+    }
+  }
 
   struct {
 #define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_func_t name;
diff --git a/test/shaping/Makefile.am b/test/shaping/Makefile.am
index 5694572..607da6d 100644
--- a/test/shaping/Makefile.am
+++ b/test/shaping/Makefile.am
@@ -52,6 +52,7 @@ TESTS = \
 	tests/indic-old-spec.tests \
 	tests/indic-pref-blocking.tests \
 	tests/mongolian-variation-selector.tests \
+	tests/spaces.tests \
 	tests/vertical.tests \
 	tests/zero-width-marks.tests \
 	$(NULL)
diff --git a/test/shaping/fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf b/test/shaping/fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf
new file mode 100644
index 0000000..213e7ce
Binary files /dev/null and b/test/shaping/fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf differ
diff --git a/test/shaping/fonts/sha1sum/MANIFEST b/test/shaping/fonts/sha1sum/MANIFEST
index 785e6ef..879d929 100644
--- a/test/shaping/fonts/sha1sum/MANIFEST
+++ b/test/shaping/fonts/sha1sum/MANIFEST
@@ -2,6 +2,7 @@
 051d92f8bc6ff724511b296c27623f824de256e9.ttf
 191826b9643e3f124d865d617ae609db6a2ce203.ttf
 1a6f1687b7a221f9f2c834b0b360d3c8463b6daf.ttf
+1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf
 226bc2deab3846f1a682085f70c67d0421014144.ttf
 270b89df543a7e48e206a2d830c0e10e5265c630.ttf
 298c9e1d955f10f6f72c6915c3c6ff9bf9695cec.ttf
diff --git a/test/shaping/tests/MANIFEST b/test/shaping/tests/MANIFEST
index 6ae62dc..457c2eb 100644
--- a/test/shaping/tests/MANIFEST
+++ b/test/shaping/tests/MANIFEST
@@ -10,5 +10,6 @@ indic-joiner-candrabindu.tests
 indic-old-spec.tests
 indic-pref-blocking.tests
 mongolian-variation-selector.tests
+spaces.tests
 vertical.tests
 zero-width-marks.tests
diff --git a/test/shaping/tests/spaces.tests b/test/shaping/tests/spaces.tests
new file mode 100644
index 0000000..d9e5d09
--- /dev/null
+++ b/test/shaping/tests/spaces.tests
@@ -0,0 +1,17 @@
+fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf:--font-funcs=ot:U+0020:[gid1=0+560]
+fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf:--font-funcs=ot:U+00A0:[gid1=0+560]
+fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf:--font-funcs=ot:U+1680:[gid0=0+692]
+fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf:--font-funcs=ot:U+2000:[gid1=0+560]
+fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf:--font-funcs=ot:U+2001:[gid1=0+560]
+fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf:--font-funcs=ot:U+2002:[gid1=0+560]
+fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf:--font-funcs=ot:U+2003:[gid1=0+560]
+fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf:--font-funcs=ot:U+2004:[gid1=0+560]
+fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf:--font-funcs=ot:U+2005:[gid1=0+560]
+fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf:--font-funcs=ot:U+2006:[gid1=0+560]
+fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf:--font-funcs=ot:U+2007:[gid1=0+560]
+fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf:--font-funcs=ot:U+2008:[gid1=0+560]
+fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf:--font-funcs=ot:U+2009:[gid1=0+560]
+fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf:--font-funcs=ot:U+200A:[gid1=0+560]
+fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf:--font-funcs=ot:U+202F:[gid1=0+560]
+fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf:--font-funcs=ot:U+205F:[gid1=0+560]
+fonts/sha1sum/1c2c3fc37b2d4c3cb2ef726c6cdaaabd4b7f3eb9.ttf:--font-funcs=ot:U+3000:[gid1=0+560]
commit 8b3c7f9ede77052225cff8495d660860bf9c7629
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Nov 4 15:30:48 2015 -0800

    [test] Support recording multiple lines of text in record-test.sh

diff --git a/test/shaping/record-test.sh b/test/shaping/record-test.sh
index 691f78d..33afd60 100755
--- a/test/shaping/record-test.sh
+++ b/test/shaping/record-test.sh
@@ -35,7 +35,7 @@ done
 if ! $have_text; then
 	text=`cat`
 fi
-unicodes=`./hb-unicode-decode "$text"`
+unicodes=`echo "$text" | ./hb-unicode-decode`
 glyphs=`echo "$text" | $hb_shape $options "$fontfile"`
 if test $? != 0; then
 	echo "hb-shape failed." >&2
@@ -77,7 +77,18 @@ sha1sum=`sha1sum "$dir/font.ttf.subset" | cut -d' ' -f1`
 subset="fonts/sha1sum/$sha1sum.ttf"
 mv "$dir/font.ttf.subset" "$subset"
 
-echo "$subset:$options:$unicodes:$glyphs"
+# There ought to be an easier way to do this, but it escapes me...
+unicodes_file=`mktemp`
+glyphs_file=`mktemp`
+echo "$unicodes" > "$unicodes_file"
+echo "$glyphs" > "$glyphs_file"
+# Open the "file"s
+exec 3<"$unicodes_file"
+exec 4<"$glyphs_file"
+while read uline <&3 && read gline <&4; do
+	echo "$subset:$options:$uline:$gline"
+done
+
 
 rm -f "$dir/font.ttf"
 rmdir "$dir"
commit 85658394809fe0593ab5dfb30fd96118765c7dc5
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Nov 4 14:46:52 2015 -0800

    Protect against possible invalid-memory access after OOM

diff --git a/src/hb-buffer-private.hh b/src/hb-buffer-private.hh
index c255408..521214d 100644
--- a/src/hb-buffer-private.hh
+++ b/src/hb-buffer-private.hh
@@ -76,8 +76,8 @@ struct hb_buffer_t {
   inline hb_glyph_position_t &cur_pos (unsigned int i = 0) { return pos[idx + i]; }
   inline hb_glyph_position_t cur_pos (unsigned int i = 0) const { return pos[idx + i]; }
 
-  inline hb_glyph_info_t &prev (void) { return out_info[out_len - 1]; }
-  inline hb_glyph_info_t prev (void) const { return out_info[out_len - 1]; }
+  inline hb_glyph_info_t &prev (void) { return out_info[out_len ? out_len - 1 : 0]; }
+  inline hb_glyph_info_t prev (void) const { return out_info[out_len ? out_len - 1 : 0]; }
 
   inline bool has_separate_output (void) const { return info != out_info; }
 
commit 4cc80bed25450af3d84a30ea417fa9275b15e014
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Nov 4 14:46:10 2015 -0800

    Fix typo!
    
    Ouch!  Fortunately that function was unused.

diff --git a/src/hb-buffer-private.hh b/src/hb-buffer-private.hh
index 7fed738..c255408 100644
--- a/src/hb-buffer-private.hh
+++ b/src/hb-buffer-private.hh
@@ -77,7 +77,7 @@ struct hb_buffer_t {
   inline hb_glyph_position_t cur_pos (unsigned int i = 0) const { return pos[idx + i]; }
 
   inline hb_glyph_info_t &prev (void) { return out_info[out_len - 1]; }
-  inline hb_glyph_info_t prev (void) const { return info[out_len - 1]; }
+  inline hb_glyph_info_t prev (void) const { return out_info[out_len - 1]; }
 
   inline bool has_separate_output (void) const { return info != out_info; }
 
commit 9ac4b9656dd78ffd129bc3d560a92e2692bc3058
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Nov 4 14:18:39 2015 -0800

    Add Unicode space category
    
    Unused so far.

diff --git a/src/hb-ot-layout-private.hh b/src/hb-ot-layout-private.hh
index 8e56803..9020c89 100644
--- a/src/hb-ot-layout-private.hh
+++ b/src/hb-ot-layout-private.hh
@@ -213,6 +213,23 @@ _next_syllable (hb_buffer_t *buffer, unsigned int start)
 
 /* unicode_props */
 
+/* Design:
+ * unicode_props() is a two-byte number.  The low byte includes:
+ * - General_Category: 5 bits.
+ * - A bit each for:
+ *   * Is it Default_Ignorable(); we have a modified Default_Ignorable().
+ *   * Is it U+200D ZWJ?
+ *   * Is it U+200C ZWNJ?
+ *
+ * The high-byte has different meanings, switched by the Gen-Cat:
+ * - For Mn,Mc,Me: the modified Combining_Class.
+ * - For Zs: index of which space character this is, if space fallback
+ *   is needed, ie. we don't set this by default, only if asked to.
+ *
+ * If needed, we can move the ZWJ/ZWNJ flags into the high byte as well,
+ * freeing two more bits in the low byte.
+ */
+
 enum {
   UPROPS_MASK_ZWJ       = 0x20u,
   UPROPS_MASK_ZWNJ      = 0x40u,
@@ -220,6 +237,23 @@ enum {
   UPROPS_MASK_GEN_CAT   = 0x1Fu
 };
 
+enum space_t {
+  SPACE = 0,
+  SPACE_NBSP,
+  SPACE_EN,
+  SPACE_EM,
+  SPACE_EM_3,
+  SPACE_EM_4,
+  SPACE_EM_6,
+  SPACE_FIGURE,
+  SPACE_PUNCTUATION,
+  SPACE_THIN,
+  SPACE_HAIR,
+  SPACE_NARROW,
+  SPACE_MEDIUM,
+  SPACE_IDEOGRAPHIC,
+};
+
 static inline void
 _hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode)
 {
@@ -246,6 +280,9 @@ _hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *uni
        * property value 0.
        * 1.1.5+
        * """
+       *
+       * Also, all Mn's that are Default_Ignorable, have ccc=0, hence
+       * the "else if".
        */
       props |= unicode->modified_combining_class (info->codepoint)<<8;
     }
@@ -273,7 +310,6 @@ _hb_glyph_info_is_unicode_mark (const hb_glyph_info_t *info)
 {
   return HB_UNICODE_GENERAL_CATEGORY_IS_MARK (info->unicode_props() & UPROPS_MASK_GEN_CAT);
 }
-
 static inline void
 _hb_glyph_info_set_modified_combining_class (hb_glyph_info_t *info,
 					     unsigned int modified_class)
@@ -282,13 +318,51 @@ _hb_glyph_info_set_modified_combining_class (hb_glyph_info_t *info,
     return;
   info->unicode_props() = (modified_class<<8) | (info->unicode_props() & 0xFF);
 }
-
 static inline unsigned int
 _hb_glyph_info_get_modified_combining_class (const hb_glyph_info_t *info)
 {
   return _hb_glyph_info_is_unicode_mark (info) ? info->unicode_props()>>8 : 0;
 }
 
+static inline bool
+_hb_glyph_info_is_unicode_space (const hb_glyph_info_t *info)
+{
+  return _hb_glyph_info_get_general_category (info) ==
+	 HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR;
+}
+static inline void
+_hb_glyph_info_set_unicode_space_for_char (hb_glyph_info_t *info, hb_codepoint_t u)
+{
+  if (unlikely (!_hb_glyph_info_is_unicode_space (info)))
+    return;
+
+  space_t s;
+  switch (u)
+  {
+    default:      s = SPACE;		break; /* Shouldn't happen. */
+    case 0x00A0u: s = SPACE_NBSP;	break;
+    case 0x2002u: s = SPACE_EN;		break;
+    case 0x2003u: s = SPACE_EM;		break;
+    case 0x2004u: s = SPACE_EM_3;	break;
+    case 0x2005u: s = SPACE_EM_4;	break;
+    case 0x2006u: s = SPACE_EM_6;	break;
+    case 0x2007u: s = SPACE_FIGURE;	break;
+    case 0x2008u: s = SPACE_PUNCTUATION;break;
+    case 0x2009u: s = SPACE_THIN;	break;
+    case 0x200Au: s = SPACE_HAIR;	break;
+    case 0x202Fu: s = SPACE_NARROW;	break;
+    case 0x205Fu: s = SPACE_MEDIUM;	break;
+    case 0x3000u: s = SPACE_IDEOGRAPHIC;break;
+  }
+
+  info->unicode_props() = (((unsigned int) s)<<8) | (info->unicode_props() & 0xFF);
+}
+static inline space_t
+_hb_glyph_info_get_unicode_space (const hb_glyph_info_t *info)
+{
+  return _hb_glyph_info_is_unicode_space (info) ? (space_t) (info->unicode_props()>>8) : SPACE;
+}
+
 static inline bool _hb_glyph_info_ligated (const hb_glyph_info_t *info);
 
 static inline hb_bool_t
commit 8249ec3f86510fd24462ce71ed64a6978f0ade17
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Nov 4 13:26:17 2015 -0800

    Make top-byte of unicode_props available to be used differently per-GC

diff --git a/src/hb-ot-layout-private.hh b/src/hb-ot-layout-private.hh
index 43a8bc1..8e56803 100644
--- a/src/hb-ot-layout-private.hh
+++ b/src/hb-ot-layout-private.hh
@@ -278,13 +278,15 @@ static inline void
 _hb_glyph_info_set_modified_combining_class (hb_glyph_info_t *info,
 					     unsigned int modified_class)
 {
+  if (unlikely (!_hb_glyph_info_is_unicode_mark (info)))
+    return;
   info->unicode_props() = (modified_class<<8) | (info->unicode_props() & 0xFF);
 }
 
 static inline unsigned int
 _hb_glyph_info_get_modified_combining_class (const hb_glyph_info_t *info)
 {
-  return info->unicode_props()>>8;
+  return _hb_glyph_info_is_unicode_mark (info) ? info->unicode_props()>>8 : 0;
 }
 
 static inline bool _hb_glyph_info_ligated (const hb_glyph_info_t *info);
commit cc5d3a33882b52f906ee4346707700f5e846d2ac
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Nov 4 13:21:25 2015 -0800

    Towards using top-byte of unicode-props for more things

diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh
index 1390a2e..aeaaefa 100644
--- a/src/hb-ot-layout-gsubgpos-private.hh
+++ b/src/hb-ot-layout-gsubgpos-private.hh
@@ -835,7 +835,6 @@ static inline bool ligate_input (hb_apply_context_t *c,
     _hb_glyph_info_set_lig_props_for_ligature (&buffer->cur(), lig_id, total_component_count);
     if (_hb_glyph_info_get_general_category (&buffer->cur()) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
     {
-      _hb_glyph_info_set_modified_combining_class (&buffer->cur(), 0);
       _hb_glyph_info_set_general_category (&buffer->cur(), HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER);
     }
   }
diff --git a/src/hb-ot-layout-private.hh b/src/hb-ot-layout-private.hh
index 8e6164e..43a8bc1 100644
--- a/src/hb-ot-layout-private.hh
+++ b/src/hb-ot-layout-private.hh
@@ -258,7 +258,8 @@ static inline void
 _hb_glyph_info_set_general_category (hb_glyph_info_t *info,
 				     hb_unicode_general_category_t gen_cat)
 {
-  info->unicode_props() = (unsigned int) gen_cat | (info->unicode_props() & ~UPROPS_MASK_GEN_CAT);
+  /* Clears top-byte. */
+  info->unicode_props() = (unsigned int) gen_cat | (info->unicode_props() & (0xFF & ~UPROPS_MASK_GEN_CAT));
 }
 
 static inline hb_unicode_general_category_t
commit 2f38dde5a1ea5459789fabaee661cae9235d204e
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Wed Nov 4 13:17:33 2015 -0800

    Add _hb_glyph_info_is_unicode_mark()
    
    Unused right now.

diff --git a/src/hb-ot-layout-private.hh b/src/hb-ot-layout-private.hh
index 80a8854..8e6164e 100644
--- a/src/hb-ot-layout-private.hh
+++ b/src/hb-ot-layout-private.hh
@@ -267,6 +267,12 @@ _hb_glyph_info_get_general_category (const hb_glyph_info_t *info)
   return (hb_unicode_general_category_t) (info->unicode_props() & UPROPS_MASK_GEN_CAT);
 }
 
+static inline bool
+_hb_glyph_info_is_unicode_mark (const hb_glyph_info_t *info)
+{
+  return HB_UNICODE_GENERAL_CATEGORY_IS_MARK (info->unicode_props() & UPROPS_MASK_GEN_CAT);
+}
+
 static inline void
 _hb_glyph_info_set_modified_combining_class (hb_glyph_info_t *info,
 					     unsigned int modified_class)
commit 90d75f93bb85aeb627c4e6bb9e4cbd75895c99f7
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Tue Nov 3 12:58:12 2015 -0800

    Tighten ccc-setting a bit and document it

diff --git a/src/hb-ot-layout-private.hh b/src/hb-ot-layout-private.hh
index e13eaae..80a8854 100644
--- a/src/hb-ot-layout-private.hh
+++ b/src/hb-ot-layout-private.hh
@@ -235,8 +235,18 @@ _hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *uni
       if (u == 0x200Cu) props |= UPROPS_MASK_ZWNJ;
       if (u == 0x200Du) props |= UPROPS_MASK_ZWJ;
     }
-    else if (unlikely (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (gen_cat)))
+    else if (unlikely (HB_UNICODE_GENERAL_CATEGORY_IS_NON_ENCLOSING_MARK (gen_cat)))
     {
+      /* Only Mn and Mc can have non-zero ccc:
+       * http://www.unicode.org/policies/stability_policy.html#Property_Value
+       * """
+       * Canonical_Combining_Class, General_Category
+       * All characters other than those with General_Category property values
+       * Spacing_Mark (Mc) and Nonspacing_Mark (Mn) have the Canonical_Combining_Class
+       * property value 0.
+       * 1.1.5+
+       * """
+       */
       props |= unicode->modified_combining_class (info->codepoint)<<8;
     }
   }
diff --git a/src/hb-unicode-private.hh b/src/hb-unicode-private.hh
index 968bca5..e729826 100644
--- a/src/hb-unicode-private.hh
+++ b/src/hb-unicode-private.hh
@@ -313,5 +313,10 @@ extern HB_INTERNAL const hb_unicode_funcs_t _hb_unicode_funcs_nil;
 	  FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | \
 	  FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))
 
+#define HB_UNICODE_GENERAL_CATEGORY_IS_NON_ENCLOSING_MARK(gen_cat) \
+	(FLAG_SAFE (gen_cat) & \
+	 (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \
+	  FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))
+
 
 #endif /* HB_UNICODE_PRIVATE_HH */


More information about the HarfBuzz mailing list