[HarfBuzz] harfbuzz: Branch 'master' - 11 commits

Behdad Esfahbod behdad at kemper.freedesktop.org
Sat Dec 16 16:50:46 UTC 2017


 src/hb-debug.hh                    |   16 ++++-
 src/hb-ot-layout-common-private.hh |   91 ++++++++++++++++++++++++------
 src/hb-ot-layout-gpos-table.hh     |   40 ++++---------
 src/hb-ot-layout-gsub-table.hh     |   35 ++++-------
 src/hb-set-digest-private.hh       |   37 +++++++++---
 src/hb-set-private.hh              |  112 +++++++++++++++++++++++++++----------
 src/hb-set.cc                      |    6 -
 7 files changed, 228 insertions(+), 109 deletions(-)

New commits:
commit 493a005d9527b6075f3c1ca4b41c22d7805f975c
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Sat Dec 16 11:49:39 2017 -0500

    [set] In add_sorted_array(), bail if data is not sorted

diff --git a/src/hb-set-private.hh b/src/hb-set-private.hh
index 9d9a7712..4715e1f3 100644
--- a/src/hb-set-private.hh
+++ b/src/hb-set-private.hh
@@ -286,20 +286,24 @@ struct hb_set_t
     if (unlikely (in_error)) return false;
     if (!count) return true;
     hb_codepoint_t g = *array;
+    hb_codepoint_t last_g = g;
     while (count)
     {
       unsigned int m = get_major (g);
       page_t *page = page_for_insert (g); if (unlikely (!page)) return false;
-      unsigned int start = major_start (m);
       unsigned int end = major_start (m + 1);
       do
       {
+        /* If we try harder we can change the following comparison to <=;
+	 * Not sure if it's worth it. */
+        if (g < last_g) return false;
+	last_g = g;
 	page->add (g);
 
 	array++;
 	count--;
       }
-      while (count && (g = *array, start <= g && g < end));
+      while (count && (g = *array, g < end));
     }
     return true;
   }
commit a7bd6d7a4c53ff61d7d8286a594aaa0a0e15b1a1
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Sat Dec 16 11:11:18 2017 -0500

    [collect_glyphs] Bail if input data looks like garbage
    
    Specificaly, when a range or sorted array has unexpected order, we take that as
    font data being garbage and bail out.  This fixes significant slowdown on a bad
    version of Chandas font which has a 600KB GPOS with garbage inside.
    
    Later on, I like to add a maximum-work counter for collect_glyphs to protect
    against malicious fonts as well.
    
    Fixes https://bugs.chromium.org/p/chromium/issues/detail?id=794896

diff --git a/src/hb-ot-layout-common-private.hh b/src/hb-ot-layout-common-private.hh
index f75f8b4f..5e699e19 100644
--- a/src/hb-ot-layout-common-private.hh
+++ b/src/hb-ot-layout-common-private.hh
@@ -819,7 +819,7 @@ struct CoverageFormat2
     unsigned int count = rangeRecord.len;
     for (unsigned int i = 0; i < count; i++)
       if (unlikely (!rangeRecord[i].add_coverage (glyphs)))
-        return true;//XXXXXXXXXXXXfalse;
+        return false;
     return true;
   }
 
@@ -934,7 +934,7 @@ struct Coverage
     switch (u.format) {
     case 1: return u.format1.add_coverage (glyphs);
     case 2: return u.format2.add_coverage (glyphs);
-    default:return true;//XXXXXXXXXXXfalse;
+    default:return false;
     }
   }
 
@@ -1030,13 +1030,13 @@ struct ClassDefFormat1
 
       if (start != i)
 	if (unlikely (!glyphs->add_range (startGlyph + start, startGlyph + i)))
-	  return true;//XXXXXXXXfalse
+	  return false;
 
       start = i + 1;
     }
     if (start != count)
       if (unlikely (!glyphs->add_range (startGlyph + start, startGlyph + count)))
-	return true;//XXXXXXXXfalse
+	return false;
 
     return true;
   }
@@ -1107,7 +1107,7 @@ struct ClassDefFormat2
     for (unsigned int i = 0; i < count; i++)
       if (rangeRecord[i].value)
 	if (unlikely (!rangeRecord[i].add_coverage (glyphs)))
-	  return true;//XXXXXXXXXXXXfalse;
+	  return false;
     return true;
   }
 
@@ -1118,7 +1118,7 @@ struct ClassDefFormat2
     {
       if (rangeRecord[i].value == klass)
         if (unlikely (!rangeRecord[i].add_coverage (glyphs)))
-	  return true;//XXXXXXXXXXXXfalse;
+	  return false;
     }
     return true;
   }
@@ -1185,7 +1185,7 @@ struct ClassDef
     switch (u.format) {
     case 1: return u.format1.add_coverage (glyphs);
     case 2: return u.format2.add_coverage (glyphs);
-    default:return true;//XXXXXXXXXXXfalse;
+    default:return false;
     }
   }
 
diff --git a/src/hb-set-private.hh b/src/hb-set-private.hh
index 74a22a03..9d9a7712 100644
--- a/src/hb-set-private.hh
+++ b/src/hb-set-private.hh
@@ -230,7 +230,7 @@ struct hb_set_t
   }
   inline bool add_range (hb_codepoint_t a, hb_codepoint_t b)
   {
-    if (unlikely (in_error || a > b || a == INVALID || b == INVALID)) return true;//XXXXXXXfalse;
+    if (unlikely (in_error || a > b || a == INVALID || b == INVALID)) return false;
     unsigned int ma = get_major (a);
     unsigned int mb = get_major (b);
     if (ma == mb)
@@ -283,7 +283,7 @@ struct hb_set_t
   template <typename T>
   inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
   {
-    if (unlikely (in_error)) return true;//XXXfalse
+    if (unlikely (in_error)) return false;
     if (!count) return true;
     hb_codepoint_t g = *array;
     while (count)
commit 1ce7d6e215ef9d5386010bcdbbca79ef01811596
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Sat Dec 16 11:36:16 2017 -0500

    [set] Optimize add_array() / add_sorted_array()
    
    Does page lookup as needed.

diff --git a/src/hb-set-private.hh b/src/hb-set-private.hh
index 4b6f7fb3..74a22a03 100644
--- a/src/hb-set-private.hh
+++ b/src/hb-set-private.hh
@@ -225,8 +225,7 @@ struct hb_set_t
   {
     if (unlikely (in_error)) return;
     if (unlikely (g == INVALID)) return;
-    page_t *page = page_for_insert (g);
-    if (unlikely (!page)) return;
+    page_t *page = page_for_insert (g); if (unlikely (!page)) return;
     page->add (g);
   }
   inline bool add_range (hb_codepoint_t a, hb_codepoint_t b)
@@ -236,25 +235,21 @@ struct hb_set_t
     unsigned int mb = get_major (b);
     if (ma == mb)
     {
-      page_t *page = page_for_insert (a);
-      if (unlikely (!page)) return false;
+      page_t *page = page_for_insert (a); if (unlikely (!page)) return false;
       page->add_range (a, b);
     }
     else
     {
-      page_t *page = page_for_insert (a);
-      if (unlikely (!page)) return false;
+      page_t *page = page_for_insert (a); if (unlikely (!page)) return false;
       page->add_range (a, major_start (ma + 1) - 1);
 
       for (unsigned int m = ma + 1; m < mb; m++)
       {
-	page = page_for_insert (major_start (m));
-	if (unlikely (!page)) return false;
+	page = page_for_insert (major_start (m)); if (unlikely (!page)) return false;
 	page->init1 ();
       }
 
-      page = page_for_insert (b);
-      if (unlikely (!page)) return false;
+      page = page_for_insert (b); if (unlikely (!page)) return false;
       page->add_range (major_start (mb), b);
     }
     return true;
@@ -263,21 +258,48 @@ struct hb_set_t
   template <typename T>
   inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
   {
-    for (unsigned int i = 0; i < count; i++)
+    if (unlikely (in_error)) return;
+    if (!count) return;
+    hb_codepoint_t g = *array;
+    while (count)
     {
-      add (*array);
-      array = (const T *) (stride + (const char *) array);
+      unsigned int m = get_major (g);
+      page_t *page = page_for_insert (g); if (unlikely (!page)) return;
+      unsigned int start = major_start (m);
+      unsigned int end = major_start (m + 1);
+      do
+      {
+	page->add (g);
+
+	array++;
+	count--;
+      }
+      while (count && (g = *array, start <= g && g < end));
     }
   }
+
   /* Might return false if array looks unsorted.
    * Used for faster rejection of corrupt data. */
   template <typename T>
   inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
   {
-    for (unsigned int i = 0; i < count; i++)
+    if (unlikely (in_error)) return true;//XXXfalse
+    if (!count) return true;
+    hb_codepoint_t g = *array;
+    while (count)
     {
-      add (*array);
-      array = (const T *) (stride + (const char *) array);
+      unsigned int m = get_major (g);
+      page_t *page = page_for_insert (g); if (unlikely (!page)) return false;
+      unsigned int start = major_start (m);
+      unsigned int end = major_start (m + 1);
+      do
+      {
+	page->add (g);
+
+	array++;
+	count--;
+      }
+      while (count && (g = *array, start <= g && g < end));
     }
     return true;
   }
commit 71e6adf1e2d65eb905a0ba247672fe36169955ef
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Sat Dec 16 11:07:37 2017 -0500

    [collect_glyphs] handle ClassDef better

diff --git a/src/hb-ot-layout-common-private.hh b/src/hb-ot-layout-common-private.hh
index 55436485..f75f8b4f 100644
--- a/src/hb-ot-layout-common-private.hh
+++ b/src/hb-ot-layout-common-private.hh
@@ -1020,12 +1020,33 @@ struct ClassDefFormat1
   }
 
   template <typename set_t>
-  inline bool add_class (set_t *glyphs, unsigned int min_klass, unsigned int max_klass) const {
+  inline bool add_coverage (set_t *glyphs) const {
+    unsigned int start = 0;
+    unsigned int count = classValue.len;
+    for (unsigned int i = 0; i < count; i++)
+    {
+      if (classValue[i])
+        continue;
+
+      if (start != i)
+	if (unlikely (!glyphs->add_range (startGlyph + start, startGlyph + i)))
+	  return true;//XXXXXXXXfalse
+
+      start = i + 1;
+    }
+    if (start != count)
+      if (unlikely (!glyphs->add_range (startGlyph + start, startGlyph + count)))
+	return true;//XXXXXXXXfalse
+
+    return true;
+  }
+
+  template <typename set_t>
+  inline bool add_class (set_t *glyphs, unsigned int klass) const {
     unsigned int count = classValue.len;
     for (unsigned int i = 0; i < count; i++)
     {
-      unsigned int klass = classValue[i];
-      if (min_klass <= klass && klass <= max_klass)
+      if (classValue[i] == klass)
         glyphs->add (startGlyph + i);
     }
     return true;
@@ -1081,14 +1102,23 @@ struct ClassDefFormat2
   }
 
   template <typename set_t>
-  inline bool add_class (set_t *glyphs, unsigned int min_klass, unsigned int max_klass) const {
+  inline bool add_coverage (set_t *glyphs) const {
+    unsigned int count = rangeRecord.len;
+    for (unsigned int i = 0; i < count; i++)
+      if (rangeRecord[i].value)
+	if (unlikely (!rangeRecord[i].add_coverage (glyphs)))
+	  return true;//XXXXXXXXXXXXfalse;
+    return true;
+  }
+
+  template <typename set_t>
+  inline bool add_class (set_t *glyphs, unsigned int klass) const {
     unsigned int count = rangeRecord.len;
     for (unsigned int i = 0; i < count; i++)
     {
-      unsigned int klass = rangeRecord[i].value;
-      if (min_klass <= klass && klass <= max_klass)
+      if (rangeRecord[i].value == klass)
         if (unlikely (!rangeRecord[i].add_coverage (glyphs)))
-	  return false;
+	  return true;//XXXXXXXXXXXXfalse;
     }
     return true;
   }
@@ -1148,11 +1178,24 @@ struct ClassDef
     }
   }
 
+  /* Might return false if array looks unsorted.
+   * Used for faster rejection of corrupt data. */
+  template <typename set_t>
+  inline bool add_coverage (set_t *glyphs) const {
+    switch (u.format) {
+    case 1: return u.format1.add_coverage (glyphs);
+    case 2: return u.format2.add_coverage (glyphs);
+    default:return true;//XXXXXXXXXXXfalse;
+    }
+  }
+
+  /* Might return false if array looks unsorted.
+   * Used for faster rejection of corrupt data. */
   template <typename set_t>
-  inline bool add_class (set_t *glyphs, unsigned int min_klass, unsigned int max_klass) const {
+  inline bool add_class (set_t *glyphs, unsigned int klass) const {
     switch (u.format) {
-    case 1: return u.format1.add_class (glyphs, min_klass, max_klass);
-    case 2: return u.format2.add_class (glyphs, min_klass, max_klass);
+    case 1: return u.format1.add_class (glyphs, klass);
+    case 2: return u.format2.add_class (glyphs, klass);
     default:return false;
     }
   }
diff --git a/src/hb-ot-layout-gdef-table.hh b/src/hb-ot-layout-gdef-table.hh
index b2924f6b..eed46dd6 100644
--- a/src/hb-ot-layout-gdef-table.hh
+++ b/src/hb-ot-layout-gdef-table.hh
@@ -352,7 +352,7 @@ struct GDEF
   inline unsigned int get_glyph_class (hb_codepoint_t glyph) const
   { return (this+glyphClassDef).get_class (glyph); }
   inline void get_glyphs_in_class (unsigned int klass, hb_set_t *glyphs) const
-  { (this+glyphClassDef).add_class (glyphs, klass, klass); }
+  { (this+glyphClassDef).add_class (glyphs, klass); }
 
   inline bool has_mark_attachment_types (void) const { return markAttachClassDef != 0; }
   inline unsigned int get_mark_attachment_type (hb_codepoint_t glyph) const
diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh
index f44888b6..b344d793 100644
--- a/src/hb-ot-layout-gpos-table.hh
+++ b/src/hb-ot-layout-gpos-table.hh
@@ -751,10 +751,7 @@ struct PairPosFormat2
   {
     TRACE_COLLECT_GLYPHS (this);
     if (unlikely (!(this+coverage).add_coverage (c->input))) return;
-
-    unsigned int count2 = class2Count;
-    if (count2)
-      (this+classDef2).add_class (c->input, 0, count2 - 1);
+    if (unlikely (!(this+classDef2).add_coverage (c->input))) return;
   }
 
   inline const Coverage &get_coverage (void) const
diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh
index a30fa6de..b08a5913 100644
--- a/src/hb-ot-layout-gsubgpos-private.hh
+++ b/src/hb-ot-layout-gsubgpos-private.hh
@@ -621,7 +621,7 @@ static inline void collect_glyph (hb_set_t *glyphs, const UINT16 &value, const v
 static inline void collect_class (hb_set_t *glyphs, const UINT16 &value, const void *data)
 {
   const ClassDef &class_def = *reinterpret_cast<const ClassDef *>(data);
-  class_def.add_class (glyphs, value, value);
+  class_def.add_class (glyphs, value);
 }
 static inline void collect_coverage (hb_set_t *glyphs, const UINT16 &value, const void *data)
 {
commit 87cc5a65cb4b98a3a857b5846085ef0814b392a8
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Sat Dec 16 06:18:07 2017 -0800

    [collect_glyphs] In PairPosFornat2 do not collect classDef1
    
    The coverage already covered that.

diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh
index f1b61301..f44888b6 100644
--- a/src/hb-ot-layout-gpos-table.hh
+++ b/src/hb-ot-layout-gpos-table.hh
@@ -462,7 +462,7 @@ struct SinglePosFormat1
   inline void collect_glyphs (hb_collect_glyphs_context_t *c) const
   {
     TRACE_COLLECT_GLYPHS (this);
-    (this+coverage).add_coverage (c->input);
+    if (unlikely (!(this+coverage).add_coverage (c->input))) return;
   }
 
   inline const Coverage &get_coverage (void) const
@@ -510,7 +510,7 @@ struct SinglePosFormat2
   inline void collect_glyphs (hb_collect_glyphs_context_t *c) const
   {
     TRACE_COLLECT_GLYPHS (this);
-    (this+coverage).add_coverage (c->input);
+    if (unlikely (!(this+coverage).add_coverage (c->input))) return;
   }
 
   inline const Coverage &get_coverage (void) const
@@ -752,10 +752,6 @@ struct PairPosFormat2
     TRACE_COLLECT_GLYPHS (this);
     if (unlikely (!(this+coverage).add_coverage (c->input))) return;
 
-    unsigned int count1 = class1Count;
-    if (count1)
-      (this+classDef1).add_class (c->input, 0, count1 - 1);
-
     unsigned int count2 = class2Count;
     if (count2)
       (this+classDef2).add_class (c->input, 0, count2 - 1);
commit 81f27df4d9db1bfc1dd04593cbd121397b86e9a6
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Sat Dec 16 06:12:06 2017 -0800

    More work towards improving collect_glyphs() against bad input
    
    The three "XXXXX"'s should be switched to false.  Doing that separately for ease
    of bisecting...

diff --git a/src/hb-ot-layout-common-private.hh b/src/hb-ot-layout-common-private.hh
index 47a05b5e..55436485 100644
--- a/src/hb-ot-layout-common-private.hh
+++ b/src/hb-ot-layout-common-private.hh
@@ -156,8 +156,7 @@ struct RangeRecord
 
   template <typename set_t>
   inline bool add_coverage (set_t *glyphs) const {
-    glyphs->add_range (start, end);
-    return likely (start <= end);
+    return glyphs->add_range (start, end);
   }
 
   GlyphID	start;		/* First GlyphID in the range */
@@ -820,7 +819,7 @@ struct CoverageFormat2
     unsigned int count = rangeRecord.len;
     for (unsigned int i = 0; i < count; i++)
       if (unlikely (!rangeRecord[i].add_coverage (glyphs)))
-        return false;
+        return true;//XXXXXXXXXXXXfalse;
     return true;
   }
 
@@ -935,7 +934,7 @@ struct Coverage
     switch (u.format) {
     case 1: return u.format1.add_coverage (glyphs);
     case 2: return u.format2.add_coverage (glyphs);
-    default:return false;
+    default:return true;//XXXXXXXXXXXfalse;
     }
   }
 
diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh
index 215e2d74..f1b61301 100644
--- a/src/hb-ot-layout-gpos-table.hh
+++ b/src/hb-ot-layout-gpos-table.hh
@@ -607,12 +607,7 @@ struct PairSet
     unsigned int record_size = UINT16::static_size * (1 + len1 + len2);
 
     const PairValueRecord *record = CastP<PairValueRecord> (arrayZ);
-    unsigned int count = len;
-    for (unsigned int i = 0; i < count; i++)
-    {
-      c->input->add (record->secondGlyph);
-      record = &StructAtOffset<PairValueRecord> (record, record_size);
-    }
+    c->input->add_array (&record->secondGlyph, len, record_size);
   }
 
   inline bool apply (hb_apply_context_t *c,
@@ -689,7 +684,7 @@ struct PairPosFormat1
   inline void collect_glyphs (hb_collect_glyphs_context_t *c) const
   {
     TRACE_COLLECT_GLYPHS (this);
-    (this+coverage).add_coverage (c->input);
+    if (unlikely (!(this+coverage).add_coverage (c->input))) return;
     unsigned int count = pairSet.len;
     for (unsigned int i = 0; i < count; i++)
       (this+pairSet[i]).collect_glyphs (c, valueFormat);
@@ -755,7 +750,7 @@ struct PairPosFormat2
   inline void collect_glyphs (hb_collect_glyphs_context_t *c) const
   {
     TRACE_COLLECT_GLYPHS (this);
-    (this+coverage).add_coverage (c->input);
+    if (unlikely (!(this+coverage).add_coverage (c->input))) return;
 
     unsigned int count1 = class1Count;
     if (count1)
@@ -904,7 +899,7 @@ struct CursivePosFormat1
   inline void collect_glyphs (hb_collect_glyphs_context_t *c) const
   {
     TRACE_COLLECT_GLYPHS (this);
-    (this+coverage).add_coverage (c->input);
+    if (unlikely (!(this+coverage).add_coverage (c->input))) return;
   }
 
   inline const Coverage &get_coverage (void) const
@@ -1062,8 +1057,8 @@ struct MarkBasePosFormat1
   inline void collect_glyphs (hb_collect_glyphs_context_t *c) const
   {
     TRACE_COLLECT_GLYPHS (this);
-    (this+markCoverage).add_coverage (c->input);
-    (this+baseCoverage).add_coverage (c->input);
+    if (unlikely (!(this+markCoverage).add_coverage (c->input))) return;
+    if (unlikely (!(this+baseCoverage).add_coverage (c->input))) return;
   }
 
   inline const Coverage &get_coverage (void) const
@@ -1165,8 +1160,8 @@ struct MarkLigPosFormat1
   inline void collect_glyphs (hb_collect_glyphs_context_t *c) const
   {
     TRACE_COLLECT_GLYPHS (this);
-    (this+markCoverage).add_coverage (c->input);
-    (this+ligatureCoverage).add_coverage (c->input);
+    if (unlikely (!(this+markCoverage).add_coverage (c->input))) return;
+    if (unlikely (!(this+ligatureCoverage).add_coverage (c->input))) return;
   }
 
   inline const Coverage &get_coverage (void) const
@@ -1278,8 +1273,8 @@ struct MarkMarkPosFormat1
   inline void collect_glyphs (hb_collect_glyphs_context_t *c) const
   {
     TRACE_COLLECT_GLYPHS (this);
-    (this+mark1Coverage).add_coverage (c->input);
-    (this+mark2Coverage).add_coverage (c->input);
+    if (unlikely (!(this+mark1Coverage).add_coverage (c->input))) return;
+    if (unlikely (!(this+mark2Coverage).add_coverage (c->input))) return;
   }
 
   inline const Coverage &get_coverage (void) const
diff --git a/src/hb-ot-layout-gsub-table.hh b/src/hb-ot-layout-gsub-table.hh
index 28e0790e..0b09c4e4 100644
--- a/src/hb-ot-layout-gsub-table.hh
+++ b/src/hb-ot-layout-gsub-table.hh
@@ -54,13 +54,13 @@ struct SingleSubstFormat1
   inline void collect_glyphs (hb_collect_glyphs_context_t *c) const
   {
     TRACE_COLLECT_GLYPHS (this);
+    if (unlikely (!(this+coverage).add_coverage (c->input))) return;
     Coverage::Iter iter;
     for (iter.init (this+coverage); iter.more (); iter.next ())
     {
       /* TODO Switch to range-based API to work around malicious fonts.
        * https://github.com/harfbuzz/harfbuzz/issues/363 */
       hb_codepoint_t glyph_id = iter.get_glyph ();
-      c->input->add (glyph_id);
       c->output->add ((glyph_id + deltaGlyphID) & 0xFFFFu);
     }
   }
@@ -139,13 +139,13 @@ struct SingleSubstFormat2
   inline void collect_glyphs (hb_collect_glyphs_context_t *c) const
   {
     TRACE_COLLECT_GLYPHS (this);
+    if (unlikely (!(this+coverage).add_coverage (c->input))) return;
     Coverage::Iter iter;
     unsigned int count = substitute.len;
     for (iter.init (this+coverage); iter.more (); iter.next ())
     {
       if (unlikely (iter.get_coverage () >= count))
         break; /* Work around malicious fonts. https://github.com/harfbuzz/harfbuzz/issues/363 */
-      c->input->add (iter.get_glyph ());
       c->output->add (substitute[iter.get_coverage ()]);
     }
   }
@@ -269,9 +269,7 @@ struct Sequence
   inline void collect_glyphs (hb_collect_glyphs_context_t *c) const
   {
     TRACE_COLLECT_GLYPHS (this);
-    unsigned int count = substitute.len;
-    for (unsigned int i = 0; i < count; i++)
-      c->output->add (substitute[i]);
+    c->output->add_array (substitute.array, substitute.len);
   }
 
   inline bool apply (hb_apply_context_t *c) const
@@ -348,7 +346,7 @@ struct MultipleSubstFormat1
   inline void collect_glyphs (hb_collect_glyphs_context_t *c) const
   {
     TRACE_COLLECT_GLYPHS (this);
-    (this+coverage).add_coverage (c->input);
+    if (unlikely (!(this+coverage).add_coverage (c->input))) return;
     unsigned int count = sequence.len;
     for (unsigned int i = 0; i < count; i++)
 	(this+sequence[i]).collect_glyphs (c);
@@ -474,17 +472,15 @@ struct AlternateSubstFormat1
   inline void collect_glyphs (hb_collect_glyphs_context_t *c) const
   {
     TRACE_COLLECT_GLYPHS (this);
+    if (unlikely (!(this+coverage).add_coverage (c->input))) return;
     Coverage::Iter iter;
     unsigned int count = alternateSet.len;
     for (iter.init (this+coverage); iter.more (); iter.next ())
     {
       if (unlikely (iter.get_coverage () >= count))
         break; /* Work around malicious fonts. https://github.com/harfbuzz/harfbuzz/issues/363 */
-      c->input->add (iter.get_glyph ());
       const AlternateSet &alt_set = this+alternateSet[iter.get_coverage ()];
-      unsigned int count = alt_set.len;
-      for (unsigned int i = 0; i < count; i++)
-	c->output->add (alt_set[i]);
+      c->output->add_array (alt_set.array, alt_set.len);
     }
   }
 
@@ -615,9 +611,7 @@ struct Ligature
   inline void collect_glyphs (hb_collect_glyphs_context_t *c) const
   {
     TRACE_COLLECT_GLYPHS (this);
-    unsigned int count = component.len;
-    for (unsigned int i = 1; i < count; i++)
-      c->input->add (component[i]);
+    c->input->add_array (component.array, component.len ? component.len - 1 : 0);
     c->output->add (ligGlyph);
   }
 
@@ -801,13 +795,13 @@ struct LigatureSubstFormat1
   inline void collect_glyphs (hb_collect_glyphs_context_t *c) const
   {
     TRACE_COLLECT_GLYPHS (this);
+    if (unlikely (!(this+coverage).add_coverage (c->input))) return;
     Coverage::Iter iter;
     unsigned int count = ligatureSet.len;
     for (iter.init (this+coverage); iter.more (); iter.next ())
     {
       if (unlikely (iter.get_coverage () >= count))
         break; /* Work around malicious fonts. https://github.com/harfbuzz/harfbuzz/issues/363 */
-      c->input->add (iter.get_glyph ());
       (this+ligatureSet[iter.get_coverage ()]).collect_glyphs (c);
     }
   }
@@ -970,25 +964,22 @@ struct ReverseChainSingleSubstFormat1
   inline void collect_glyphs (hb_collect_glyphs_context_t *c) const
   {
     TRACE_COLLECT_GLYPHS (this);
-
-    const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage> > (backtrack);
+    if (unlikely (!(this+coverage).add_coverage (c->input))) return;
 
     unsigned int count;
 
-    (this+coverage).add_coverage (c->input);
-
     count = backtrack.len;
     for (unsigned int i = 0; i < count; i++)
-      (this+backtrack[i]).add_coverage (c->before);
+      if (unlikely (!(this+backtrack[i]).add_coverage (c->before))) return;
 
+    const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage> > (backtrack);
     count = lookahead.len;
     for (unsigned int i = 0; i < count; i++)
-      (this+lookahead[i]).add_coverage (c->after);
+      if (unlikely (!(this+lookahead[i]).add_coverage (c->after))) return;
 
     const ArrayOf<GlyphID> &substitute = StructAfter<ArrayOf<GlyphID> > (lookahead);
     count = substitute.len;
-    for (unsigned int i = 0; i < count; i++)
-      c->output->add (substitute[i]);
+    c->output->add_array (substitute.array, substitute.len);
   }
 
   inline const Coverage &get_coverage (void) const
diff --git a/src/hb-set-digest-private.hh b/src/hb-set-digest-private.hh
index 0f798ab6..e099a826 100644
--- a/src/hb-set-digest-private.hh
+++ b/src/hb-set-digest-private.hh
@@ -71,7 +71,7 @@ struct hb_set_digest_lowest_bits_t
     mask |= mask_for (g);
   }
 
-  inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
+  inline bool add_range (hb_codepoint_t a, hb_codepoint_t b) {
     if ((b >> shift) - (a >> shift) >= mask_bits - 1)
       mask = (mask_t) -1;
     else {
@@ -79,6 +79,7 @@ struct hb_set_digest_lowest_bits_t
       mask_t mb = mask_for (b);
       mask |= mb + (mb - ma) - (mb < ma);
     }
+    return true;
   }
 
   template <typename T>
@@ -128,9 +129,10 @@ struct hb_set_digest_combiner_t
     tail.add (g);
   }
 
-  inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
+  inline bool add_range (hb_codepoint_t a, hb_codepoint_t b) {
     head.add_range (a, b);
     tail.add_range (a, b);
+    return true;
   }
   template <typename T>
   inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
diff --git a/src/hb-set-private.hh b/src/hb-set-private.hh
index edeca6d7..4b6f7fb3 100644
--- a/src/hb-set-private.hh
+++ b/src/hb-set-private.hh
@@ -70,20 +70,19 @@ struct hb_set_t
 
     inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
     {
-     elt_t *la = &elt (a);
-     elt_t *lb = &elt (b);
-     if (la == lb)
-       *la |= (mask (b) << 1) - mask(a);
-     else
-     {
-       *la |= ~(mask (a) - 1);
-       la++;
-
-       memset (la, 0xff, (char *) lb - (char *) la);
+      elt_t *la = &elt (a);
+      elt_t *lb = &elt (b);
+      if (la == lb)
+        *la |= (mask (b) << 1) - mask(a);
+      else
+      {
+	*la |= ~(mask (a) - 1);
+	la++;
 
-       *lb |= ((mask (b) << 1) - 1);
+	memset (la, 0xff, (char *) lb - (char *) la);
 
-     }
+	*lb |= ((mask (b) << 1) - 1);
+      }
     }
 
     inline bool is_equal (const page_t *other) const
@@ -230,34 +229,35 @@ struct hb_set_t
     if (unlikely (!page)) return;
     page->add (g);
   }
-  inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
+  inline bool add_range (hb_codepoint_t a, hb_codepoint_t b)
   {
-    if (unlikely (in_error || a > b || a == INVALID || b == INVALID)) return;
+    if (unlikely (in_error || a > b || a == INVALID || b == INVALID)) return true;//XXXXXXXfalse;
     unsigned int ma = get_major (a);
     unsigned int mb = get_major (b);
     if (ma == mb)
     {
       page_t *page = page_for_insert (a);
-      if (unlikely (!page)) return;
+      if (unlikely (!page)) return false;
       page->add_range (a, b);
     }
     else
     {
       page_t *page = page_for_insert (a);
-      if (unlikely (!page)) return;
+      if (unlikely (!page)) return false;
       page->add_range (a, major_start (ma + 1) - 1);
 
       for (unsigned int m = ma + 1; m < mb; m++)
       {
 	page = page_for_insert (major_start (m));
-	if (unlikely (!page)) return;
+	if (unlikely (!page)) return false;
 	page->init1 ();
       }
 
       page = page_for_insert (b);
-      if (unlikely (!page)) return;
+      if (unlikely (!page)) return false;
       page->add_range (major_start (mb), b);
     }
+    return true;
   }
 
   template <typename T>
commit 5d02572034e3dafbe87000fd0aa34b858bd95075
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Thu Dec 14 19:33:55 2017 -0800

    [set] Add add_sorted_array()
    
    Not optimized to use sortedness yet.  Also start putting in place infra
    to faster reject bad data.
    
    A version of Chandas.ttf found on some Chrome bots has 660kb of GPOS,
    mostly junk.  That is causing 48 million of set->add() calls in
    collect_glyphs(), which is insane.
    
    In the upcoming commits, I'll be speeding that up by optimizing
    add_sorted_array(), while also reducing work by rejecting out-of-sort
    arrays quickly and propagate the rejection.
    
    Part of https://bugs.chromium.org/p/chromium/issues/detail?id=794896

diff --git a/src/hb-ot-layout-common-private.hh b/src/hb-ot-layout-common-private.hh
index 82ace31b..47a05b5e 100644
--- a/src/hb-ot-layout-common-private.hh
+++ b/src/hb-ot-layout-common-private.hh
@@ -155,8 +155,9 @@ struct RangeRecord
   }
 
   template <typename set_t>
-  inline void add_coverage (set_t *glyphs) const {
+  inline bool add_coverage (set_t *glyphs) const {
     glyphs->add_range (start, end);
+    return likely (start <= end);
   }
 
   GlyphID	start;		/* First GlyphID in the range */
@@ -715,8 +716,8 @@ struct CoverageFormat1
   }
 
   template <typename set_t>
-  inline void add_coverage (set_t *glyphs) const {
-    glyphs->add_array (glyphArray.array, glyphArray.len);
+  inline bool add_coverage (set_t *glyphs) const {
+    return glyphs->add_sorted_array (glyphArray.array, glyphArray.len);
   }
 
   public:
@@ -815,10 +816,12 @@ struct CoverageFormat2
   }
 
   template <typename set_t>
-  inline void add_coverage (set_t *glyphs) const {
+  inline bool add_coverage (set_t *glyphs) const {
     unsigned int count = rangeRecord.len;
     for (unsigned int i = 0; i < count; i++)
-      rangeRecord[i].add_coverage (glyphs);
+      if (unlikely (!rangeRecord[i].add_coverage (glyphs)))
+        return false;
+    return true;
   }
 
   public:
@@ -925,12 +928,14 @@ struct Coverage
     }
   }
 
+  /* Might return false if array looks unsorted.
+   * Used for faster rejection of corrupt data. */
   template <typename set_t>
-  inline void add_coverage (set_t *glyphs) const {
+  inline bool add_coverage (set_t *glyphs) const {
     switch (u.format) {
-    case 1: u.format1.add_coverage (glyphs); break;
-    case 2: u.format2.add_coverage (glyphs); break;
-    default:                                 break;
+    case 1: return u.format1.add_coverage (glyphs);
+    case 2: return u.format2.add_coverage (glyphs);
+    default:return false;
     }
   }
 
@@ -1016,11 +1021,15 @@ struct ClassDefFormat1
   }
 
   template <typename set_t>
-  inline void add_class (set_t *glyphs, unsigned int klass) const {
+  inline bool add_class (set_t *glyphs, unsigned int min_klass, unsigned int max_klass) const {
     unsigned int count = classValue.len;
     for (unsigned int i = 0; i < count; i++)
-      if (classValue[i] == klass)
+    {
+      unsigned int klass = classValue[i];
+      if (min_klass <= klass && klass <= max_klass)
         glyphs->add (startGlyph + i);
+    }
+    return true;
   }
 
   inline bool intersects_class (const hb_set_t *glyphs, unsigned int klass) const {
@@ -1073,11 +1082,16 @@ struct ClassDefFormat2
   }
 
   template <typename set_t>
-  inline void add_class (set_t *glyphs, unsigned int klass) const {
+  inline bool add_class (set_t *glyphs, unsigned int min_klass, unsigned int max_klass) const {
     unsigned int count = rangeRecord.len;
     for (unsigned int i = 0; i < count; i++)
-      if (rangeRecord[i].value == klass)
-        rangeRecord[i].add_coverage (glyphs);
+    {
+      unsigned int klass = rangeRecord[i].value;
+      if (min_klass <= klass && klass <= max_klass)
+        if (unlikely (!rangeRecord[i].add_coverage (glyphs)))
+	  return false;
+    }
+    return true;
   }
 
   inline bool intersects_class (const hb_set_t *glyphs, unsigned int klass) const {
@@ -1135,11 +1149,12 @@ struct ClassDef
     }
   }
 
-  inline void add_class (hb_set_t *glyphs, unsigned int klass) const {
+  template <typename set_t>
+  inline bool add_class (set_t *glyphs, unsigned int min_klass, unsigned int max_klass) const {
     switch (u.format) {
-    case 1: u.format1.add_class (glyphs, klass); return;
-    case 2: u.format2.add_class (glyphs, klass); return;
-    default:return;
+    case 1: return u.format1.add_class (glyphs, min_klass, max_klass);
+    case 2: return u.format2.add_class (glyphs, min_klass, max_klass);
+    default:return false;
     }
   }
 
diff --git a/src/hb-ot-layout-gdef-table.hh b/src/hb-ot-layout-gdef-table.hh
index eed46dd6..b2924f6b 100644
--- a/src/hb-ot-layout-gdef-table.hh
+++ b/src/hb-ot-layout-gdef-table.hh
@@ -352,7 +352,7 @@ struct GDEF
   inline unsigned int get_glyph_class (hb_codepoint_t glyph) const
   { return (this+glyphClassDef).get_class (glyph); }
   inline void get_glyphs_in_class (unsigned int klass, hb_set_t *glyphs) const
-  { (this+glyphClassDef).add_class (glyphs, klass); }
+  { (this+glyphClassDef).add_class (glyphs, klass, klass); }
 
   inline bool has_mark_attachment_types (void) const { return markAttachClassDef != 0; }
   inline unsigned int get_mark_attachment_type (hb_codepoint_t glyph) const
diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh
index 3dcf2ec9..215e2d74 100644
--- a/src/hb-ot-layout-gpos-table.hh
+++ b/src/hb-ot-layout-gpos-table.hh
@@ -758,14 +758,12 @@ struct PairPosFormat2
     (this+coverage).add_coverage (c->input);
 
     unsigned int count1 = class1Count;
-    const ClassDef &klass1 = this+classDef1;
-    for (unsigned int i = 0; i < count1; i++)
-      klass1.add_class (c->input, i);
+    if (count1)
+      (this+classDef1).add_class (c->input, 0, count1 - 1);
 
     unsigned int count2 = class2Count;
-    const ClassDef &klass2 = this+classDef2;
-    for (unsigned int i = 0; i < count2; i++)
-      klass2.add_class (c->input, i);
+    if (count2)
+      (this+classDef2).add_class (c->input, 0, count2 - 1);
   }
 
   inline const Coverage &get_coverage (void) const
diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh
index b08a5913..a30fa6de 100644
--- a/src/hb-ot-layout-gsubgpos-private.hh
+++ b/src/hb-ot-layout-gsubgpos-private.hh
@@ -621,7 +621,7 @@ static inline void collect_glyph (hb_set_t *glyphs, const UINT16 &value, const v
 static inline void collect_class (hb_set_t *glyphs, const UINT16 &value, const void *data)
 {
   const ClassDef &class_def = *reinterpret_cast<const ClassDef *>(data);
-  class_def.add_class (glyphs, value);
+  class_def.add_class (glyphs, value, value);
 }
 static inline void collect_coverage (hb_set_t *glyphs, const UINT16 &value, const void *data)
 {
diff --git a/src/hb-set-digest-private.hh b/src/hb-set-digest-private.hh
index 75087085..0f798ab6 100644
--- a/src/hb-set-digest-private.hh
+++ b/src/hb-set-digest-private.hh
@@ -80,11 +80,25 @@ struct hb_set_digest_lowest_bits_t
       mask |= mb + (mb - ma) - (mb < ma);
     }
   }
+
+  template <typename T>
+  inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
+  {
+    for (unsigned int i = 0; i < count; i++)
+    {
+      add (*array);
+      array = (const T *) (stride + (const char *) array);
+    }
+  }
   template <typename T>
-  inline void add_array (const T *array, unsigned int count)
+  inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
   {
     for (unsigned int i = 0; i < count; i++)
-      add (array[i]);
+    {
+      add (*array);
+      array = (const T *) (stride + (const char *) array);
+    }
+    return true;
   }
 
   inline bool may_have (hb_codepoint_t g) const {
@@ -119,10 +133,17 @@ struct hb_set_digest_combiner_t
     tail.add_range (a, b);
   }
   template <typename T>
-  inline void add_array (const T *array, unsigned int count)
+  inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
+  {
+    head.add_array (array, count, stride);
+    tail.add_array (array, count, stride);
+  }
+  template <typename T>
+  inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
   {
-    head.add_array (array, count);
-    tail.add_array (array, count);
+    head.add_sorted_array (array, count, stride);
+    tail.add_sorted_array (array, count, stride);
+    return true;
   }
 
   inline bool may_have (hb_codepoint_t g) const {
diff --git a/src/hb-set-private.hh b/src/hb-set-private.hh
index 04f22c5d..edeca6d7 100644
--- a/src/hb-set-private.hh
+++ b/src/hb-set-private.hh
@@ -259,12 +259,29 @@ struct hb_set_t
       page->add_range (major_start (mb), b);
     }
   }
+
   template <typename T>
-  inline void add_array (const T *array, unsigned int count)
+  inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
   {
     for (unsigned int i = 0; i < count; i++)
-      add (array[i]);
+    {
+      add (*array);
+      array = (const T *) (stride + (const char *) array);
+    }
   }
+  /* Might return false if array looks unsorted.
+   * Used for faster rejection of corrupt data. */
+  template <typename T>
+  inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
+  {
+    for (unsigned int i = 0; i < count; i++)
+    {
+      add (*array);
+      array = (const T *) (stride + (const char *) array);
+    }
+    return true;
+  }
+
   inline void del (hb_codepoint_t g)
   {
     if (unlikely (in_error)) return;
commit 9d6511a7343ba150e8072e5fe91732db54a92309
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Thu Dec 14 19:04:55 2017 -0800

    [set] Reduce number of preallocated pages from 8 to 1
    
    Now that pagesize is 8192, this feels better.

diff --git a/src/hb-set-private.hh b/src/hb-set-private.hh
index 95c15449..04f22c5d 100644
--- a/src/hb-set-private.hh
+++ b/src/hb-set-private.hh
@@ -182,7 +182,7 @@ struct hb_set_t
   ASSERT_POD ();
   bool in_error;
   hb_prealloced_array_t<page_map_t, 8> page_map;
-  hb_prealloced_array_t<page_t, 8> pages;
+  hb_prealloced_array_t<page_t, 1> pages;
 
   inline void init (void)
   {
commit ae2e2b068e1ab68d1f814165cb86fa38deef1f5b
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Thu Dec 14 18:15:14 2017 -0800

    Fix partial tracing debug builds

diff --git a/src/hb-debug.hh b/src/hb-debug.hh
index 213e5432..6c425f7b 100644
--- a/src/hb-debug.hh
+++ b/src/hb-debug.hh
@@ -220,8 +220,8 @@ template <>
 {}
 
 template <int max_level, typename ret_t>
-struct hb_auto_trace_t {
-
+struct hb_auto_trace_t
+{
   explicit inline hb_auto_trace_t (unsigned int *plevel_,
 				   const char *what_,
 				   const void *obj_,
@@ -269,7 +269,17 @@ struct hb_auto_trace_t {
   bool returned;
 };
 template <typename ret_t> /* Make sure we don't use hb_auto_trace_t when not tracing. */
-struct hb_auto_trace_t<0, ret_t>;
+struct hb_auto_trace_t<0, ret_t>
+{
+  explicit inline hb_auto_trace_t (unsigned int *plevel_,
+				   const char *what_,
+				   const void *obj_,
+				   const char *func,
+				   const char *message,
+				   ...) HB_PRINTF_FUNC(6, 7) {}
+
+  inline ret_t ret (ret_t v, unsigned int line HB_UNUSED = 0) { return v; }
+};
 
 /* For disabled tracing; optimize out everything.
  * https://github.com/harfbuzz/harfbuzz/pull/605 */
commit 9daa88cd790b80a8bc7eaae2e7eec6f2f9fc60cf
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Thu Dec 14 13:37:48 2017 -0800

    Minor

diff --git a/src/hb-set-private.hh b/src/hb-set-private.hh
index c9305ec9..95c15449 100644
--- a/src/hb-set-private.hh
+++ b/src/hb-set-private.hh
@@ -184,6 +184,17 @@ struct hb_set_t
   hb_prealloced_array_t<page_map_t, 8> page_map;
   hb_prealloced_array_t<page_t, 8> pages;
 
+  inline void init (void)
+  {
+    page_map.init ();
+    pages.init ();
+  }
+  inline void finish (void)
+  {
+    page_map.finish ();
+    pages.finish ();
+  }
+
   inline bool resize (unsigned int count)
   {
     if (unlikely (in_error)) return false;
diff --git a/src/hb-set.cc b/src/hb-set.cc
index e2c78822..0b4f871e 100644
--- a/src/hb-set.cc
+++ b/src/hb-set.cc
@@ -45,8 +45,7 @@ hb_set_create (void)
   if (!(set = hb_object_create<hb_set_t> ()))
     return hb_set_get_empty ();
 
-  set->page_map.init ();
-  set->pages.init ();
+  set->init ();
 
   return set;
 }
@@ -96,8 +95,7 @@ hb_set_destroy (hb_set_t *set)
 {
   if (!hb_object_destroy (set)) return;
 
-  set->page_map.finish ();
-  set->pages.finish ();
+  set->finish ();
 
   free (set);
 }
commit f424a342233ae32bbfabbdeadf59c82420b0880b
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Thu Dec 14 13:30:38 2017 -0800

    [set] Change pagesize from 512 bits to 8192 bits
    
    Fixes perf regression on some heavy fonts in Chrome's FT+HB
    interaction.
    
    See:
    https://bugs.chromium.org/p/chromium/issues/detail?id=782220
    
    More work to be done:
    https://bugs.chromium.org/p/chromium/issues/detail?id=794896

diff --git a/src/hb-set-private.hh b/src/hb-set-private.hh
index e3048657..c9305ec9 100644
--- a/src/hb-set-private.hh
+++ b/src/hb-set-private.hh
@@ -151,7 +151,7 @@ struct hb_set_t
       return 0;
     }
 
-    static const unsigned int PAGE_BITS = 512; /* Use to tune. */
+    static const unsigned int PAGE_BITS = 8192; /* Use to tune. */
     static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, "");
 
     typedef uint64_t elt_t;


More information about the HarfBuzz mailing list