[HarfBuzz] harfbuzz: Branch 'master' - 15 commits

Behdad Esfahbod behdad at kemper.freedesktop.org
Sun Aug 26 01:26:25 UTC 2018


 docs/harfbuzz-sections.txt                      |    5 
 src/hb-blob-private.hh                          |    4 
 src/hb-dsalgs.hh                                |    3 
 src/hb-face.cc                                  |  149 ++++++++++++++
 src/hb-face.h                                   |   14 +
 src/hb-machinery-private.hh                     |   83 ++++++++
 src/hb-ot-cmap-table.hh                         |  243 +++++++++++++++---------
 src/hb-subset-plan.cc                           |    2 
 src/hb-subset-plan.hh                           |    2 
 src/hb-subset-private.hh                        |    7 
 src/hb-subset.cc                                |  186 +++++-------------
 src/hb-subset.h                                 |   16 +
 test/api/test-subset-codepoints.c               |   18 -
 test/fuzzing/hb-subset-get-codepoints-fuzzer.cc |    2 
 test/shaping/data/in-house/tests/fuzzed.tests   |    2 
 15 files changed, 494 insertions(+), 242 deletions(-)

New commits:
commit c8cfb702e96bf4e89495fcc56f965c88bfa77dca
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Sat Aug 25 16:14:32 2018 -0700

    [cmap] Minor

diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh
index 31f3b309..dcdff008 100644
--- a/src/hb-ot-cmap-table.hh
+++ b/src/hb-ot-cmap-table.hh
@@ -292,11 +292,13 @@ struct CmapSubtableFormat4
     }
     inline void collect_unicodes (hb_set_t *out) const
     {
-      for (unsigned int i = 0; i < this->segCount; i++)
+      unsigned int count = this->segCount;
+      if (count && this->startCount[count - 1] == 0xFFFFu)
+        count--; /* Skip sentinel segment. */
+      for (unsigned int i = 0; i < count; i++)
       {
 	/* XXX This does NOT skip over chars mapping to gid0... */
-	if (this->startCount[i] != 0xFFFFu || this->endCount[i] != 0xFFFFu) // Skip the last segment (0xFFFF)
-	  out->add_range (this->startCount[i], this->endCount[i]);
+	out->add_range (this->startCount[i], this->endCount[i]);
       }
     }
 
commit 1dcf5fb038e7c2d3d03a50c4cd9869a922f9adf7
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Sat Aug 25 16:11:26 2018 -0700

    [cmap] Add hb_subset_collect_variation_unicodes()
    
    To be moved to hb-face.h later.

diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh
index 2f115e2e..31f3b309 100644
--- a/src/hb-ot-cmap-table.hh
+++ b/src/hb-ot-cmap-table.hh
@@ -296,7 +296,7 @@ struct CmapSubtableFormat4
       {
 	/* XXX This does NOT skip over chars mapping to gid0... */
 	if (this->startCount[i] != 0xFFFFu || this->endCount[i] != 0xFFFFu) // Skip the last segment (0xFFFF)
-	  hb_set_add_range (out, this->startCount[i], this->endCount[i]);
+	  out->add_range (this->startCount[i], this->endCount[i]);
       }
     }
 
@@ -459,11 +459,9 @@ struct CmapSubtableLongSegmented
   inline void collect_unicodes (hb_set_t *out) const
   {
     for (unsigned int i = 0; i < this->groups.len; i++) {
-      hb_set_add_range (out,
-			MIN ((unsigned int) this->groups[i].startCharCode,
-			     (unsigned int) HB_MAX_UNICODE_CODEPOINT_VALUE),
-			MIN ((unsigned int) this->groups[i].endCharCode,
-			     (unsigned int) HB_MAX_UNICODE_CODEPOINT_VALUE));
+      out->add_range (this->groups[i].startCharCode,
+		      MIN ((hb_codepoint_t) this->groups[i].endCharCode,
+			   (hb_codepoint_t) HB_MAX_UNICODE_CODEPOINT_VALUE));
     }
   }
 
@@ -600,7 +598,23 @@ struct UnicodeValueRange
   DEFINE_SIZE_STATIC (4);
 };
 
-typedef SortedArrayOf<UnicodeValueRange, HBUINT32> DefaultUVS;
+struct DefaultUVS : SortedArrayOf<UnicodeValueRange, HBUINT32>
+{
+  inline void collect_unicodes (hb_set_t *out) const
+  {
+    unsigned int count = len;
+    for (unsigned int i = 0; i < count; i++)
+    {
+      hb_codepoint_t first = arrayZ[i].startUnicodeValue;
+      hb_codepoint_t last = MIN ((hb_codepoint_t) (first + arrayZ[i].additionalCount),
+				 (hb_codepoint_t) HB_MAX_UNICODE_CODEPOINT_VALUE);
+      out->add_range (first, last);
+    }
+  }
+
+  public:
+  DEFINE_SIZE_ARRAY (4, arrayZ);
+};
 
 struct UVSMapping
 {
@@ -621,7 +635,18 @@ struct UVSMapping
   DEFINE_SIZE_STATIC (5);
 };
 
-typedef SortedArrayOf<UVSMapping, HBUINT32> NonDefaultUVS;
+struct NonDefaultUVS : SortedArrayOf<UVSMapping, HBUINT32>
+{
+  inline void collect_unicodes (hb_set_t *out) const
+  {
+    unsigned int count = len;
+    for (unsigned int i = 0; i < count; i++)
+      out->add (arrayZ[i].glyphID);
+  }
+
+  public:
+  DEFINE_SIZE_ARRAY (4, arrayZ);
+};
 
 struct VariationSelectorRecord
 {
@@ -644,6 +669,12 @@ struct VariationSelectorRecord
     return GLYPH_VARIANT_NOT_FOUND;
   }
 
+  inline void collect_unicodes (hb_set_t *out, const void *base) const
+  {
+    (base+defaultUVS).collect_unicodes (out);
+    (base+nonDefaultUVS).collect_unicodes (out);
+  }
+
   inline int cmp (const hb_codepoint_t &variation_selector) const
   {
     return varSelector.cmp (variation_selector);
@@ -672,7 +703,7 @@ struct CmapSubtableFormat14
 					    hb_codepoint_t variation_selector,
 					    hb_codepoint_t *glyph) const
   {
-    return record[record.bsearch(variation_selector)].get_glyph (codepoint, glyph, this);
+    return record[record.bsearch (variation_selector)].get_glyph (codepoint, glyph, this);
   }
 
   inline void collect_variation_selectors (hb_set_t *out) const
@@ -681,6 +712,11 @@ struct CmapSubtableFormat14
     for (unsigned int i = 0; i < count; i++)
       out->add (record.arrayZ[i].varSelector);
   }
+  inline void collect_variation_unicodes (hb_codepoint_t variation_selector,
+					  hb_set_t *out) const
+  {
+    record[record.bsearch (variation_selector)].collect_unicodes (out, this);
+  }
 
   inline bool sanitize (hb_sanitize_context_t *c) const
   {
@@ -1045,6 +1081,11 @@ struct cmap
     {
       subtable_uvs->collect_variation_selectors (out);
     }
+    inline void collect_variation_unicodes (hb_codepoint_t variation_selector,
+					    hb_set_t *out) const
+    {
+      subtable_uvs->collect_variation_unicodes (variation_selector, out);
+    }
 
     protected:
     typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj,
diff --git a/src/hb-subset.cc b/src/hb-subset.cc
index 01d9b89a..77d9e81c 100644
--- a/src/hb-subset.cc
+++ b/src/hb-subset.cc
@@ -263,7 +263,8 @@ hb_subset_collect_unicodes (hb_face_t *face, hb_set_t *out)
  * Since: REPLACEME
  */
 void
-hb_subset_collect_variation_selectors (hb_face_t *face, hb_set_t *out)
+hb_subset_collect_variation_selectors (hb_face_t *face,
+				       hb_set_t *out)
 {
   /* XXX Use saved accel. */
   OT::cmap::accelerator_t cmap;
@@ -271,3 +272,25 @@ hb_subset_collect_variation_selectors (hb_face_t *face, hb_set_t *out)
   cmap.collect_variation_selectors (out);
   cmap.fini();
 }
+
+/**
+ * hb_subset_collect_variation_unicodes:
+ * @face: font face.
+ * @out: set to add Unicode characters for @variation_selector covered by @face to.
+ *
+ *
+ *
+ * Since: REPLACEME
+ */
+void
+hb_subset_collect_variation_unicodes (hb_face_t *face,
+				      hb_codepoint_t variation_selector,
+				      hb_set_t *out)
+{
+  /* XXX Use saved accel. */
+  OT::cmap::accelerator_t cmap;
+  cmap.init (face);
+  cmap.collect_variation_unicodes (variation_selector, out);
+  cmap.fini();
+}
+
diff --git a/src/hb-subset.h b/src/hb-subset.h
index 745bacf2..1e7d8f52 100644
--- a/src/hb-subset.h
+++ b/src/hb-subset.h
@@ -84,10 +84,16 @@ hb_subset (hb_face_t *source,
 /* TODO Move to hb-face.h. */
 
 HB_EXTERN void
-hb_subset_collect_unicodes (hb_face_t *source, hb_set_t *out);
+hb_subset_collect_unicodes (hb_face_t *face, hb_set_t *out);
 
 HB_EXTERN void
-hb_subset_collect_variation_selectors (hb_face_t *source, hb_set_t *out);
+hb_subset_collect_variation_selectors (hb_face_t *face,
+				       hb_set_t *out);
+
+HB_EXTERN void
+hb_subset_collect_variation_unicodes (hb_face_t *face,
+				      hb_codepoint_t variation_selector,
+				      hb_set_t *out);
 
 
 HB_END_DECLS
commit 4806b3800d77603c203e8bb2e88baeb2b3a14f05
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Sat Aug 25 15:56:07 2018 -0700

    [cmap] Add hb_subset_collect_variation_selectors()
    
    To be moved to hb-face later.

diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh
index bf9874a3..2f115e2e 100644
--- a/src/hb-ot-cmap-table.hh
+++ b/src/hb-ot-cmap-table.hh
@@ -594,7 +594,7 @@ struct UnicodeValueRange
   }
 
   HBUINT24	startUnicodeValue;	/* First value in this range. */
-  HBUINT8		additionalCount;	/* Number of additional values in this
+  HBUINT8	additionalCount;	/* Number of additional values in this
 					 * range. */
   public:
   DEFINE_SIZE_STATIC (4);
@@ -675,6 +675,13 @@ struct CmapSubtableFormat14
     return record[record.bsearch(variation_selector)].get_glyph (codepoint, glyph, this);
   }
 
+  inline void collect_variation_selectors (hb_set_t *out) const
+  {
+    unsigned int count = record.len;
+    for (unsigned int i = 0; i < count; i++)
+      out->add (record.arrayZ[i].varSelector);
+  }
+
   inline bool sanitize (hb_sanitize_context_t *c) const
   {
     TRACE_SANITIZE (this);
@@ -977,7 +984,7 @@ struct cmap
       /* Meh. */
       if (!subtable_uvs) subtable_uvs = &Null(CmapSubtableFormat14);
 
-      this->uvs_table = subtable_uvs;
+      this->subtable_uvs = subtable_uvs;
 
       this->get_glyph_data = subtable;
       if (unlikely (symbol))
@@ -1018,7 +1025,7 @@ struct cmap
 				     hb_codepoint_t  variation_selector,
 				     hb_codepoint_t *glyph) const
     {
-      switch (this->uvs_table->get_glyph_variant (unicode,
+      switch (this->subtable_uvs->get_glyph_variant (unicode,
 						  variation_selector,
 						  glyph))
       {
@@ -1034,6 +1041,10 @@ struct cmap
     {
       subtable->collect_unicodes (out);
     }
+    inline void collect_variation_selectors (hb_set_t *out) const
+    {
+      subtable_uvs->collect_variation_selectors (out);
+    }
 
     protected:
     typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj,
@@ -1073,12 +1084,13 @@ struct cmap
 
     private:
     const CmapSubtable *subtable;
+    const CmapSubtableFormat14 *subtable_uvs;
+
     hb_cmap_get_glyph_func_t get_glyph_func;
     const void *get_glyph_data;
 
     CmapSubtableFormat4::accelerator_t format4_accel;
 
-    const CmapSubtableFormat14 *uvs_table;
     hb_blob_t *blob;
   };
 
diff --git a/src/hb-subset.cc b/src/hb-subset.cc
index 2e991de2..01d9b89a 100644
--- a/src/hb-subset.cc
+++ b/src/hb-subset.cc
@@ -238,14 +238,36 @@ hb_subset (hb_face_t *source,
 
 /**
  * hb_subset_collect_unicodes:
- * @source: font face data to load.
- * @out: set to add the all codepoints covered by font face, source.
+ * @face: font face.
+ * @out: set to add Unicode characters covered by @face to.
+ *
+ * Since: REPLACEME
  */
 void
-hb_subset_collect_unicodes (hb_face_t *source, hb_set_t *out)
+hb_subset_collect_unicodes (hb_face_t *face, hb_set_t *out)
 {
+  /* XXX Use saved accel. */
   OT::cmap::accelerator_t cmap;
-  cmap.init (source);
+  cmap.init (face);
   cmap.collect_unicodes (out);
   cmap.fini();
 }
+
+/**
+ * hb_subset_collect_variation_selectors:
+ * @face: font face.
+ * @out: set to add Variation Selector characters covered by @face to.
+ *
+ *
+ *
+ * Since: REPLACEME
+ */
+void
+hb_subset_collect_variation_selectors (hb_face_t *face, hb_set_t *out)
+{
+  /* XXX Use saved accel. */
+  OT::cmap::accelerator_t cmap;
+  cmap.init (face);
+  cmap.collect_variation_selectors (out);
+  cmap.fini();
+}
diff --git a/src/hb-subset.h b/src/hb-subset.h
index b79b8094..745bacf2 100644
--- a/src/hb-subset.h
+++ b/src/hb-subset.h
@@ -80,10 +80,16 @@ hb_subset (hb_face_t *source,
 	   hb_subset_profile_t *profile,
            hb_subset_input_t *input);
 
-/* hb_subset_collect_unicodes */
+
+/* TODO Move to hb-face.h. */
+
 HB_EXTERN void
 hb_subset_collect_unicodes (hb_face_t *source, hb_set_t *out);
 
+HB_EXTERN void
+hb_subset_collect_variation_selectors (hb_face_t *source, hb_set_t *out);
+
+
 HB_END_DECLS
 
 #endif /* HB_SUBSET_H */
commit 3336de24790ac1a12852ac2a3e2fff4d6bb19bc7
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Sat Aug 25 15:41:25 2018 -0700

    [cmap] Remove unused code

diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh
index a767354f..bf9874a3 100644
--- a/src/hb-ot-cmap-table.hh
+++ b/src/hb-ot-cmap-table.hh
@@ -1039,8 +1039,6 @@ struct cmap
     typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj,
 					      hb_codepoint_t codepoint,
 					      hb_codepoint_t *glyph);
-    typedef void (*hb_cmap_collect_unicodes_func_t) (const void *obj,
-						       hb_set_t *out);
 
     template <typename Type>
     static inline bool get_glyph_from (const void *obj,
@@ -1052,14 +1050,6 @@ struct cmap
     }
 
     template <typename Type>
-    static inline void collect_unicodes_from (const void *obj,
-						hb_set_t *out)
-    {
-      const Type *typed_obj = (const Type *) obj;
-      typed_obj->collect_unicodes (out);
-    }
-
-    template <typename Type>
     static inline bool get_glyph_from_symbol (const void *obj,
 					      hb_codepoint_t codepoint,
 					      hb_codepoint_t *glyph)
commit 1becabe06c0c58aaf69a9ba641508e77a60f3451
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Sat Aug 25 15:37:56 2018 -0700

    [cmap] Use bsearch to find subtables

diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh
index 173d62e6..a767354f 100644
--- a/src/hb-ot-cmap-table.hh
+++ b/src/hb-ot-cmap-table.hh
@@ -1101,10 +1101,7 @@ struct cmap
     key.platformID.set (platform_id);
     key.encodingID.set (encoding_id);
 
-    /* Note: We can use bsearch, but since it has no performance
-     * implications, we use lsearch and as such accept fonts with
-     * unsorted subtable list. */
-    int result = encodingRecord./*bsearch*/lsearch (key);
+    int result = encodingRecord.bsearch (key);
     if (result == -1 || !encodingRecord[result].subtable)
       return nullptr;
 
diff --git a/test/shaping/data/in-house/tests/fuzzed.tests b/test/shaping/data/in-house/tests/fuzzed.tests
index 43a19334..a6ce93d0 100644
--- a/test/shaping/data/in-house/tests/fuzzed.tests
+++ b/test/shaping/data/in-house/tests/fuzzed.tests
@@ -10,7 +10,7 @@
 ../fonts/fab39d60d758cb586db5a504f218442cd1395725.ttf:--font-funcs=ot:U+0041,U+0041:[gid0=0+1000|gid0=1+1000]
 ../fonts/205edd09bd3d141cc9580f650109556cc28b22cb.ttf:--font-funcs=ot:U+0041:[gid0=0+1000]
 ../fonts/217a934cfe15c548b572c203dceb2befdf026462.ttf:--font-funcs=ot:U+0061,U+0061,U+0061:[]
-../fonts/558661aa659912f4d30ecd27bd09835171a8e2b0.ttf:--font-funcs=ot:U+FFFD,U+E0100,U+FFFD,U+E0010:[]
+../fonts/558661aa659912f4d30ecd27bd09835171a8e2b0.ttf:--font-funcs=ot:U+FFFD,U+E0100,U+FFFD,U+E0010:[gid3584=0+1000|gid1024=0+1000|gid1=0+1000|gid8=0+1000|gid3=0+1000|gid0=0+1000|gid1=0+1000|gid3584=0+1000|gid3584=2+1000|gid1024=2+1000|gid1=2+1000|gid8=2+1000|gid3=2+1000|gid0=2+1000|gid1=2+1000|gid3584=2+1000]
 ../fonts/a34a9191d9376bda419836effeef7e75c1386016.ttf:--font-funcs=ot:U+0041:[]
 ../fonts/a69118c2c2ada48ff803d9149daa54c9ebdae30e.ttf:--font-funcs=ot:U+0041:[gid0=0+1229]
 ../fonts/b6acef662e0beb8d5fcf5b61c6b0ca69537b7402.ttf:--font-funcs=ot:U+0041:[gid0=0+1000]
commit 02fe03e09a3258e07d2d6749990f6d31dd2a8525
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Sat Aug 25 15:33:05 2018 -0700

    [cmap] Factor out find_best_subtable

diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh
index 1152c8ea..173d62e6 100644
--- a/src/hb-ot-cmap-table.hh
+++ b/src/hb-ot-cmap-table.hh
@@ -928,6 +928,35 @@ struct cmap
     return result;
   }
 
+  const CmapSubtable *find_best_subtable (bool *symbol = nullptr) const
+  {
+    if (symbol) *symbol = false;
+
+    const CmapSubtable *subtable;
+
+    /* 32-bit subtables. */
+    if ((subtable = this->find_subtable (3, 10))) return subtable;
+    if ((subtable = this->find_subtable (0, 6))) return subtable;
+    if ((subtable = this->find_subtable (0, 4))) return subtable;
+
+    /* 16-bit subtables. */
+    if ((subtable = this->find_subtable (3, 1))) return subtable;
+    if ((subtable = this->find_subtable (0, 3))) return subtable;
+    if ((subtable = this->find_subtable (0, 2))) return subtable;
+    if ((subtable = this->find_subtable (0, 1))) return subtable;
+    if ((subtable = this->find_subtable (0, 0))) return subtable;
+
+    /* Symbol subtable. */
+    if ((subtable = this->find_subtable (3, 0)))
+    {
+      if (symbol) *symbol = true;
+      return subtable;
+    }
+
+    /* Meh. */
+    return &Null(CmapSubtable);
+  }
+
   struct accelerator_t
   {
     inline void init (hb_face_t *face)
@@ -935,27 +964,8 @@ struct cmap
       this->blob = hb_sanitize_context_t().reference_table<cmap> (face);
       const cmap *table = this->blob->as<cmap> ();
       const CmapSubtableFormat14 *subtable_uvs = nullptr;
-
-      subtable = nullptr;
-
-      bool symbol = false;
-      /* 32-bit subtables. */
-      if (!subtable) subtable = table->find_subtable (3, 10);
-      if (!subtable) subtable = table->find_subtable (0, 6);
-      if (!subtable) subtable = table->find_subtable (0, 4);
-      /* 16-bit subtables. */
-      if (!subtable) subtable = table->find_subtable (3, 1);
-      if (!subtable) subtable = table->find_subtable (0, 3);
-      if (!subtable) subtable = table->find_subtable (0, 2);
-      if (!subtable) subtable = table->find_subtable (0, 1);
-      if (!subtable) subtable = table->find_subtable (0, 0);
-      if (!subtable)
-      {
-	subtable = table->find_subtable (3, 0);
-	if (subtable) symbol = true;
-      }
-      /* Meh. */
-      if (!subtable) subtable = &Null(CmapSubtable);
+      bool symbol;
+      subtable = table->find_best_subtable (&symbol);
 
       /* UVS subtable. */
       if (!subtable_uvs)
commit b41c43b4e112bfa38fef35694842f242c28a7da2
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Sat Aug 25 15:25:03 2018 -0700

    Minor

diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh
index a25d13cf..1152c8ea 100644
--- a/src/hb-ot-cmap-table.hh
+++ b/src/hb-ot-cmap-table.hh
@@ -244,14 +244,12 @@ struct CmapSubtableFormat4
       glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2;
     }
 
-    static inline bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph)
+    inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
     {
-      const accelerator_t *thiz = (const accelerator_t *) obj;
-
       /* Custom two-array bsearch. */
-      int min = 0, max = (int) thiz->segCount - 1;
-      const HBUINT16 *startCount = thiz->startCount;
-      const HBUINT16 *endCount = thiz->endCount;
+      int min = 0, max = (int) this->segCount - 1;
+      const HBUINT16 *startCount = this->startCount;
+      const HBUINT16 *endCount = this->endCount;
       unsigned int i;
       while (min <= max)
       {
@@ -270,24 +268,37 @@ struct CmapSubtableFormat4
 
     found:
       hb_codepoint_t gid;
-      unsigned int rangeOffset = thiz->idRangeOffset[i];
+      unsigned int rangeOffset = this->idRangeOffset[i];
       if (rangeOffset == 0)
-	gid = codepoint + thiz->idDelta[i];
+	gid = codepoint + this->idDelta[i];
       else
       {
 	/* Somebody has been smoking... */
-	unsigned int index = rangeOffset / 2 + (codepoint - thiz->startCount[i]) + i - thiz->segCount;
-	if (unlikely (index >= thiz->glyphIdArrayLength))
+	unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
+	if (unlikely (index >= this->glyphIdArrayLength))
 	  return false;
-	gid = thiz->glyphIdArray[index];
+	gid = this->glyphIdArray[index];
 	if (unlikely (!gid))
 	  return false;
-	gid += thiz->idDelta[i];
+	gid += this->idDelta[i];
       }
 
       *glyph = gid & 0xFFFFu;
       return *glyph != 0;
     }
+    static inline bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph)
+    {
+      return ((const accelerator_t *) obj)->get_glyph (codepoint, glyph);
+    }
+    inline void collect_unicodes (hb_set_t *out) const
+    {
+      for (unsigned int i = 0; i < this->segCount; i++)
+      {
+	/* XXX This does NOT skip over chars mapping to gid0... */
+	if (this->startCount[i] != 0xFFFFu || this->endCount[i] != 0xFFFFu) // Skip the last segment (0xFFFF)
+	  hb_set_add_range (out, this->startCount[i], this->endCount[i]);
+      }
+    }
 
     const HBUINT16 *endCount;
     const HBUINT16 *startCount;
@@ -306,16 +317,9 @@ struct CmapSubtableFormat4
   }
   inline void collect_unicodes (hb_set_t *out) const
   {
-    unsigned int segCount = this->segCountX2 / 2;
-    const HBUINT16 *endCount = this->values;
-    const HBUINT16 *startCount = endCount + segCount + 1;
-
-    for (unsigned int i = 0; i < segCount; i++)
-    {
-      /* XXX This does NOT skip over chars mapping to gid0... */
-      if (startCount[i] != 0xFFFFu || endCount[i] != 0xFFFFu) // Skip the last segment (0xFFFF)
-	hb_set_add_range (out, startCount[i], endCount[i]);
-    }
+    accelerator_t accel;
+    accel.init (this);
+    accel.collect_unicodes (out);
   }
 
   inline bool sanitize (hb_sanitize_context_t *c) const
commit 7d382fa276f44b7b163e98d434cc79f958bf87fb
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Sat Aug 25 09:35:45 2018 -0700

    [cmap] Implement collect_unicodes() for Format0/6/10

diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh
index 2f88a767..a25d13cf 100644
--- a/src/hb-ot-cmap-table.hh
+++ b/src/hb-ot-cmap-table.hh
@@ -54,6 +54,12 @@ struct CmapSubtableFormat0
     *glyph = gid;
     return *glyph != 0;
   }
+  inline void collect_unicodes (hb_set_t *out) const
+  {
+    for (unsigned int i = 0; i < 256; i++)
+      if (glyphIdArray[i])
+        out->add (i);
+  }
 
   inline bool sanitize (hb_sanitize_context_t *c) const
   {
@@ -306,6 +312,7 @@ struct CmapSubtableFormat4
 
     for (unsigned int i = 0; i < segCount; i++)
     {
+      /* XXX This does NOT skip over chars mapping to gid0... */
       if (startCount[i] != 0xFFFFu || endCount[i] != 0xFFFFu) // Skip the last segment (0xFFFF)
 	hb_set_add_range (out, startCount[i], endCount[i]);
     }
@@ -384,7 +391,7 @@ struct CmapSubtableLongGroup
   HBUINT32		startCharCode;	/* First character code in this group. */
   HBUINT32		endCharCode;	/* Last character code in this group. */
   HBUINT32		glyphID;	/* Glyph index; interpretation depends on
-				 * subtable format. */
+					 * subtable format. */
   public:
   DEFINE_SIZE_STATIC (12);
 };
@@ -401,6 +408,14 @@ struct CmapSubtableTrimmed
     *glyph = gid;
     return *glyph != 0;
   }
+  inline void collect_unicodes (hb_set_t *out) const
+  {
+    hb_codepoint_t start = startCharCode;
+    unsigned int count = glyphIdArray.len;
+    for (unsigned int i = 0; i < count; i++)
+      if (glyphIdArray[i])
+        out->add (start + i);
+  }
 
   inline bool sanitize (hb_sanitize_context_t *c) const
   {
@@ -694,10 +709,10 @@ struct CmapSubtable
   inline void collect_unicodes (hb_set_t *out) const
   {
     switch (u.format) {
-//    case  0: u.format0 .collect_unicodes (out); return;
+    case  0: u.format0 .collect_unicodes (out); return;
     case  4: u.format4 .collect_unicodes (out); return;
-//    case  6: u.format6 .collect_unicodes (out); return;
-//    case 10: u.format10.collect_unicodes (out); return;
+    case  6: u.format6 .collect_unicodes (out); return;
+    case 10: u.format10.collect_unicodes (out); return;
     case 12: u.format12.collect_unicodes (out); return;
     case 13: u.format13.collect_unicodes (out); return;
     case 14:
commit bd0e542525d41d9ebe51cbcab8151d65eb984b2e
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Sat Aug 25 09:33:30 2018 -0700

    [cmap] Simplify collect_unicodes()
    
    Don't use accelerator (almost).  Hooks up Format13 as well.

diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh
index 925101f7..2f88a767 100644
--- a/src/hb-ot-cmap-table.hh
+++ b/src/hb-ot-cmap-table.hh
@@ -283,17 +283,6 @@ struct CmapSubtableFormat4
       return *glyph != 0;
     }
 
-    static inline void collect_unicodes_func (const void *obj, hb_set_t *out)
-    {
-      const accelerator_t *thiz = (const accelerator_t *) obj;
-      for (unsigned int i = 0; i < thiz->segCount; i++)
-      {
-	if (thiz->startCount[i] != 0xFFFFu
-	    || thiz->endCount[i] != 0xFFFFu) // Skip the last segment (0xFFFF)
-	  hb_set_add_range (out, thiz->startCount[i], thiz->endCount[i]);
-      }
-    }
-
     const HBUINT16 *endCount;
     const HBUINT16 *startCount;
     const HBUINT16 *idDelta;
@@ -309,6 +298,18 @@ struct CmapSubtableFormat4
     accel.init (this);
     return accel.get_glyph_func (&accel, codepoint, glyph);
   }
+  inline void collect_unicodes (hb_set_t *out) const
+  {
+    unsigned int segCount = this->segCountX2 / 2;
+    const HBUINT16 *endCount = this->values;
+    const HBUINT16 *startCount = endCount + segCount + 1;
+
+    for (unsigned int i = 0; i < segCount; i++)
+    {
+      if (startCount[i] != 0xFFFFu || endCount[i] != 0xFFFFu) // Skip the last segment (0xFFFF)
+	hb_set_add_range (out, startCount[i], endCount[i]);
+    }
+  }
 
   inline bool sanitize (hb_sanitize_context_t *c) const
   {
@@ -690,6 +691,19 @@ struct CmapSubtable
     default: return false;
     }
   }
+  inline void collect_unicodes (hb_set_t *out) const
+  {
+    switch (u.format) {
+//    case  0: u.format0 .collect_unicodes (out); return;
+    case  4: u.format4 .collect_unicodes (out); return;
+//    case  6: u.format6 .collect_unicodes (out); return;
+//    case 10: u.format10.collect_unicodes (out); return;
+    case 12: u.format12.collect_unicodes (out); return;
+    case 13: u.format13.collect_unicodes (out); return;
+    case 14:
+    default: return;
+    }
+  }
 
   inline bool sanitize (hb_sanitize_context_t *c) const
   {
@@ -901,9 +915,10 @@ struct cmap
     {
       this->blob = hb_sanitize_context_t().reference_table<cmap> (face);
       const cmap *table = this->blob->as<cmap> ();
-      const CmapSubtable *subtable = nullptr;
       const CmapSubtableFormat14 *subtable_uvs = nullptr;
 
+      subtable = nullptr;
+
       bool symbol = false;
       /* 32-bit subtables. */
       if (!subtable) subtable = table->find_subtable (3, 10);
@@ -939,24 +954,20 @@ struct cmap
       if (unlikely (symbol))
       {
 	this->get_glyph_func = get_glyph_from_symbol<CmapSubtable>;
-	this->collect_unicodes_func = collect_unicodes_func_nil;
       } else {
 	switch (subtable->u.format) {
 	/* Accelerate format 4 and format 12. */
 	default:
 	  this->get_glyph_func = get_glyph_from<CmapSubtable>;
-	  this->collect_unicodes_func = collect_unicodes_func_nil;
 	  break;
 	case 12:
 	  this->get_glyph_func = get_glyph_from<CmapSubtableFormat12>;
-	  this->collect_unicodes_func = collect_unicodes_from<CmapSubtableFormat12>;
 	  break;
 	case  4:
 	  {
 	    this->format4_accel.init (&subtable->u.format4);
 	    this->get_glyph_data = &this->format4_accel;
 	    this->get_glyph_func = this->format4_accel.get_glyph_func;
-	    this->collect_unicodes_func = this->format4_accel.collect_unicodes_func;
 	  }
 	  break;
 	}
@@ -992,7 +1003,7 @@ struct cmap
 
     inline void collect_unicodes (hb_set_t *out) const
     {
-      this->collect_unicodes_func (get_glyph_data, out);
+      subtable->collect_unicodes (out);
     }
 
     protected:
@@ -1002,11 +1013,6 @@ struct cmap
     typedef void (*hb_cmap_collect_unicodes_func_t) (const void *obj,
 						       hb_set_t *out);
 
-    static inline void collect_unicodes_func_nil (const void *obj, hb_set_t *out)
-    {
-      // NOOP
-    }
-
     template <typename Type>
     static inline bool get_glyph_from (const void *obj,
 				       hb_codepoint_t codepoint,
@@ -1047,9 +1053,9 @@ struct cmap
     }
 
     private:
+    const CmapSubtable *subtable;
     hb_cmap_get_glyph_func_t get_glyph_func;
     const void *get_glyph_data;
-    hb_cmap_collect_unicodes_func_t collect_unicodes_func;
 
     CmapSubtableFormat4::accelerator_t format4_accel;
 
commit d60c465627d76fcfbeb37d6b8f9382f3b84ace6e
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Sat Aug 25 08:47:06 2018 -0700

    Rename get_all_codepoints() to collect_unicodes()

diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh
index 8529c6a3..925101f7 100644
--- a/src/hb-ot-cmap-table.hh
+++ b/src/hb-ot-cmap-table.hh
@@ -283,7 +283,7 @@ struct CmapSubtableFormat4
       return *glyph != 0;
     }
 
-    static inline void get_all_codepoints_func (const void *obj, hb_set_t *out)
+    static inline void collect_unicodes_func (const void *obj, hb_set_t *out)
     {
       const accelerator_t *thiz = (const accelerator_t *) obj;
       for (unsigned int i = 0; i < thiz->segCount; i++)
@@ -436,7 +436,7 @@ struct CmapSubtableLongSegmented
     return *glyph != 0;
   }
 
-  inline void get_all_codepoints (hb_set_t *out) const
+  inline void collect_unicodes (hb_set_t *out) const
   {
     for (unsigned int i = 0; i < this->groups.len; i++) {
       hb_set_add_range (out,
@@ -939,24 +939,24 @@ struct cmap
       if (unlikely (symbol))
       {
 	this->get_glyph_func = get_glyph_from_symbol<CmapSubtable>;
-	this->get_all_codepoints_func = null_get_all_codepoints_func;
+	this->collect_unicodes_func = collect_unicodes_func_nil;
       } else {
 	switch (subtable->u.format) {
 	/* Accelerate format 4 and format 12. */
 	default:
 	  this->get_glyph_func = get_glyph_from<CmapSubtable>;
-	  this->get_all_codepoints_func = null_get_all_codepoints_func;
+	  this->collect_unicodes_func = collect_unicodes_func_nil;
 	  break;
 	case 12:
 	  this->get_glyph_func = get_glyph_from<CmapSubtableFormat12>;
-	  this->get_all_codepoints_func = get_all_codepoints_from<CmapSubtableFormat12>;
+	  this->collect_unicodes_func = collect_unicodes_from<CmapSubtableFormat12>;
 	  break;
 	case  4:
 	  {
 	    this->format4_accel.init (&subtable->u.format4);
 	    this->get_glyph_data = &this->format4_accel;
 	    this->get_glyph_func = this->format4_accel.get_glyph_func;
-	    this->get_all_codepoints_func = this->format4_accel.get_all_codepoints_func;
+	    this->collect_unicodes_func = this->format4_accel.collect_unicodes_func;
 	  }
 	  break;
 	}
@@ -990,19 +990,19 @@ struct cmap
       return get_nominal_glyph (unicode, glyph);
     }
 
-    inline void get_all_codepoints (hb_set_t *out) const
+    inline void collect_unicodes (hb_set_t *out) const
     {
-      this->get_all_codepoints_func (get_glyph_data, out);
+      this->collect_unicodes_func (get_glyph_data, out);
     }
 
     protected:
     typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj,
 					      hb_codepoint_t codepoint,
 					      hb_codepoint_t *glyph);
-    typedef void (*hb_cmap_get_all_codepoints_func_t) (const void *obj,
+    typedef void (*hb_cmap_collect_unicodes_func_t) (const void *obj,
 						       hb_set_t *out);
 
-    static inline void null_get_all_codepoints_func (const void *obj, hb_set_t *out)
+    static inline void collect_unicodes_func_nil (const void *obj, hb_set_t *out)
     {
       // NOOP
     }
@@ -1017,11 +1017,11 @@ struct cmap
     }
 
     template <typename Type>
-    static inline void get_all_codepoints_from (const void *obj,
+    static inline void collect_unicodes_from (const void *obj,
 						hb_set_t *out)
     {
       const Type *typed_obj = (const Type *) obj;
-      typed_obj->get_all_codepoints (out);
+      typed_obj->collect_unicodes (out);
     }
 
     template <typename Type>
@@ -1049,7 +1049,7 @@ struct cmap
     private:
     hb_cmap_get_glyph_func_t get_glyph_func;
     const void *get_glyph_data;
-    hb_cmap_get_all_codepoints_func_t get_all_codepoints_func;
+    hb_cmap_collect_unicodes_func_t collect_unicodes_func;
 
     CmapSubtableFormat4::accelerator_t format4_accel;
 
diff --git a/src/hb-subset.cc b/src/hb-subset.cc
index 88f6d678..2e991de2 100644
--- a/src/hb-subset.cc
+++ b/src/hb-subset.cc
@@ -237,15 +237,15 @@ hb_subset (hb_face_t *source,
 }
 
 /**
- * hb_subset_get_all_codepoints:
+ * hb_subset_collect_unicodes:
  * @source: font face data to load.
  * @out: set to add the all codepoints covered by font face, source.
  */
 void
-hb_subset_get_all_codepoints (hb_face_t *source, hb_set_t *out)
+hb_subset_collect_unicodes (hb_face_t *source, hb_set_t *out)
 {
   OT::cmap::accelerator_t cmap;
   cmap.init (source);
-  cmap.get_all_codepoints (out);
+  cmap.collect_unicodes (out);
   cmap.fini();
 }
diff --git a/src/hb-subset.h b/src/hb-subset.h
index f6d2ae0a..b79b8094 100644
--- a/src/hb-subset.h
+++ b/src/hb-subset.h
@@ -80,9 +80,9 @@ hb_subset (hb_face_t *source,
 	   hb_subset_profile_t *profile,
            hb_subset_input_t *input);
 
-/* hb_subset_get_all_codepoints */
+/* hb_subset_collect_unicodes */
 HB_EXTERN void
-hb_subset_get_all_codepoints (hb_face_t *source, hb_set_t *out);
+hb_subset_collect_unicodes (hb_face_t *source, hb_set_t *out);
 
 HB_END_DECLS
 
diff --git a/test/api/test-subset-codepoints.c b/test/api/test-subset-codepoints.c
index 3bd1fe06..a2c40cd6 100644
--- a/test/api/test-subset-codepoints.c
+++ b/test/api/test-subset-codepoints.c
@@ -28,12 +28,12 @@
 #include "hb-subset-test.h"
 
 static void
-test_get_all_codepoints_format4 (void)
+test_collect_unicodes_format4 (void)
 {
   hb_face_t *face = hb_subset_test_open_font("fonts/Roboto-Regular.abc.format4.ttf");
   hb_set_t *codepoints = hb_set_create();
 
-  hb_subset_get_all_codepoints (face, codepoints);
+  hb_subset_collect_unicodes (face, codepoints);
 
   hb_codepoint_t cp = HB_SET_VALUE_INVALID;
   g_assert (hb_set_next (codepoints, &cp));
@@ -49,12 +49,12 @@ test_get_all_codepoints_format4 (void)
 }
 
 static void
-test_get_all_codepoints_format12 (void)
+test_collect_unicodes_format12 (void)
 {
   hb_face_t *face = hb_subset_test_open_font("fonts/Roboto-Regular.abc.format12.ttf");
   hb_set_t *codepoints = hb_set_create();
 
-  hb_subset_get_all_codepoints (face, codepoints);
+  hb_subset_collect_unicodes (face, codepoints);
 
   hb_codepoint_t cp = HB_SET_VALUE_INVALID;
   g_assert (hb_set_next (codepoints, &cp));
@@ -70,12 +70,12 @@ test_get_all_codepoints_format12 (void)
 }
 
 static void
-test_get_all_codepoints (void)
+test_collect_unicodes (void)
 {
   hb_face_t *face = hb_subset_test_open_font("fonts/Roboto-Regular.abc.ttf");
   hb_set_t *codepoints = hb_set_create();
 
-  hb_subset_get_all_codepoints (face, codepoints);
+  hb_subset_collect_unicodes (face, codepoints);
 
   hb_codepoint_t cp = HB_SET_VALUE_INVALID;
   g_assert (hb_set_next (codepoints, &cp));
@@ -95,9 +95,9 @@ main (int argc, char **argv)
 {
   hb_test_init (&argc, &argv);
 
-  hb_test_add (test_get_all_codepoints);
-  hb_test_add (test_get_all_codepoints_format4);
-  hb_test_add (test_get_all_codepoints_format12);
+  hb_test_add (test_collect_unicodes);
+  hb_test_add (test_collect_unicodes_format4);
+  hb_test_add (test_collect_unicodes_format12);
 
   return hb_test_run();
 }
diff --git a/test/fuzzing/hb-subset-get-codepoints-fuzzer.cc b/test/fuzzing/hb-subset-get-codepoints-fuzzer.cc
index 38f338ba..bcdafebd 100644
--- a/test/fuzzing/hb-subset-get-codepoints-fuzzer.cc
+++ b/test/fuzzing/hb-subset-get-codepoints-fuzzer.cc
@@ -13,7 +13,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
   hb_face_t *face = hb_face_create (blob, 0);
 
   hb_set_t *output = hb_set_create();
-  hb_subset_get_all_codepoints (face, output);
+  hb_subset_collect_unicodes (face, output);
 
   hb_set_destroy (output);
   hb_face_destroy (face);
commit 531051b8b904cf4eb6a50bacebc11c2d85e40140
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Sat Aug 25 08:44:18 2018 -0700

    [ot-font] Return char-not-found if mapped to gid 0
    
    OpenType cmap table can use gid=0 to mean "not covered" to produce
    more optimized cmap subtables.  Return false from get_nominal_glyph()
    for those.  hb-ft already does this.

diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh
index 67a9c7dd..8529c6a3 100644
--- a/src/hb-ot-cmap-table.hh
+++ b/src/hb-ot-cmap-table.hh
@@ -52,7 +52,7 @@ struct CmapSubtableFormat0
     if (!gid)
       return false;
     *glyph = gid;
-    return true;
+    return *glyph != 0;
   }
 
   inline bool sanitize (hb_sanitize_context_t *c) const
@@ -280,7 +280,7 @@ struct CmapSubtableFormat4
       }
 
       *glyph = gid & 0xFFFFu;
-      return true;
+      return *glyph != 0;
     }
 
     static inline void get_all_codepoints_func (const void *obj, hb_set_t *out)
@@ -398,7 +398,7 @@ struct CmapSubtableTrimmed
     if (!gid)
       return false;
     *glyph = gid;
-    return true;
+    return *glyph != 0;
   }
 
   inline bool sanitize (hb_sanitize_context_t *c) const
@@ -433,7 +433,7 @@ struct CmapSubtableLongSegmented
     if (i == -1)
       return false;
     *glyph = T::group_get_glyph (groups[i], codepoint);
-    return true;
+    return *glyph != 0;
   }
 
   inline void get_all_codepoints (hb_set_t *out) const
commit aadb2a9188f143126392c0b0f139326dcf7f9c8d
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Sat Aug 25 08:18:53 2018 -0700

    Add face-builder
    
    New API:
    +hb_face_builder_create
    +hb_face_builder_add_table

diff --git a/docs/harfbuzz-sections.txt b/docs/harfbuzz-sections.txt
index 16b66277..f98e8cc1 100644
--- a/docs/harfbuzz-sections.txt
+++ b/docs/harfbuzz-sections.txt
@@ -170,6 +170,7 @@ hb_coretext_font_get_ct_font
 <SECTION>
 <FILE>hb-face</FILE>
 hb_face_count
+hb_face_t
 hb_face_create
 hb_face_create_for_tables
 hb_face_destroy
@@ -188,7 +189,8 @@ hb_face_set_glyph_count
 hb_face_set_index
 hb_face_set_upem
 hb_face_set_user_data
-hb_face_t
+hb_face_builder_create
+hb_face_builder_add_table
 </SECTION>
 
 <SECTION>
@@ -226,6 +228,7 @@ hb_font_get_glyph
 hb_font_get_glyph_advance_for_direction
 hb_font_get_glyph_advance_func_t
 hb_font_get_glyph_advances_for_direction
+hb_font_get_glyph_advances_func_t
 hb_font_get_glyph_contour_point
 hb_font_get_glyph_contour_point_for_origin
 hb_font_get_glyph_contour_point_func_t
diff --git a/src/hb-face.cc b/src/hb-face.cc
index 49f29d3f..9cb0f265 100644
--- a/src/hb-face.cc
+++ b/src/hb-face.cc
@@ -512,3 +512,152 @@ hb_face_get_table_tags (const hb_face_t *face,
 
   return ot_face.get_table_tags (start_offset, table_count, table_tags);
 }
+
+
+/*
+ * face-builder: A face that has add_table().
+ */
+
+struct hb_face_builder_data_t
+{
+  struct table_entry_t
+  {
+    inline int cmp (const hb_tag_t *t) const
+    {
+      if (*t < tag) return -1;
+      if (*t > tag) return -1;
+      return 0;
+    }
+
+    hb_tag_t   tag;
+    hb_blob_t *blob;
+  };
+
+  hb_vector_t<table_entry_t, 32> tables;
+};
+
+static hb_face_builder_data_t *
+_hb_face_builder_data_create (void)
+{
+  hb_face_builder_data_t *data = (hb_face_builder_data_t *) calloc (1, sizeof (hb_face_builder_data_t));
+  if (unlikely (!data))
+    return nullptr;
+
+  data->tables.init ();
+
+  return data;
+}
+
+static void
+_hb_face_builder_data_destroy (void *user_data)
+{
+  hb_face_builder_data_t *data = (hb_face_builder_data_t *) user_data;
+
+  for (unsigned int i = 0; i < data->tables.len; i++)
+    hb_blob_destroy (data->tables[i].blob);
+
+  data->tables.fini ();
+
+  free (data);
+}
+
+static hb_blob_t *
+_hb_face_builder_data_reference_blob (hb_face_builder_data_t *data)
+{
+
+  unsigned int table_count = data->tables.len;
+  unsigned int face_length = table_count * 16 + 12;
+
+  for (unsigned int i = 0; i < table_count; i++)
+    face_length += hb_ceil_to_4 (hb_blob_get_length (data->tables.arrayZ[i].blob));
+
+  char *buf = (char *) malloc (face_length);
+  if (unlikely (!buf))
+    return nullptr;
+
+  hb_serialize_context_t c (buf, face_length);
+  OT::OpenTypeFontFile *f = c.start_serialize<OT::OpenTypeFontFile> ();
+
+  bool is_cff = data->tables.lsearch (HB_TAG ('C','F','F',' ')) || data->tables.lsearch (HB_TAG ('C','F','F','2'));
+  hb_tag_t sfnt_tag = is_cff ? OT::OpenTypeFontFile::CFFTag : OT::OpenTypeFontFile::TrueTypeTag;
+
+  Supplier<hb_tag_t>    tags_supplier  (&data->tables[0].tag, table_count, sizeof (data->tables[0]));
+  Supplier<hb_blob_t *> blobs_supplier (&data->tables[0].blob, table_count, sizeof (data->tables[0]));
+  bool ret = f->serialize_single (&c,
+				  sfnt_tag,
+				  tags_supplier,
+				  blobs_supplier,
+				  table_count);
+
+  c.end_serialize ();
+
+  if (unlikely (!ret))
+  {
+    free (buf);
+    return nullptr;
+  }
+
+  return hb_blob_create (buf, face_length, HB_MEMORY_MODE_WRITABLE, buf, free);
+}
+
+static hb_blob_t *
+_hb_face_builder_reference_table (hb_face_t *face, hb_tag_t tag, void *user_data)
+{
+  hb_face_builder_data_t *data = (hb_face_builder_data_t *) user_data;
+
+  if (!tag)
+    return _hb_face_builder_data_reference_blob (data);
+
+  hb_face_builder_data_t::table_entry_t *entry = data->tables.lsearch (tag);
+  if (entry)
+    return hb_blob_reference (entry->blob);
+
+  return nullptr;
+}
+
+
+/**
+ * hb_face_builder_create:
+ *
+ * Creates a #hb_face_t that can be used with hb_face_builder_add_table().
+ * After tables are added to the face, it can be compiled to a binary
+ * font file by calling hb_face_reference_blob().
+ *
+ * Return value: (transfer full) New face.
+ *
+ * Since: REPLACEME
+ **/
+hb_face_t *
+hb_face_builder_create (void)
+{
+  hb_face_builder_data_t *data = _hb_face_builder_data_create ();
+  if (unlikely (!data)) return hb_face_get_empty ();
+
+  return hb_face_create_for_tables (_hb_face_builder_reference_table,
+				    data,
+				    _hb_face_builder_data_destroy);
+}
+
+/**
+ * hb_face_builder_add_table:
+ *
+ * Add table for @tag with data provided by @blob to the face.  @face must
+ * be created using hb_face_builder_create().
+ *
+ * Since: REPLACEME
+ **/
+hb_bool_t
+hb_face_builder_add_table (hb_face_t *face, hb_tag_t tag, hb_blob_t *blob)
+{
+  if (unlikely (face->destroy != (hb_destroy_func_t) _hb_face_builder_data_destroy))
+    return false;
+
+  hb_face_builder_data_t *data = (hb_face_builder_data_t *) face->user_data;
+  hb_face_builder_data_t::table_entry_t *entry = data->tables.push ();
+
+  entry->tag = tag;
+  entry->blob = hb_blob_reference (blob);
+
+  return true;
+}
+
diff --git a/src/hb-face.h b/src/hb-face.h
index 208092ef..2bc3e895 100644
--- a/src/hb-face.h
+++ b/src/hb-face.h
@@ -120,6 +120,20 @@ hb_face_get_table_tags (const hb_face_t *face,
 			unsigned int *table_count, /* IN/OUT */
 			hb_tag_t     *table_tags /* OUT */);
 
+
+/*
+ * Builder face.
+ */
+
+HB_EXTERN hb_face_t *
+hb_face_builder_create (void);
+
+HB_EXTERN hb_bool_t
+hb_face_builder_add_table (hb_face_t *face,
+			   hb_tag_t   tag,
+			   hb_blob_t *blob);
+
+
 HB_END_DECLS
 
 #endif /* HB_FACE_H */
diff --git a/src/hb-subset-plan.cc b/src/hb-subset-plan.cc
index 12566827..6e556c20 100644
--- a/src/hb-subset-plan.cc
+++ b/src/hb-subset-plan.cc
@@ -154,7 +154,7 @@ hb_subset_plan_create (hb_face_t           *face,
   plan->unicodes = hb_set_create();
   plan->glyphs.init();
   plan->source = hb_face_reference (face);
-  plan->dest = hb_subset_face_create ();
+  plan->dest = hb_face_builder_create ();
   plan->codepoint_to_glyph = hb_map_create();
   plan->glyph_map = hb_map_create();
 
diff --git a/src/hb-subset-plan.hh b/src/hb-subset-plan.hh
index 7501294d..b7f14d2e 100644
--- a/src/hb-subset-plan.hh
+++ b/src/hb-subset-plan.hh
@@ -89,7 +89,7 @@ struct hb_subset_plan_t
               hb_blob_get_length (contents),
               hb_blob_get_length (source_blob));
     hb_blob_destroy (source_blob);
-    return hb_subset_face_add_table(dest, tag, contents);
+    return hb_face_builder_add_table (dest, tag, contents);
   }
 };
 
diff --git a/src/hb-subset-private.hh b/src/hb-subset-private.hh
index 6b2b207f..42c93d7d 100644
--- a/src/hb-subset-private.hh
+++ b/src/hb-subset-private.hh
@@ -34,8 +34,6 @@
 
 #include "hb-font-private.hh"
 
-typedef struct hb_subset_face_data_t hb_subset_face_data_t;
-
 struct hb_subset_input_t {
   hb_object_header_t header;
   ASSERT_POD ();
@@ -54,10 +52,5 @@ struct hb_subset_input_t {
    */
 };
 
-HB_INTERNAL hb_face_t *
-hb_subset_face_create (void);
-
-HB_INTERNAL hb_bool_t
-hb_subset_face_add_table (hb_face_t *face, hb_tag_t tag, hb_blob_t *blob);
 
 #endif /* HB_SUBSET_PRIVATE_HH */
diff --git a/src/hb-subset.cc b/src/hb-subset.cc
index 411c6b86..88f6d678 100644
--- a/src/hb-subset.cc
+++ b/src/hb-subset.cc
@@ -96,135 +96,6 @@ _subset (hb_subset_plan_t *plan)
 }
 
 
-/*
- * A face that has add_table().
- */
-
-struct hb_subset_face_data_t
-{
-  struct table_entry_t
-  {
-    inline int cmp (const hb_tag_t *t) const
-    {
-      if (*t < tag) return -1;
-      if (*t > tag) return -1;
-      return 0;
-    }
-
-    hb_tag_t   tag;
-    hb_blob_t *blob;
-  };
-
-  hb_vector_t<table_entry_t, 32> tables;
-};
-
-static hb_subset_face_data_t *
-_hb_subset_face_data_create (void)
-{
-  hb_subset_face_data_t *data = (hb_subset_face_data_t *) calloc (1, sizeof (hb_subset_face_data_t));
-  if (unlikely (!data))
-    return nullptr;
-
-  data->tables.init ();
-
-  return data;
-}
-
-static void
-_hb_subset_face_data_destroy (void *user_data)
-{
-  hb_subset_face_data_t *data = (hb_subset_face_data_t *) user_data;
-
-  for (unsigned int i = 0; i < data->tables.len; i++)
-    hb_blob_destroy (data->tables[i].blob);
-
-  data->tables.fini ();
-
-  free (data);
-}
-
-static hb_blob_t *
-_hb_subset_face_data_reference_blob (hb_subset_face_data_t *data)
-{
-
-  unsigned int table_count = data->tables.len;
-  unsigned int face_length = table_count * 16 + 12;
-
-  for (unsigned int i = 0; i < table_count; i++)
-    face_length += hb_ceil_to_4 (hb_blob_get_length (data->tables.arrayZ[i].blob));
-
-  char *buf = (char *) malloc (face_length);
-  if (unlikely (!buf))
-    return nullptr;
-
-  hb_serialize_context_t c (buf, face_length);
-  OT::OpenTypeFontFile *f = c.start_serialize<OT::OpenTypeFontFile> ();
-
-  bool is_cff = data->tables.lsearch (HB_TAG ('C','F','F',' ')) || data->tables.lsearch (HB_TAG ('C','F','F','2'));
-  hb_tag_t sfnt_tag = is_cff ? OT::OpenTypeFontFile::CFFTag : OT::OpenTypeFontFile::TrueTypeTag;
-
-  Supplier<hb_tag_t>    tags_supplier  (&data->tables[0].tag, table_count, sizeof (data->tables[0]));
-  Supplier<hb_blob_t *> blobs_supplier (&data->tables[0].blob, table_count, sizeof (data->tables[0]));
-  bool ret = f->serialize_single (&c,
-				  sfnt_tag,
-				  tags_supplier,
-				  blobs_supplier,
-				  table_count);
-
-  c.end_serialize ();
-
-  if (unlikely (!ret))
-  {
-    free (buf);
-    return nullptr;
-  }
-
-  return hb_blob_create (buf, face_length, HB_MEMORY_MODE_WRITABLE, buf, free);
-}
-
-static hb_blob_t *
-_hb_subset_face_reference_table (hb_face_t *face, hb_tag_t tag, void *user_data)
-{
-  hb_subset_face_data_t *data = (hb_subset_face_data_t *) user_data;
-
-  if (!tag)
-    return _hb_subset_face_data_reference_blob (data);
-
-  hb_subset_face_data_t::table_entry_t *entry = data->tables.lsearch (tag);
-  if (entry)
-    return hb_blob_reference (entry->blob);
-
-  return nullptr;
-}
-
-/* TODO: Move this to hb-face.h and rename to hb_face_builder_create()
- * with hb_face_builder_add_table(). */
-hb_face_t *
-hb_subset_face_create (void)
-{
-  hb_subset_face_data_t *data = _hb_subset_face_data_create ();
-  if (unlikely (!data)) return hb_face_get_empty ();
-
-  return hb_face_create_for_tables (_hb_subset_face_reference_table,
-				    data,
-				    _hb_subset_face_data_destroy);
-}
-
-hb_bool_t
-hb_subset_face_add_table (hb_face_t *face, hb_tag_t tag, hb_blob_t *blob)
-{
-  if (unlikely (face->destroy != (hb_destroy_func_t) _hb_subset_face_data_destroy))
-    return false;
-
-  hb_subset_face_data_t *data = (hb_subset_face_data_t *) face->user_data;
-  hb_subset_face_data_t::table_entry_t *entry = data->tables.push ();
-
-  entry->tag = tag;
-  entry->blob = hb_blob_reference (blob);
-
-  return true;
-}
-
 static bool
 _subset_table (hb_subset_plan_t *plan,
                hb_tag_t          tag)
commit 6cac9dc9ccb76945e9dfacafc169afad4a6e1e88
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Aug 24 10:29:45 2018 -0700

    [blob] Add as_bytes()

diff --git a/src/hb-blob-private.hh b/src/hb-blob-private.hh
index 49ad68ec..0d3fad57 100644
--- a/src/hb-blob-private.hh
+++ b/src/hb-blob-private.hh
@@ -62,6 +62,10 @@ struct hb_blob_t
   {
     return unlikely (!data) ? &Null(Type) : reinterpret_cast<const Type *> (data);
   }
+  inline hb_bytes_t as_bytes (void) const
+  {
+    return hb_bytes_t (data, length);
+  }
 
   public:
   hb_object_header_t header;
commit 29a9a0883877c598413de78cd0c61f07bc393b2c
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Aug 24 10:24:45 2018 -0700

    [sanitize] Document how sanitize machinery works

diff --git a/src/hb-machinery-private.hh b/src/hb-machinery-private.hh
index 05add1f4..b33dd996 100644
--- a/src/hb-machinery-private.hh
+++ b/src/hb-machinery-private.hh
@@ -138,6 +138,68 @@ struct hb_dispatch_context_t
 
 /*
  * Sanitize
+ *
+ *
+ * === Introduction ===
+ *
+ * The sanitize machinery is at the core of our zero-cost font loading.  We
+ * mmap() font file into memory and create a blob out of it.  Font subtables
+ * are returned as a readonly sub-blob of the main font blob.  These table
+ * blobs are then sanitized before use, to ensure invalid memory access does
+ * not happen.  The toplevel sanitize API use is like, eg. to load the 'head'
+ * table:
+ *
+ *   hb_blob_t *head_blob = hb_sanitize_context_t ().reference_table<OT::head> (face);
+ *
+ * The blob then can be converted to a head table struct with:
+ *
+ *   const head *head_table = head_blob->as<head> ();
+ *
+ * What the reference_table does is, to call hb_face_reference_table() to load
+ * the table blob, sanitize it and return either the sanitized blob, or empty
+ * blob if sanitization failed.  The blob->as() function returns the null
+ * object of its template type argument if the blob is empty.  Otherwise, it
+ * just casts the blob contents to the desired type.
+ *
+ * Sanitizing a blob of data with a type T works as follows (with minor
+ * simplification):
+ *
+ *   - Cast blob content to T*, call sanitize() method of it,
+ *   - If sanitize succeeded, return blob.
+ *   - Otherwise, if blob is not writable, try making it writable,
+ *     or copy if cannot be made writable in-place,
+ *   - Call sanitize() again.  Return blob if sanitize succeeded.
+ *   - Return empty blob otherwise.
+ *
+ *
+ * === The sanitize() contract ===
+ *
+ * The sanitize() method of each object type shall return true if it's safe to
+ * call other methods of the object, and false otherwise.
+ *
+ * Note that what sanitize() checks for might align with what the specification
+ * describes as valid table data, but does not have to be.  In particular, we
+ * do NOT want to be pedantic and concern ourselves with validity checks that
+ * are irrelevant to our use of the table.  On the contrary, we want to be
+ * lenient with error handling and accept invalid data to the extent that it
+ * does not impose extra burden on us.
+ *
+ * Based on the sanitize contract, one can see that what we check for depends
+ * on how we use the data in other table methods.  Ie. if other table methods
+ * assume that offsets do NOT point out of the table data block, then that's
+ * something sanitize() must check for (GSUB/GPOS/GDEF/etc work this way).  On
+ * the other hand, if other methods do such checks themselves, then sanitize()
+ * does not have to bother with them (glyf/loca work this way).  The choice
+ * depends on the table structure and sanitize() performance.  For example, to
+ * check glyf/loca offsets in sanitize() would cost O(num-glyphs).  We try hard
+ * to avoid such costs during font loading.  By postponing such checks to the
+ * actual glyph loading, we reduce the sanitize cost to O(1) and total runtime
+ * cost to O(used-glyphs).  As such, this is preferred.
+ *
+ * The same argument can be made re GSUB/GPOS/GDEF, but there, the table
+ * structure is so complicated that by checking all offsets at sanitize() time,
+ * we make the code much simpler in other methods, as offsets and referenced
+ * objects do not need to be validated at each use site.
  */
 
 /* This limits sanitizing time on really broken fonts. */
commit 142ac5a6be6088771e0ee4b135ba753c80036a9a
Author: Behdad Esfahbod <behdad at behdad.org>
Date:   Fri Aug 24 10:07:49 2018 -0700

    [serialize] Add copy_bytes() and copy_blob()

diff --git a/src/hb-dsalgs.hh b/src/hb-dsalgs.hh
index 8cbe6584..8d59c6cf 100644
--- a/src/hb-dsalgs.hh
+++ b/src/hb-dsalgs.hh
@@ -502,6 +502,9 @@ struct hb_bytes_t
 {
   inline hb_bytes_t (void) : bytes (nullptr), len (0) {}
   inline hb_bytes_t (const char *bytes_, unsigned int len_) : bytes (bytes_), len (len_) {}
+  inline hb_bytes_t (const void *bytes_, unsigned int len_) : bytes ((const char *) bytes_), len (len_) {}
+
+  inline void free (void) { ::free ((void *) bytes); bytes = nullptr; len = 0; }
 
   inline int cmp (const hb_bytes_t &a) const
   {
diff --git a/src/hb-machinery-private.hh b/src/hb-machinery-private.hh
index 99ef485a..05add1f4 100644
--- a/src/hb-machinery-private.hh
+++ b/src/hb-machinery-private.hh
@@ -402,7 +402,7 @@ struct hb_serialize_context_t
   }
 
   template <typename Type>
-  inline Type *copy (void)
+  inline Type *copy (void) const
   {
     assert (!this->ran_out_of_room);
     unsigned int len = this->head - this->start;
@@ -411,6 +411,25 @@ struct hb_serialize_context_t
       memcpy (p, this->start, len);
     return reinterpret_cast<Type *> (p);
   }
+  inline hb_bytes_t copy_bytes (void) const
+  {
+    assert (!this->ran_out_of_room);
+    unsigned int len = this->head - this->start;
+    void *p = malloc (len);
+    if (p)
+      memcpy (p, this->start, len);
+    else
+      return hb_bytes_t ();
+    return hb_bytes_t (p, len);
+  }
+  inline hb_blob_t *copy_blob (void) const
+  {
+    assert (!this->ran_out_of_room);
+    return hb_blob_create (this->start,
+			   this->head - this->start,
+			   HB_MEMORY_MODE_DUPLICATE,
+			   nullptr, nullptr);
+  }
 
   template <typename Type>
   inline Type *allocate_size (unsigned int size)


More information about the HarfBuzz mailing list