[cairo] PDF Text Extraction: Past and Present

Adrian Johnson ajohnson at redneon.com
Sat Feb 3 07:05:41 PST 2007


Behdad Esfahbod wrote:
> To summarize, I suggest that we generate ToUnicode mappings for
> all fonts embedded in cairo's PDF output.  This should be done by
> calling into the font backends, passing in the scaled-font and an
> array of glyph indices, and get back an array of Unicode
> character codes.  It helps the backend if input glyphs are sorted
> numerically. The PDF backend then will build and add the
> ToUnicode CMap.

The attached patch:
 - Generates ToUnicode mappings for all fonts.
 - Adds a TrueType/OpenType reverse cmap lookup function.
 - Adds FT and Win32 font backend functions for mapping glyphs to
   Unicode. These backend functions are fallbacks for when the
   reverse cmap lookup fails (although for Win32 the backend
   function only supports Type1 fonts).

Text selection works well in acroread; however, evince does not
correctly select text set in TrueType fonts. This seems to be
caused by the individual glyph positioning in the content stream.

-------------- next part --------------
From 023151b448da0c682582b4fa663e2ea336014065 Mon Sep 17 00:00:00 2001
From: asj <asj at tux.lan>
Date: Sun, 4 Feb 2007 01:16:01 +1030
Subject: [PATCH] PDF: Make text selection work

---
 src/cairo-ft-font.c                     |   33 +++++++
 src/cairo-pdf-surface.c                 |  128 +++++++++++++++++++++++++--
 src/cairo-scaled-font-subsets-private.h |   29 ++++---
 src/cairo-scaled-font-subsets.c         |   15 +++-
 src/cairo-truetype-subset-private.h     |   23 +++++
 src/cairo-truetype-subset.c             |  145 +++++++++++++++++++++++++++++++
 src/cairo-win32-font.c                  |   17 ++++
 src/cairoint.h                          |   18 ++++
 8 files changed, 385 insertions(+), 23 deletions(-)

diff --git a/src/cairo-ft-font.c b/src/cairo-ft-font.c
index 57e3853..7788381 100644
--- a/src/cairo-ft-font.c
+++ b/src/cairo-ft-font.c
@@ -2013,6 +2013,38 @@ _cairo_ft_load_truetype_table (void
     return status;
 }
 
+static void
+_cairo_ft_map_glyphs_to_unicode (void	                    *abstract_font,
+                                 cairo_scaled_font_subset_t *font_subset)
+{ 
+    cairo_ft_scaled_font_t *scaled_font = abstract_font;
+    cairo_ft_unscaled_font_t *unscaled = scaled_font->unscaled;
+    FT_Face face;
+    FT_UInt glyph;
+    unsigned long charcode;
+    unsigned int i;
+    int count;
+
+    face = _cairo_ft_unscaled_font_lock_face (unscaled);
+    if (!face)
+	return;
+
+    count = font_subset->num_glyphs;
+    charcode = FT_Get_First_Char( face, &glyph);
+    while (glyph != 0 && count > 0)
+    {
+        for (i = 0; i < font_subset->num_glyphs; i++) {
+            if (font_subset->glyphs[i] == glyph) {
+                font_subset->to_unicode[i] = charcode;
+                count--;
+                break;
+            }
+        }
+        charcode = FT_Get_Next_Char(face, charcode, &glyph);
+    }
+    _cairo_ft_unscaled_font_unlock_face (unscaled);
+}
+
 const cairo_scaled_font_backend_t cairo_ft_scaled_font_backend = {
     CAIRO_FONT_TYPE_FT,
     _cairo_ft_scaled_font_create_toy,
@@ -2022,6 +2054,7 @@ const cairo_scaled_font_backend_t cairo_
     _cairo_ft_ucs4_to_index,
     NULL, 			/* show_glyphs */
     _cairo_ft_load_truetype_table,
+    _cairo_ft_map_glyphs_to_unicode,
 };
 
 /* cairo_ft_font_face_t */
diff --git a/src/cairo-pdf-surface.c b/src/cairo-pdf-surface.c
index bdfef00..cdf1d5c 100644
--- a/src/cairo-pdf-surface.c
+++ b/src/cairo-pdf-surface.c
@@ -1629,11 +1629,87 @@ _cairo_pdf_surface_write_pages (cairo_pd
 				 "endobj\r\n");
 }
 
+static cairo_pdf_resource_t
+_cairo_pdf_surface_emit_to_unicode_stream (cairo_pdf_surface_t		*surface,
+                                           cairo_scaled_font_subset_t	*font_subset)
+{
+    const cairo_scaled_font_backend_t *backend;
+    cairo_pdf_resource_t stream;
+    unsigned int i;
+
+    if (font_subset->to_unicode == NULL) {
+        stream.id = 0;
+        return stream;
+    }
+    
+    if (_cairo_truetype_create_glyph_to_unicode_map (font_subset) != CAIRO_STATUS_SUCCESS) {
+        backend = font_subset->scaled_font->backend;
+        if (backend->map_glyphs_to_unicode == NULL) {
+            stream.id = 0;
+            return stream;
+        }
+        backend->map_glyphs_to_unicode (font_subset->scaled_font, font_subset);
+    }
+        
+    stream = _cairo_pdf_surface_open_stream (surface, FALSE, NULL);
+    _cairo_output_stream_printf (surface->output,
+                                 "/CIDInit /ProcSet findresource begin\r\n"
+                                 "12 dict begin\r\n"
+                                 "begincmap\r\n"
+                                 "/CIDSystemInfo\r\n"
+                                 "<< /Registry (Adobe)\r\n"
+                                 "   /Ordering (UCS)\r\n"
+                                 "   /Supplement 0\r\n"
+                                 ">> def\r\n"
+                                 "/CMapName /Adobe-Identity-UCS def\r\n"
+                                 "/CMapType 2 def\r\n"
+                                 "1 begincodespacerange\r\n"
+                                 "<00> <ff>\r\n"
+                                 "endcodespacerange\r\n",
+                                 stream.id);
+
+    /* The CMap specification has a limit of 100 characters per beginbfchar operator */
+    _cairo_output_stream_printf (surface->output,
+                                 "%d beginbfchar\r\n",
+                                 font_subset->num_glyphs > 100 ? 100 : font_subset->num_glyphs);
+    for (i = 0; i < font_subset->num_glyphs; i++) {
+        if (i != 0 && i % 100 == 0) {
+            _cairo_output_stream_printf (surface->output,
+                                         "endbfchar\r\n"
+                                         "%d beginbfchar\r\n",
+                                         font_subset->num_glyphs - i > 100 ? 100 : font_subset->num_glyphs - i);
+        }
+        _cairo_output_stream_printf (surface->output,
+                                     "<%02x> <%04x>\r\n",
+                                     i, font_subset->to_unicode[i]);
+    }
+    _cairo_output_stream_printf (surface->output,
+                                 "endbfchar\r\n");
+
+    if (font_subset->num_glyphs < 256) {
+        _cairo_output_stream_printf (surface->output,
+                                     "1 beginnotdefrange\r\n"
+                                     "<%02x> <ff> 0\r\n"
+                                     "endnotdefrange\r\n",
+                                     font_subset->num_glyphs);
+    }
+                                 
+    _cairo_output_stream_printf (surface->output,
+                                "endcmap\r\n"
+                                "CMapName currentdict /CMap defineresource pop\r\n"
+                                "end\r\n"
+                                 "end\r\n");
+
+    _cairo_pdf_surface_close_stream (surface);
+    
+    return stream;
+}
+
 static cairo_status_t
 _cairo_pdf_surface_emit_cff_font_subset (cairo_pdf_surface_t		*surface,
                                          cairo_scaled_font_subset_t	*font_subset)
 {
-    cairo_pdf_resource_t stream, descriptor, subset_resource;
+    cairo_pdf_resource_t stream, descriptor, subset_resource, to_unicode_stream;
     cairo_status_t status;
     cairo_pdf_font_t font;
     cairo_cff_subset_t subset;
@@ -1671,6 +1747,8 @@ _cairo_pdf_surface_emit_cff_font_subset
 				 "endobj\r\n");
     free (compressed);
 
+    to_unicode_stream = _cairo_pdf_surface_emit_to_unicode_stream (surface, font_subset);
+
     descriptor = _cairo_pdf_surface_new_object (surface);
     _cairo_output_stream_printf (surface->output,
 				 "%d 0 obj\r\n"
@@ -1718,7 +1796,14 @@ _cairo_pdf_surface_emit_cff_font_subset
 				     subset.widths[i]);
 
     _cairo_output_stream_printf (surface->output,
-				 " ]\r\n"
+				 " ]\r\n");
+
+    if (to_unicode_stream.id != 0)
+        _cairo_output_stream_printf (surface->output,
+                                     "    /ToUnicode %d 0 R\r\n",
+                                     to_unicode_stream.id);
+    
+    _cairo_output_stream_printf (surface->output,
 				 ">>\r\n"
 				 "endobj\r\n");
 
@@ -1737,7 +1822,7 @@ _cairo_pdf_surface_emit_type1_font (cair
                                     cairo_scaled_font_subset_t	*font_subset,
                                     cairo_type1_subset_t        *subset)
 {
-    cairo_pdf_resource_t stream, descriptor, subset_resource;
+    cairo_pdf_resource_t stream, descriptor, subset_resource, to_unicode_stream;
     cairo_pdf_font_t font;
     unsigned long length, compressed_length;
     char *compressed;
@@ -1771,6 +1856,8 @@ _cairo_pdf_surface_emit_type1_font (cair
 				 "endobj\r\n");
     free (compressed);
 
+    to_unicode_stream = _cairo_pdf_surface_emit_to_unicode_stream (surface, font_subset);
+
     descriptor = _cairo_pdf_surface_new_object (surface);
     _cairo_output_stream_printf (surface->output,
 				 "%d 0 obj\r\n"
@@ -1818,7 +1905,14 @@ _cairo_pdf_surface_emit_type1_font (cair
 				     subset->widths[i]);
 
     _cairo_output_stream_printf (surface->output,
-				 " ]\r\n"
+				 " ]\r\n");
+
+    if (to_unicode_stream.id != 0)
+        _cairo_output_stream_printf (surface->output,
+                                     "    /ToUnicode %d 0 R\r\n",
+                                     to_unicode_stream.id);
+    
+    _cairo_output_stream_printf (surface->output,
 				 ">>\r\n"
 				 "endobj\r\n");
 
@@ -1876,7 +1970,7 @@ static cairo_status_t
 _cairo_pdf_surface_emit_truetype_font_subset (cairo_pdf_surface_t		*surface,
 					      cairo_scaled_font_subset_t	*font_subset)
 {
-    cairo_pdf_resource_t stream, descriptor, subset_resource;
+    cairo_pdf_resource_t stream, descriptor, subset_resource, to_unicode_stream;
     cairo_status_t status;
     cairo_pdf_font_t font;
     cairo_truetype_subset_t subset;
@@ -1887,7 +1981,7 @@ _cairo_pdf_surface_emit_truetype_font_su
     status = _cairo_truetype_subset_init (&subset, font_subset);
     if (status)
 	return status;
-
+   
     compressed = compress_dup (subset.data, subset.data_length,
 			       &compressed_length);
     if (compressed == NULL) {
@@ -1913,11 +2007,13 @@ _cairo_pdf_surface_emit_truetype_font_su
 				 "endobj\r\n");
     free (compressed);
 
+    to_unicode_stream = _cairo_pdf_surface_emit_to_unicode_stream (surface, font_subset);
+
     descriptor = _cairo_pdf_surface_new_object (surface);
     _cairo_output_stream_printf (surface->output,
 				 "%d 0 obj\r\n"
 				 "<< /Type /FontDescriptor\r\n"
-				 "   /FontName /7%s\r\n"
+				 "   /FontName /%s\r\n"
 				 "   /Flags 4\r\n"
 				 "   /FontBBox [ %ld %ld %ld %ld ]\r\n"
 				 "   /ItalicAngle 0\r\n"
@@ -1960,7 +2056,14 @@ _cairo_pdf_surface_emit_truetype_font_su
 				     subset.widths[i]);
 
     _cairo_output_stream_printf (surface->output,
-				 " ]\r\n"
+				 " ]\r\n");
+
+    if (to_unicode_stream.id != 0)
+        _cairo_output_stream_printf (surface->output,
+                                     "    /ToUnicode %d 0 R\r\n",
+                                     to_unicode_stream.id);
+    
+    _cairo_output_stream_printf (surface->output,
 				 ">>\r\n"
 				 "endobj\r\n");
 
@@ -2118,7 +2221,7 @@ static cairo_status_t
 _cairo_pdf_surface_emit_type3_font_subset (cairo_pdf_surface_t		*surface,
 					   cairo_scaled_font_subset_t	*font_subset)
 {
-    cairo_pdf_resource_t *glyphs, encoding, char_procs, subset_resource;
+    cairo_pdf_resource_t *glyphs, encoding, char_procs, subset_resource, to_unicode_stream;
     cairo_pdf_font_t font;
     cairo_matrix_t matrix;
     unsigned int i;
@@ -2163,6 +2266,8 @@ _cairo_pdf_surface_emit_type3_font_subse
 
     free (glyphs);
 
+    to_unicode_stream = _cairo_pdf_surface_emit_to_unicode_stream (surface, font_subset);
+
     subset_resource = _cairo_pdf_surface_new_object (surface);
     matrix = font_subset->scaled_font->scale;
     cairo_matrix_invert (&matrix);
@@ -2192,6 +2297,11 @@ _cairo_pdf_surface_emit_type3_font_subse
     _cairo_output_stream_printf (surface->output,
 				 "]\r\n");
 
+    if (to_unicode_stream.id != 0)
+        _cairo_output_stream_printf (surface->output,
+                                     "    /ToUnicode %d 0 R\r\n",
+                                     to_unicode_stream.id);
+    
     _cairo_output_stream_printf (surface->output,
 				 ">>\r\n"
 				 "endobj\r\n");
diff --git a/src/cairo-scaled-font-subsets-private.h b/src/cairo-scaled-font-subsets-private.h
index 2c81fdc..91148b3 100644
--- a/src/cairo-scaled-font-subsets-private.h
+++ b/src/cairo-scaled-font-subsets-private.h
@@ -41,18 +41,6 @@
 
 typedef struct _cairo_scaled_font_subsets cairo_scaled_font_subsets_t;
 
-typedef struct _cairo_scaled_font_subset {
-    cairo_scaled_font_t *scaled_font;
-    unsigned int font_id;
-    unsigned int subset_id;
-
-    /* Index of glyphs array is subset_glyph_index.
-     * Value of glyphs array is scaled_font_glyph_index.
-     */
-    unsigned long *glyphs;
-    unsigned int num_glyphs;
-} cairo_scaled_font_subset_t;
-
 /**
  * _cairo_scaled_font_subsets_create:
  * @max_glyphs_per_subset: the maximum number of glyphs that should
@@ -361,4 +349,21 @@ _cairo_type1_fallback_init_hex (cairo_ty
 cairo_private void
 _cairo_type1_fallback_fini (cairo_type1_subset_t *subset);
 
+/**
+ * _cairo_truetype_create_glyph_to_unicode_map:
+ * @font_subset: the #cairo_scaled_font_subset_t to initialize from
+ *
+ * If possible (depending on the format of the underlying
+ * cairo_scaled_font_t and the font backend in use) assign
+ * the unicode character of each glyph in font_subset to
+ * font_subset->to_unicode.
+ *
+ * Return value: CAIRO_STATUS_SUCCESS if successful,
+ * CAIRO_INT_STATUS_UNSUPPORTED if the unicode encoding of
+ * the glyphs is not available.  Possible  errors include
+ * CAIRO_STATUS_NO_MEMORY.
+ **/
+cairo_private cairo_int_status_t
+_cairo_truetype_create_glyph_to_unicode_map (cairo_scaled_font_subset_t	*font_subset);
+
 #endif /* CAIRO_SCALED_FONT_SUBSETS_PRIVATE_H */
diff --git a/src/cairo-scaled-font-subsets.c b/src/cairo-scaled-font-subsets.c
index 0c428e7..68a662a 100644
--- a/src/cairo-scaled-font-subsets.c
+++ b/src/cairo-scaled-font-subsets.c
@@ -273,6 +273,7 @@ _cairo_sub_font_collect (void *entry, vo
     cairo_sub_font_collection_t *collection = closure;
     cairo_scaled_font_subset_t subset;
     int i;
+    unsigned int j;
 
     for (i = 0; i <= sub_font->current_subset; i++) {
 	collection->subset_id = i;
@@ -291,9 +292,19 @@ _cairo_sub_font_collect (void *entry, vo
 	subset.subset_id = i;
 	subset.glyphs = collection->glyphs;
 	subset.num_glyphs = collection->num_glyphs;
-
-	(collection->font_subset_callback) (&subset,
+        /* No need to check for out of memory here. If to_unicode is NULL, the PDF
+         * surface does not emit an ToUnicode stream */
+        subset.to_unicode = malloc (collection->num_glyphs*sizeof(unsigned long));
+        for (j = 0; j < collection->num_glyphs; j++) {
+            /* default unicode character required when mapping fails */
+            subset.to_unicode[j] = 0xfffd;
+        }
+
+        (collection->font_subset_callback) (&subset,
 					    collection->font_subset_callback_closure);
+
+        if (subset.to_unicode != NULL)
+            free (subset.to_unicode);
     }
 }
 
diff --git a/src/cairo-truetype-subset-private.h b/src/cairo-truetype-subset-private.h
index e9b2c47..80baa28 100644
--- a/src/cairo-truetype-subset-private.h
+++ b/src/cairo-truetype-subset-private.h
@@ -65,6 +65,29 @@
 #define TT_TAG_prep   MAKE_TT_TAG('p','r','e','p')
 
 /* All tt_* structs are big-endian */
+typedef struct _tt_cmap_index {
+    uint16_t platform;
+    uint16_t encoding;
+    uint32_t offset;
+} tt_cmap_index_t;
+
+typedef struct _tt_cmap {
+    uint16_t        version;
+    uint16_t        num_tables;
+    tt_cmap_index_t index[];
+} tt_cmap_t;
+
+typedef struct _segment_map {
+    uint16_t format;
+    uint16_t length;
+    uint16_t version;
+    uint16_t segCountX2;
+    uint16_t searchRange;
+    uint16_t entrySelector;
+    uint16_t rangeShift;
+    uint16_t endCount[];
+} tt_segment_map_t;
+
 typedef struct _tt_head {
     int16_t     version_1;
     int16_t     version_2;
diff --git a/src/cairo-truetype-subset.c b/src/cairo-truetype-subset.c
index 86509df..20c1cb1 100644
--- a/src/cairo-truetype-subset.c
+++ b/src/cairo-truetype-subset.c
@@ -114,6 +114,8 @@ be32_to_cpu(uint32_t v)
 
 #endif
 
+
+
 static cairo_status_t
 _cairo_truetype_font_create (cairo_scaled_font_subset_t  *scaled_font_subset,
 			     cairo_truetype_font_t      **font_return)
@@ -880,3 +882,146 @@ _cairo_truetype_subset_fini (cairo_truet
     free (subset->string_offsets);
 }
 
+static cairo_int_status_t
+_cairo_truetype_map_glyphs_to_unicode (cairo_scaled_font_subset_t *font_subset,
+                                       unsigned long               table_offset)
+{
+    cairo_status_t status = CAIRO_INT_STATUS_UNSUPPORTED;
+    const cairo_scaled_font_backend_t *backend;
+    tt_segment_map_t *map;
+    char buf[4];
+    unsigned int num_segments, i, j, k;
+    unsigned long size;
+    uint16_t *start_code;
+    uint16_t *end_code;
+    uint16_t *delta;
+    uint16_t *range_offset;
+    uint16_t *glyph_array;
+    uint16_t  g_id, c;
+    
+    backend = font_subset->scaled_font->backend;
+    size = 4;
+    if (backend->load_truetype_table (font_subset->scaled_font,
+                                      TT_TAG_cmap, table_offset,
+                                      (unsigned char *) &buf,
+                                      &size) != CAIRO_STATUS_SUCCESS) {
+	return CAIRO_INT_STATUS_UNSUPPORTED;
+    }
+    
+    /* All table formats have the same first two words */
+    map = (tt_segment_map_t *) buf;
+    if (be16_to_cpu (map->format) != 4)
+	return CAIRO_INT_STATUS_UNSUPPORTED;
+
+    size = be16_to_cpu (map->length);
+    map = malloc (size);
+    if (map == NULL)
+	return CAIRO_STATUS_NO_MEMORY;
+    if (backend->load_truetype_table (font_subset->scaled_font,
+                                      TT_TAG_cmap, table_offset,
+                                      (unsigned char *) map,
+                                      &size) != CAIRO_STATUS_SUCCESS) {
+	goto fail;
+    }
+
+    num_segments = be16_to_cpu (map->segCountX2)/2;
+    end_code = map->endCount;
+    start_code = &(end_code[num_segments+1]);
+    delta = &(start_code[num_segments]);
+    range_offset = &(delta[num_segments]);
+    glyph_array = &(range_offset[num_segments]);
+
+    i = 0;
+    while (i < font_subset->num_glyphs) {
+        g_id = (uint16_t) font_subset->glyphs[i];
+
+        /* search for glyph in segments
+         * with rangeOffset=0 */
+        for (j = 0; j < num_segments; j++) {
+            c = g_id - be16_to_cpu (delta[j]);
+            if (range_offset[j] == 0 &&
+                c >= be16_to_cpu (start_code[j]) &&
+                c <= be16_to_cpu (end_code[j]))
+            {
+                font_subset->to_unicode[i] = c;
+                goto next_glyph;
+            }
+        }
+        
+        /* search for glyph in segments with rangeOffset != 0 */
+        for (j = 0; j < num_segments; j++) {
+            if (range_offset[j] != 0) {
+                for (k = be16_to_cpu (start_code[j]);
+                     k <= be16_to_cpu (end_code[j]); k++)
+                {
+                    uint16_t id;
+
+                    id = *(be16_to_cpu (range_offset[j])/2 +
+                           (k - be16_to_cpu (start_code[j])) + &range_offset[j]);
+                    if (id == g_id) {
+                        font_subset->to_unicode[i] = id;
+                        goto next_glyph;
+                    }
+                }
+            }
+        }
+
+    next_glyph:
+        i++;
+    }
+    status = CAIRO_STATUS_SUCCESS;
+fail:
+    free (map);
+
+    return status;
+}
+
+cairo_int_status_t
+_cairo_truetype_create_glyph_to_unicode_map (cairo_scaled_font_subset_t	*font_subset)
+{
+    cairo_status_t status = CAIRO_INT_STATUS_UNSUPPORTED;
+    const cairo_scaled_font_backend_t *backend;
+    tt_cmap_t *cmap;
+    char buf[4];
+    int num_tables, i;
+    unsigned long size;
+
+    backend = font_subset->scaled_font->backend;
+    if (!backend->load_truetype_table)
+	return CAIRO_INT_STATUS_UNSUPPORTED;
+
+    size = 4;
+    if (backend->load_truetype_table (font_subset->scaled_font,
+                                      TT_TAG_cmap, 0, (unsigned char *) &buf,
+                                      &size) != CAIRO_STATUS_SUCCESS)
+	return CAIRO_INT_STATUS_UNSUPPORTED;
+
+    cmap = (tt_cmap_t *) buf;
+    num_tables = be16_to_cpu (cmap->num_tables);
+    size = 4 + num_tables*sizeof(tt_cmap_index_t);
+    cmap = malloc (size);
+    if (cmap == NULL)
+	return CAIRO_STATUS_NO_MEMORY;
+    if (backend->load_truetype_table (font_subset->scaled_font,
+                                      TT_TAG_cmap, 0, (unsigned char *) cmap,
+                                      &size) != CAIRO_STATUS_SUCCESS) {
+	status = CAIRO_INT_STATUS_UNSUPPORTED;
+        goto cleanup;
+    }
+                   
+    /* Find a table with Unicode mapping */
+    for (i = 0; i < num_tables; i++) {
+        if (be16_to_cpu (cmap->index[i].platform) == 3 &&
+            be16_to_cpu (cmap->index[i].encoding) == 1) {
+            status = _cairo_truetype_map_glyphs_to_unicode (font_subset,
+                                                            be32_to_cpu (cmap->index[i].offset));
+            if (status != CAIRO_INT_STATUS_UNSUPPORTED)
+                goto cleanup;
+        }
+    }
+
+cleanup:
+    free (cmap);
+    
+    return status;
+}
diff --git a/src/cairo-win32-font.c b/src/cairo-win32-font.c
index 139a73b..8e8f9c5 100644
--- a/src/cairo-win32-font.c
+++ b/src/cairo-win32-font.c
@@ -1281,6 +1281,22 @@ _cairo_win32_scaled_font_load_truetype_t
     return status;
 }
 
+static cairo_int_status_t
+_cairo_win32_scaled_font_map_glyphs_to_unicode (void *abstract_font,
+                                                      cairo_scaled_font_subset_t *font_subset)
+{
+    cairo_win32_scaled_font_t *scaled_font = abstract_font;
+    unsigned int i;
+    
+    if (scaled_font->glyph_indexing)
+        return CAIRO_INT_STATUS_UNSUPPORTED;
+
+    for (i = 0; i < font_subset->num_glyphs; i++)
+        font_subset->to_unicode[i] = font_subset->glyphs[i];
+
+    return CAIRO_STATUS_SUCCESS;
+}
+
 static void
 _cairo_win32_transform_FIXED_to_fixed (cairo_matrix_t *matrix,
                                        FIXED Fx, FIXED Fy,
@@ -1467,6 +1483,7 @@ const cairo_scaled_font_backend_t cairo_
     NULL,			/* ucs4_to_index */
     _cairo_win32_scaled_font_show_glyphs,
     _cairo_win32_scaled_font_load_truetype_table,
+    _cairo_win32_scaled_font_map_glyphs_to_unicode,
 };
 
 /* cairo_win32_font_face_t */
diff --git a/src/cairoint.h b/src/cairoint.h
index 060b988..669e454 100755
--- a/src/cairoint.h
+++ b/src/cairoint.h
@@ -598,6 +598,19 @@ typedef enum _cairo_scaled_glyph_info {
     CAIRO_SCALED_GLYPH_INFO_PATH	= (1 << 2)
 } cairo_scaled_glyph_info_t;
 
+typedef struct _cairo_scaled_font_subset {
+    cairo_scaled_font_t *scaled_font;
+    unsigned int font_id;
+    unsigned int subset_id;
+
+    /* Index of glyphs array is subset_glyph_index.
+     * Value of glyphs array is scaled_font_glyph_index.
+     */
+    unsigned long *glyphs;
+    unsigned long *to_unicode;
+    unsigned int num_glyphs;
+} cairo_scaled_font_subset_t;
+
 struct _cairo_scaled_font_backend {
     cairo_font_type_t type;
 
@@ -651,6 +664,11 @@ struct _cairo_scaled_font_backend {
                            long                  offset,
                            unsigned char        *buffer,
                            unsigned long        *length);
+
+    void
+    (*map_glyphs_to_unicode)(void                       *scaled_font,
+                                   cairo_scaled_font_subset_t *font_subset);
+
 };
 
 struct _cairo_font_face_backend {
-- 
1.4.3.4


More information about the cairo mailing list