[cairo-commit] src/cairo-cff-subset.c src/cairo-pdf-surface.c src/cairo-scaled-font-subsets-private.h src/cairo-truetype-subset.c
Adrian Johnson
ajohnson at kemper.freedesktop.org
Thu Sep 15 05:36:23 PDT 2011
src/cairo-cff-subset.c | 12 -
src/cairo-pdf-surface.c | 81 ++++++++++-
src/cairo-scaled-font-subsets-private.h | 4
src/cairo-truetype-subset.c | 225 ++++++++++++++++++++++++--------
4 files changed, 253 insertions(+), 69 deletions(-)
New commits:
commit 47e16d0e565fcb579148285394bafd45b53ecaff
Author: Adrian Johnson <ajohnson at redneon.com>
Date: Thu Sep 15 21:52:26 2011 +0930
subsetting: Support unicode fontnames
Most fonts use Windows platform-specific encoded font names since they
allow Unicode names.
- Make _cairo_truetype_read_font_name() read the Windows platform
names first. If this fails, fall back to reading the Mac platform
MacRoman encoded name.
- Use the PDF method of encoding non-ASCII PS font names. Poppler will
correctly extract the Unicode name.
- Make PDF embed the font family name as AsciiHex if the name is not ASCII.
diff --git a/src/cairo-cff-subset.c b/src/cairo-cff-subset.c
index 3ee3ef3..c011cfc 100644
--- a/src/cairo-cff-subset.c
+++ b/src/cairo-cff-subset.c
@@ -2837,13 +2837,13 @@ _cairo_cff_subset_init (cairo_cff_subset_t *cff_subset,
}
if (font->font_name) {
- cff_subset->font_name = strdup (font->font_name);
- if (cff_subset->font_name == NULL) {
+ cff_subset->family_name_utf8 = strdup (font->font_name);
+ if (cff_subset->family_name_utf8 == NULL) {
status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
goto fail2;
}
} else {
- cff_subset->font_name = NULL;
+ cff_subset->family_name_utf8 = NULL;
}
cff_subset->widths = calloc (sizeof (double), font->scaled_font_subset->num_glyphs);
@@ -2877,7 +2877,7 @@ _cairo_cff_subset_init (cairo_cff_subset_t *cff_subset,
fail4:
free (cff_subset->widths);
fail3:
- free (cff_subset->font_name);
+ free (cff_subset->family_name_utf8);
fail2:
free (cff_subset->ps_name);
fail1:
@@ -2890,7 +2890,7 @@ void
_cairo_cff_subset_fini (cairo_cff_subset_t *subset)
{
free (subset->ps_name);
- free (subset->font_name);
+ free (subset->family_name_utf8);
free (subset->widths);
free (subset->data);
}
@@ -3249,7 +3249,7 @@ _cairo_cff_fallback_init (cairo_cff_subset_t *cff_subset,
if (unlikely (status))
goto fail2;
- cff_subset->font_name = NULL;
+ cff_subset->family_name_utf8 = NULL;
cff_subset->ps_name = strdup (font->ps_name);
if (unlikely (cff_subset->ps_name == NULL)) {
status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
diff --git a/src/cairo-pdf-surface.c b/src/cairo-pdf-surface.c
index 7336a7d..16e4576 100644
--- a/src/cairo-pdf-surface.c
+++ b/src/cairo-pdf-surface.c
@@ -3960,6 +3960,61 @@ _cairo_pdf_surface_write_pages (cairo_pdf_surface_t *surface)
}
static cairo_status_t
+_utf8_to_pdf_string (const char *utf8, char **str_out)
+{
+ int i;
+ int len;
+ cairo_bool_t ascii;
+ char *str;
+ cairo_status_t status = CAIRO_STATUS_SUCCESS;
+
+ ascii = TRUE;
+ len = strlen (utf8);
+ for (i = 0; i < len; i++) {
+ unsigned c = utf8[i];
+ if (c < 32 || c > 126 || c == '(' || c == ')' || c == '\\') {
+ ascii = FALSE;
+ break;
+ }
+ }
+
+ if (ascii) {
+ str = malloc (len + 3);
+ if (str == NULL)
+ return _cairo_error (CAIRO_STATUS_NO_MEMORY);
+
+ str[0] = '(';
+ for (i = 0; i < len; i++)
+ str[i+1] = utf8[i];
+ str[i+1] = ')';
+ str[i+2] = 0;
+ } else {
+ uint16_t *utf16 = NULL;
+ int utf16_len = 0;
+
+ status = _cairo_utf8_to_utf16 (utf8, -1, &utf16, &utf16_len);
+ if (unlikely (status))
+ return status;
+
+ str = malloc (utf16_len*4 + 7);
+ if (str == NULL) {
+ free (utf16);
+ return _cairo_error (CAIRO_STATUS_NO_MEMORY);
+ }
+
+ strcpy (str, "<FEFF");
+ for (i = 0; i < utf16_len; i++)
+ snprintf (str + 4*i + 5, 5, "%04X", utf16[i]);
+
+ strcat (str, ">");
+ free (utf16);
+ }
+ *str_out = str;
+
+ return status;
+}
+
+static cairo_status_t
_cairo_pdf_surface_emit_unicode_for_glyph (cairo_pdf_surface_t *surface,
const char *utf8)
{
@@ -4243,10 +4298,17 @@ _cairo_pdf_surface_emit_cff_font (cairo_pdf_surface_t *surface,
tag,
subset->ps_name);
- if (subset->font_name) {
+ if (subset->family_name_utf8) {
+ char *pdf_str;
+
+ status = _utf8_to_pdf_string (subset->family_name_utf8, &pdf_str);
+ if (unlikely (status))
+ return status;
+
_cairo_output_stream_printf (surface->output,
- " /FontFamily (%s)\n",
- subset->font_name);
+ " /FontFamily %s\n",
+ pdf_str);
+ free (pdf_str);
}
_cairo_output_stream_printf (surface->output,
@@ -4681,10 +4743,17 @@ _cairo_pdf_surface_emit_truetype_font_subset (cairo_pdf_surface_t *surface,
tag,
subset.ps_name);
- if (subset.font_name) {
+ if (subset.family_name_utf8) {
+ char *pdf_str;
+
+ status = _utf8_to_pdf_string (subset.family_name_utf8, &pdf_str);
+ if (unlikely (status))
+ return status;
+
_cairo_output_stream_printf (surface->output,
- " /FontFamily (%s)\n",
- subset.font_name);
+ " /FontFamily %s\n",
+ pdf_str);
+ free (pdf_str);
}
_cairo_output_stream_printf (surface->output,
diff --git a/src/cairo-scaled-font-subsets-private.h b/src/cairo-scaled-font-subsets-private.h
index 92f0020..2edf770 100644
--- a/src/cairo-scaled-font-subsets-private.h
+++ b/src/cairo-scaled-font-subsets-private.h
@@ -345,7 +345,7 @@ cairo_private cairo_int_status_t
_cairo_scaled_font_subset_create_glyph_names (cairo_scaled_font_subset_t *subset);
typedef struct _cairo_cff_subset {
- char *font_name;
+ char *family_name_utf8;
char *ps_name;
double *widths;
double x_min, y_min, x_max, y_max;
@@ -427,7 +427,7 @@ cairo_private void
_cairo_cff_fallback_fini (cairo_cff_subset_t *cff_subset);
typedef struct _cairo_truetype_subset {
- char *font_name;
+ char *family_name_utf8;
char *ps_name;
double *widths;
double x_min, y_min, x_max, y_max;
diff --git a/src/cairo-truetype-subset.c b/src/cairo-truetype-subset.c
index d0d6ab7..1afdf3a 100644
--- a/src/cairo-truetype-subset.c
+++ b/src/cairo-truetype-subset.c
@@ -1137,13 +1137,13 @@ cairo_truetype_subset_init_internal (cairo_truetype_subset_t *truetype_subse
}
if (font->base.font_name != NULL) {
- truetype_subset->font_name = strdup (font->base.font_name);
- if (unlikely (truetype_subset->font_name == NULL)) {
+ truetype_subset->family_name_utf8 = strdup (font->base.font_name);
+ if (unlikely (truetype_subset->family_name_utf8 == NULL)) {
status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
goto fail2;
}
} else {
- truetype_subset->font_name = NULL;
+ truetype_subset->family_name_utf8 = NULL;
}
/* The widths array returned must contain only widths for the
@@ -1201,7 +1201,7 @@ cairo_truetype_subset_init_internal (cairo_truetype_subset_t *truetype_subse
fail4:
free (truetype_subset->widths);
fail3:
- free (truetype_subset->font_name);
+ free (truetype_subset->family_name_utf8);
fail2:
free (truetype_subset->ps_name);
fail1:
@@ -1228,7 +1228,7 @@ void
_cairo_truetype_subset_fini (cairo_truetype_subset_t *subset)
{
free (subset->ps_name);
- free (subset->font_name);
+ free (subset->family_name_utf8);
free (subset->widths);
free (subset->data);
free (subset->string_offsets);
@@ -1395,6 +1395,107 @@ cleanup:
return status;
}
+static cairo_status_t
+find_name (tt_name_t *name, int name_id, int platform, int encoding, int language, char **str_out)
+{
+ tt_name_record_t *record;
+ int i, len;
+ char *str;
+ char *p;
+ cairo_bool_t has_tag;
+ cairo_status_t status;
+
+ str = NULL;
+ for (i = 0; i < be16_to_cpu (name->num_records); i++) {
+ record = &(name->records[i]);
+ if (be16_to_cpu (record->name) == name_id &&
+ be16_to_cpu (record->platform) == platform &&
+ be16_to_cpu (record->encoding) == encoding &&
+ (language == -1 || be16_to_cpu (record->language) == language)) {
+
+ str = malloc (be16_to_cpu (record->length) + 1);
+ if (str == NULL)
+ return _cairo_error (CAIRO_STATUS_NO_MEMORY);
+
+ len = be16_to_cpu (record->length);
+ memcpy (str,
+ ((char*)name) + be16_to_cpu (name->strings_offset) + be16_to_cpu (record->offset),
+ len);
+ str[be16_to_cpu (record->length)] = 0;
+ break;
+ }
+ }
+ if (str == NULL) {
+ *str_out = NULL;
+ return CAIRO_STATUS_SUCCESS;
+ }
+
+ if (platform == 3) { /* Win platform, unicode encoding */
+ /* convert to utf8 */
+ int size = 0;
+ char *utf8;
+ uint16_t *u = (uint16_t *) str;
+ int u_len = len/2;
+
+ for (i = 0; i < u_len; i++)
+ size += _cairo_ucs4_to_utf8 (be16_to_cpu(u[i]), NULL);
+
+ utf8 = malloc (size + 1);
+ if (utf8 == NULL) {
+ status =_cairo_error (CAIRO_STATUS_NO_MEMORY);
+ goto fail;
+ }
+ p = utf8;
+ for (i = 0; i < u_len; i++)
+ p += _cairo_ucs4_to_utf8 (be16_to_cpu(u[i]), p);
+ *p = 0;
+ free (str);
+ str = utf8;
+ } else if (platform == 1) { /* Mac platform, Mac Roman encoding */
+ /* Replace characters above 127 with underscores. We could use
+ * a lookup table to convert to unicode but since most fonts
+ * include a unicode name this is just a rarely used fallback. */
+ for (i = 0; i < len; i++) {
+ if ((unsigned char)str[i] > 127)
+ str[i] = '_';
+ }
+ }
+
+ /* If font name is prefixed with a PDF subset tag, strip it off. */
+ p = str;
+ len = strlen (str);
+ has_tag = FALSE;
+ if (len > 7 && p[6] == '+') {
+ has_tag = TRUE;
+ for (i = 0; i < 6; i++) {
+ if (p[i] < 'A' || p[i] > 'Z') {
+ has_tag = FALSE;
+ break;
+ }
+ }
+ }
+ if (has_tag) {
+ p = malloc (len - 6);
+ if (unlikely (p == NULL)) {
+ status =_cairo_error (CAIRO_STATUS_NO_MEMORY);
+ goto fail;
+ }
+ memcpy (p, str + 7, len - 7);
+ p[len-7] = 0;
+ free (str);
+ str = p;
+ }
+
+ *str_out = str;
+
+ return CAIRO_STATUS_SUCCESS;
+
+ fail:
+ free (str);
+
+ return status;
+}
+
cairo_int_status_t
_cairo_truetype_read_font_name (cairo_scaled_font_t *scaled_font,
char **ps_name_out,
@@ -1403,11 +1504,9 @@ _cairo_truetype_read_font_name (cairo_scaled_font_t *scaled_font,
cairo_status_t status;
const cairo_scaled_font_backend_t *backend;
tt_name_t *name;
- tt_name_record_t *record;
unsigned long size;
- int i, j;
char *ps_name = NULL;
- char *font_name = NULL;
+ char *family_name = NULL;
backend = scaled_font->backend;
if (!backend->load_truetype_table)
@@ -1425,76 +1524,92 @@ _cairo_truetype_read_font_name (cairo_scaled_font_t *scaled_font,
if (name == NULL)
return _cairo_error (CAIRO_STATUS_NO_MEMORY);
- status = backend->load_truetype_table (scaled_font,
+ status = backend->load_truetype_table (scaled_font,
TT_TAG_name, 0,
(unsigned char *) name,
&size);
if (status)
goto fail;
- /* Extract the font name and PS name from the name table. At
- * present this just looks for the Mac platform/Roman encoded font
- * name. It should be extended to use any suitable font name in
- * the name table.
- */
- for (i = 0; i < be16_to_cpu(name->num_records); i++) {
- record = &(name->records[i]);
- if ((be16_to_cpu (record->platform) == 1) &&
- (be16_to_cpu (record->encoding) == 0)) {
-
- if (be16_to_cpu (record->name) == 4) {
- font_name = malloc (be16_to_cpu(record->length) + 1);
- if (font_name == NULL) {
- status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
- goto fail;
- }
- strncpy(font_name,
- ((char*)name) + be16_to_cpu (name->strings_offset) + be16_to_cpu (record->offset),
- be16_to_cpu (record->length));
- font_name[be16_to_cpu (record->length)] = 0;
- }
+ /* Find PS Name (name_id = 6). OT spec says PS name must be one of
+ * the following two encodings */
+ status = find_name (name, 6, 3, 1, 0x409, &ps_name); /* win, unicode, english-us */
+ if (unlikely(status))
+ goto fail;
- if (be16_to_cpu (record->name) == 6) {
- ps_name = malloc (be16_to_cpu(record->length) + 1);
- if (ps_name == NULL) {
- status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
- goto fail;
- }
- strncpy(ps_name,
- ((char*)name) + be16_to_cpu (name->strings_offset) + be16_to_cpu (record->offset),
- be16_to_cpu (record->length));
- ps_name[be16_to_cpu (record->length)] = 0;
- }
+ if (!ps_name) {
+ status = find_name (name, 6, 1, 0, 0, &ps_name); /* mac, roman, english */
+ if (unlikely(status))
+ goto fail;
+ }
- if (font_name && ps_name)
- break;
- }
+ /* Find Family name (name_id = 1) */
+ status = find_name (name, 1, 3, 1, 0x409, &family_name); /* win, unicode, english-us */
+ if (unlikely(status))
+ goto fail;
+
+ if (!family_name) {
+ status = find_name (name, 1, 3, 0, 0x409, &family_name); /* win, symbol, english-us */
+ if (unlikely(status))
+ goto fail;
+ }
+
+ if (!family_name) {
+ status = find_name (name, 1, 1, 0, 0, &family_name); /* mac, roman, english */
+ if (unlikely(status))
+ goto fail;
+ }
+
+ if (!family_name) {
+ status = find_name (name, 1, 3, 1, -1, &family_name); /* win, unicode, any language */
+ if (unlikely(status))
+ goto fail;
}
free (name);
- /* Ensure PS name does not contain any spaces */
+ /* Ensure PS name is a valid PDF/PS name object. In PDF names are
+ * treated as UTF8 and non ASCII bytes, ' ', and '#' are encoded
+ * as '#' followed by 2 hex digits that encode the byte. By also
+ * encoding the characters in the reserved string we ensure the
+ * name is also PS compatible. */
if (ps_name) {
- for (i = 0, j = 0; ps_name[j]; j++) {
- if (ps_name[j] == ' ')
- continue;
- ps_name[i++] = ps_name[j];
+ static const char *reserved = "()<>[]{}/%#\\";
+ char buf[128]; /* max name length is 127 bytes */
+ char *src = ps_name;
+ char *dst = buf;
+
+ while (*src && dst < buf + 127) {
+ unsigned char c = *src;
+ if (c < 0x21 || c > 0x7e || strchr (reserved, c)) {
+ if (dst + 4 > buf + 127)
+ break;
+
+ snprintf (dst, 4, "#%02X", c);
+ src++;
+ dst += 3;
+ } else {
+ *dst++ = *src++;
+ }
+ }
+ *dst = 0;
+ free (ps_name);
+ ps_name = strdup (buf);
+ if (ps_name == NULL) {
+ status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
+ goto fail;
}
- ps_name[i] = '\0';
}
*ps_name_out = ps_name;
- *font_name_out = font_name;
+ *font_name_out = family_name;
return CAIRO_STATUS_SUCCESS;
fail:
free (name);
-
free (ps_name);
-
- free (font_name);
-
+ free (family_name);
*ps_name_out = NULL;
*font_name_out = NULL;
More information about the cairo-commit
mailing list