[poppler] Re: [Patch] Info about document fonts
Marco Pesenti Gritti
mpgritti at gmail.com
Sat Jun 11 07:09:03 PDT 2005
Grrr the patch :)
On 6/11/05, Marco Pesenti Gritti <mpgritti at gmail.com> wrote:
> Attached patch implements a scan api, as discussed yesterday on irc,
> which can scan the document for fonts incrementally. I used it in
> evince and it works pretty well.
>
> I'm not that sure about the _scan api yet:
>
> gboolean
> poppler_font_info_scan (PopplerFontInfo *font_info,
> int n_pages,
> PopplerFontsIter **iter)
>
> *iter contains an iterator if fonts were found, otherwise NULL; the
> return value is FALSE when all the document pages have been scanned.
>
> I'm not yet 100% sure about the ideal UI. In evince I'm not adding
> fonts incrementally as they are found at the moment. Instead I'm showing
> a "Loading..." message until all fonts have been scanned. The reason is
> that I'm not sure what sort of feedback to give if we incrementally
> add items (how do you know scanning is still in progress?) and whether
> that is actually necessary anyway.
>
> Anyway, I guess this could be a decent start, we could improve api and
> UI later...
>
> Marco
>
-------------- next part --------------
? dump.txt
? slice.png
? glib/dump.txt
? glib/poppler-enums.c
? glib/poppler-enums.h
? glib/slice.png
? poppler/FontInfo.cc
? poppler/FontInfo.h
Index: glib/poppler-document.cc
===================================================================
RCS file: /cvs/poppler/poppler/glib/poppler-document.cc,v
retrieving revision 1.17
diff -u -r1.17 poppler-document.cc
--- glib/poppler-document.cc 29 May 2005 14:59:34 -0000 1.17
+++ glib/poppler-document.cc 11 Jun 2005 13:42:52 -0000
@@ -26,6 +26,7 @@
#include <GfxState.h>
#include <SplashOutputDev.h>
#include <Stream.h>
+#include <FontInfo.h>
#include "poppler.h"
#include "poppler-private.h"
@@ -745,6 +746,136 @@
}
+/* Iterator over the fonts produced by one poppler_font_info_scan() call. */
+struct _PopplerFontsIter
+{
+ GooList *items; /* list of FontInfo*; deleted by poppler_fonts_iter_free() */
+ int index; /* position of the current font inside items */
+};
+
+/* Returns the boxed GType of PopplerFontsIter, registering it with the
+ * copy/free functions the first time it is requested. */
+GType
+poppler_fonts_iter_get_type (void)
+{
+  static GType fonts_iter_type = 0;
+
+  if (fonts_iter_type != 0)
+    return fonts_iter_type;
+
+  fonts_iter_type =
+    g_boxed_type_register_static ("PopplerFontsIter",
+                                  (GBoxedCopyFunc) poppler_fonts_iter_copy,
+                                  (GBoxedFreeFunc) poppler_fonts_iter_free);
+  return fonts_iter_type;
+}
+
+/**
+ * poppler_fonts_iter_get_name:
+ * @iter: a #PopplerFontsIter positioned on a font
+ *
+ * Looks up the name of the font @iter currently points at.
+ *
+ * Return value: the font name, which points into data released by
+ * poppler_fonts_iter_free() and must not be freed by the caller, or %NULL
+ * if @iter is %NULL, has moved past the last font, or the font has no name
+ **/
+const char *
+poppler_fonts_iter_get_name (PopplerFontsIter *iter)
+{
+  FontInfo *info;
+  GooString *name;
+
+  g_return_val_if_fail (iter != NULL, NULL);
+  /* poppler_fonts_iter_next() leaves index == length once it returns FALSE,
+   * so reject positions outside the list instead of reading past its end. */
+  g_return_val_if_fail (iter->index >= 0 &&
+                        iter->index < iter->items->getLength(), NULL);
+
+  info = (FontInfo *)iter->items->get (iter->index);
+  name = info->getName();
+
+  return name ? name->getCString() : NULL;
+}
+
+/* Advances @iter by one font.  Returns TRUE while the new position still
+ * refers to an entry of the list, FALSE once it has moved past the end
+ * (or when @iter is NULL). */
+gboolean
+poppler_fonts_iter_next (PopplerFontsIter *iter)
+{
+  g_return_val_if_fail (iter != NULL, FALSE);
+
+  iter->index += 1;
+
+  return (iter->index < iter->items->getLength()) ? TRUE : FALSE;
+}
+
+/* Creates an independent copy of @iter: the position is preserved and
+ * every FontInfo in the list is duplicated, so the copy and the original
+ * can be freed separately.  Returns NULL when @iter is NULL. */
+PopplerFontsIter *
+poppler_fonts_iter_copy (PopplerFontsIter *iter)
+{
+  PopplerFontsIter *duplicate;
+  int n_fonts, pos;
+
+  g_return_val_if_fail (iter != NULL, NULL);
+
+  duplicate = g_new0 (PopplerFontsIter, 1);
+  duplicate->index = iter->index;
+  duplicate->items = new GooList ();
+
+  n_fonts = iter->items->getLength();
+  for (pos = 0; pos < n_fonts; pos++) {
+    FontInfo *source = (FontInfo *)iter->items->get(pos);
+    duplicate->items->append (new FontInfo (*source));
+  }
+
+  return duplicate;
+}
+
+/* Releases @iter together with its list and every FontInfo stored in it.
+ * Passing NULL is allowed and does nothing. */
+void
+poppler_fonts_iter_free (PopplerFontsIter *iter)
+{
+  if (iter != NULL) {
+    deleteGooList (iter->items, FontInfo);
+    g_free (iter);
+  }
+}
+
+/* Wraps @items in a freshly allocated iterator positioned on the first
+ * entry.  The list pointer is stored as-is (not copied). */
+static PopplerFontsIter *
+poppler_fonts_iter_new (GooList *items)
+{
+  PopplerFontsIter *result = g_new0 (PopplerFontsIter, 1);
+
+  result->index = 0;
+  result->items = items;
+
+  return result;
+}
+
+/* Allocates a font scanner bound to @document.  A reference on the
+ * document is taken and kept in the returned structure; release it with
+ * poppler_font_info_free().  Returns NULL if @document is not a
+ * PopplerDocument. */
+PopplerFontInfo *
+poppler_font_info_new (PopplerDocument *document)
+{
+  PopplerFontInfo *info;
+
+  g_return_val_if_fail (POPPLER_IS_DOCUMENT (document), NULL);
+
+  info = g_new0 (PopplerFontInfo, 1);
+  info->document = (PopplerDocument *) g_object_ref (document);
+  info->scanner = new FontInfoScanner(document->doc);
+
+  return info;
+}
+
+/**
+ * poppler_font_info_scan:
+ * @font_info: a #PopplerFontInfo
+ * @n_pages: number of pages to scan in this call
+ * @iter: return location for a #PopplerFontsIter
+ *
+ * Runs the next scanning step.  On return *@iter is either a new iterator
+ * over the fonts found by this step (free it with
+ * poppler_fonts_iter_free()) or %NULL when the step found no fonts.
+ *
+ * Return value: %FALSE when the scanner reports that scanning is finished
+ * (its scan() returned no list) or when an argument is invalid, %TRUE
+ * otherwise
+ **/
+gboolean
+poppler_font_info_scan (PopplerFontInfo *font_info,
+                        int n_pages,
+                        PopplerFontsIter **iter)
+{
+  GooList *items;
+
+  g_return_val_if_fail (iter != NULL, FALSE);
+
+  /* Give the caller a defined value on every exit path. */
+  *iter = NULL;
+
+  g_return_val_if_fail (font_info != NULL, FALSE);
+
+  items = font_info->scanner->scan(n_pages);
+  if (items == NULL)
+    return FALSE;
+
+  /* Decide the result before the list can be deleted, so a dangling
+   * pointer is never inspected. */
+  if (items->getLength() == 0)
+    delete items;
+  else
+    *iter = poppler_fonts_iter_new(items);
+
+  return TRUE;
+}
+
+/**
+ * poppler_font_info_free:
+ * @font_info: a #PopplerFontInfo created by poppler_font_info_new()
+ *
+ * Destroys the scanner, drops the document reference taken by
+ * poppler_font_info_new() and frees @font_info itself.
+ **/
+void
+poppler_font_info_free (PopplerFontInfo *font_info)
+{
+  g_return_if_fail (font_info != NULL);
+
+  delete font_info->scanner;
+
+  g_object_unref (font_info->document);
+
+  /* The structure was allocated with g_new0(); without this it leaks. */
+  g_free (font_info);
+}
+
/**
* poppler_ps_file_new:
* @document: a #PopplerDocument
Index: glib/poppler-document.h
===================================================================
RCS file: /cvs/poppler/poppler/glib/poppler-document.h,v
retrieving revision 1.11
diff -u -r1.11 poppler-document.h
--- glib/poppler-document.h 23 May 2005 04:23:53 -0000 1.11
+++ glib/poppler-document.h 11 Jun 2005 13:42:53 -0000
@@ -103,6 +103,19 @@
PopplerAction *poppler_index_iter_get_action (PopplerIndexIter *iter);
gboolean poppler_index_iter_next (PopplerIndexIter *iter);
+/* Interface for getting the Fonts of a poppler_document */
+PopplerFontInfo *poppler_font_info_new (PopplerDocument *document);
+gboolean poppler_font_info_scan (PopplerFontInfo *font_info,
+ int n_pages,
+ PopplerFontsIter **iter);
+void poppler_font_info_free (PopplerFontInfo *font_info);
+
+GType poppler_fonts_iter_get_type (void) G_GNUC_CONST;
+PopplerFontsIter *poppler_fonts_iter_copy (PopplerFontsIter *iter);
+void poppler_fonts_iter_free (PopplerFontsIter *iter);
+const char *poppler_fonts_iter_get_name (PopplerFontsIter *iter);
+gboolean poppler_fonts_iter_next (PopplerFontsIter *iter);
+
/* Export to ps */
PopplerPSFile *poppler_ps_file_new (PopplerDocument *document,
const char *filename,
Index: glib/poppler-private.h
===================================================================
RCS file: /cvs/poppler/poppler/glib/poppler-private.h,v
retrieving revision 1.6
diff -u -r1.6 poppler-private.h
--- glib/poppler-private.h 21 Apr 2005 05:20:25 -0000 1.6
+++ glib/poppler-private.h 11 Jun 2005 13:42:53 -0000
@@ -5,6 +5,7 @@
#include <PDFDoc.h>
#include <PSOutputDev.h>
#include <Link.h>
+#include <FontInfo.h>
#if defined (HAVE_CAIRO)
#include <CairoOutputDevImage.h>
@@ -30,6 +31,12 @@
PSOutputDev *out;
};
+/* State of an incremental font scan, created by poppler_font_info_new(). */
+struct _PopplerFontInfo
+{
+ PopplerDocument *document; /* referenced in _new(), unreferenced in _free() */
+ FontInfoScanner *scanner; /* created in _new(), deleted in _free() */
+};
+
struct _PopplerPage
{
GObject parent_instance;
Index: glib/poppler.h
===================================================================
RCS file: /cvs/poppler/poppler/glib/poppler.h,v
retrieving revision 1.7
diff -u -r1.7 poppler.h
--- glib/poppler.h 11 May 2005 20:01:43 -0000 1.7
+++ glib/poppler.h 11 Jun 2005 13:42:54 -0000
@@ -45,9 +45,11 @@
typedef struct _PopplerDocument PopplerDocument;
typedef struct _PopplerIndexIter PopplerIndexIter;
+typedef struct _PopplerFontsIter PopplerFontsIter;
typedef struct _PopplerRectangle PopplerRectangle;
typedef struct _PopplerLinkMapping PopplerLinkMapping;
typedef struct _PopplerPage PopplerPage;
+typedef struct _PopplerFontInfo PopplerFontInfo;
typedef struct _PopplerPSFile PopplerPSFile;
typedef union _PopplerAction PopplerAction;
Index: glib/test-poppler-glib.c
===================================================================
RCS file: /cvs/poppler/poppler/glib/test-poppler-glib.c,v
retrieving revision 1.11
diff -u -r1.11 test-poppler-glib.c
--- glib/test-poppler-glib.c 29 May 2005 14:59:34 -0000 1.11
+++ glib/test-poppler-glib.c 11 Jun 2005 13:42:57 -0000
@@ -15,6 +15,8 @@
PopplerPageLayout layout;
PopplerPageMode mode;
PopplerViewerPreferences view_prefs;
+ PopplerFontInfo *font_info;
+ PopplerFontsIter *fonts_iter;
GEnumValue *enum_value;
g_object_get (document,
@@ -53,6 +55,18 @@
g_print ("\tcreation date:\t%d\n", creation_date);
g_print ("\tmodified date:\t%d\n", mod_date);
+ g_print ("\tfonts:\n");
+ font_info = poppler_font_info_new (document);
+ while (poppler_font_info_scan (font_info, 20, &fonts_iter)) {
+ if (fonts_iter) {
+ do {
+ g_print ("\t\t\t%s\n", poppler_fonts_iter_get_name (fonts_iter));
+ } while (poppler_fonts_iter_next (fonts_iter));
+ poppler_fonts_iter_free (fonts_iter);
+ }
+ }
+ poppler_font_info_free (font_info);
+
/* FIXME: print out the view prefs when we support it */
g_free (title);
Index: poppler/Makefile.am
===================================================================
RCS file: /cvs/poppler/poppler/poppler/Makefile.am,v
retrieving revision 1.6
diff -u -r1.6 Makefile.am
--- poppler/Makefile.am 27 Apr 2005 20:56:18 -0000 1.6
+++ poppler/Makefile.am 11 Jun 2005 13:42:57 -0000
@@ -92,6 +92,7 @@
Dict.h \
Error.h \
FontEncodingTables.h \
+ FontInfo.h \
Function.cc \
Function.h \
Gfx.h \
@@ -143,6 +144,7 @@
Dict.cc \
Error.cc \
FontEncodingTables.cc \
+ FontInfo.cc \
Function.cc \
Gfx.cc \
GfxFont.cc \
--- /dev/null 2005-06-11 14:27:27.427003784 +0200
+++ poppler/FontInfo.cc 2005-06-11 15:25:08.000000000 +0200
@@ -0,0 +1,198 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <math.h>
+#include "GlobalParams.h"
+#include "Error.h"
+#include "Object.h"
+#include "Dict.h"
+#include "GfxFont.h"
+#include "Annot.h"
+#include "PDFDoc.h"
+#include "config.h"
+#include "FontInfo.h"
+
+// Human-readable names for the font types.  The entries are string
+// literals, so they are held through pointers to const.
+// NOTE(review): nothing in this file reads the table yet.
+static const char *fontTypeNames[] = {
+  "unknown",
+  "Type 1",
+  "Type 1C",
+  "Type 3",
+  "TrueType",
+  "CID Type 0",
+  "CID Type 0C",
+  "CID TrueType"
+};
+
+// Creates a scanner for docA that starts at page 1 with an empty table
+// of already-seen fonts.
+FontInfoScanner::FontInfoScanner(PDFDoc *docA)
+  : doc(docA),
+    currentPage(1),
+    fonts(NULL),
+    fontsLen(0),
+    fontsSize(0) {
+}
+
+// Releases the table of already-seen font references that scanFonts()
+// grows with grealloc().
+FontInfoScanner::~FontInfoScanner() {
+ gfree(fonts);
+}
+
+// Scans the next nPages pages (fewer if the document ends first; at least
+// one) and returns a new list holding a FontInfo for every font found on
+// them that this scanner has not reported before.  The list may be empty.
+// The caller owns the list and the FontInfo objects in it.
+// Returns NULL when every page had already been scanned before the call.
+GooList *FontInfoScanner::scan(int nPages) {
+  GooList *result;
+  Page *page;
+  Dict *resDict;
+  Annots *annots;
+  Object obj1, obj2;
+  int pg, i, numPages, remaining, lastPage;
+
+  numPages = doc->getNumPages();
+  if (currentPage > numPages) {
+    // nothing left to scan
+    return NULL;
+  }
+
+  // Clamp the batch to [1, remaining pages]: this always makes progress
+  // and avoids overflowing currentPage + nPages for huge requests.
+  remaining = numPages - currentPage + 1;
+  if (nPages < 1) {
+    nPages = 1;
+  }
+  if (nPages > remaining) {
+    nPages = remaining;
+  }
+  lastPage = currentPage + nPages - 1;
+
+  result = new GooList();
+
+  for (pg = currentPage; pg <= lastPage; ++pg) {
+    page = doc->getCatalog()->getPage(pg);
+    if (!page) {
+      continue;
+    }
+    if ((resDict = page->getResourceDict())) {
+      scanFonts(resDict, result);
+    }
+    // annotation appearance streams carry their own resources
+    annots = new Annots(doc->getXRef(), page->getAnnots(&obj1));
+    obj1.free();
+    for (i = 0; i < annots->getNumAnnots(); ++i) {
+      if (annots->getAnnot(i)->getAppearance(&obj1)->isStream()) {
+        obj1.streamGetDict()->lookup("Resources", &obj2);
+        if (obj2.isDict()) {
+          scanFonts(obj2.getDict(), result);
+        }
+        obj2.free();
+      }
+      obj1.free();
+    }
+    delete annots;
+  }
+
+  currentPage = lastPage + 1;
+
+  // Always hand back what this batch found, including the final batch;
+  // the end of the document is signalled by the *next* call returning NULL.
+  return result;
+}
+
+// Appends to fontsList a new FontInfo for every font in resDict's "Font"
+// entry that this scanner has not recorded before, then does the same for
+// the "Resources" dictionary of each stream found under "XObject".
+// Recorded fonts are remembered by object reference in the fonts table.
+void FontInfoScanner::scanFonts(Dict *resDict, GooList *fontsList) {
+  Object obj1, obj2, xObjDict, xObj, resObj;
+  Ref r;
+  GfxFontDict *gfxFontDict;
+  GfxFont *font;
+  int i, j;
+
+  // scan the fonts in this resource dictionary
+  gfxFontDict = NULL;
+  resDict->lookupNF("Font", &obj1);
+  if (obj1.isRef()) {
+    obj1.fetch(doc->getXRef(), &obj2);
+    if (obj2.isDict()) {
+      r = obj1.getRef();
+      gfxFontDict = new GfxFontDict(doc->getXRef(), &r, obj2.getDict());
+    }
+    obj2.free();
+  } else if (obj1.isDict()) {
+    gfxFontDict = new GfxFontDict(doc->getXRef(), NULL, obj1.getDict());
+  }
+  if (gfxFontDict) {
+    for (i = 0; i < gfxFontDict->getNumFonts(); ++i) {
+      if ((font = gfxFontDict->getFont(i))) {
+        Ref fontRef = *font->getID();
+        GBool alreadySeen = gFalse;
+
+        // check for an already-seen font; this search needs its own
+        // index so that the walk over the font dictionary (i) is not
+        // disturbed
+        for (j = 0; j < fontsLen; ++j) {
+          if (fontRef.num == fonts[j].num && fontRef.gen == fonts[j].gen) {
+            alreadySeen = gTrue;
+            break;
+          }
+        }
+
+        // add this font to the list
+        if (!alreadySeen) {
+          fontsList->append(new FontInfo(font, doc));
+          if (fontsLen == fontsSize) {
+            fontsSize += 32;
+            fonts = (Ref *)grealloc(fonts, fontsSize * sizeof(Ref));
+          }
+          fonts[fontsLen++] = fontRef;
+        }
+      }
+    }
+    delete gfxFontDict;
+  }
+  obj1.free();
+
+  // recursively scan any resource dictionaries in objects in this
+  // resource dictionary
+  resDict->lookup("XObject", &xObjDict);
+  if (xObjDict.isDict()) {
+    for (i = 0; i < xObjDict.dictGetLength(); ++i) {
+      xObjDict.dictGetVal(i, &xObj);
+      if (xObj.isStream()) {
+        xObj.streamGetDict()->lookup("Resources", &resObj);
+        if (resObj.isDict()) {
+          scanFonts(resObj.getDict(), fontsList);
+        }
+        resObj.free();
+      }
+      xObj.free();
+    }
+  }
+  xObjDict.free();
+}
+
+// Captures the properties of font: a private copy of its original name
+// (NULL if the font has none), whether it is embedded, whether its font
+// object has a ToUnicode stream, and whether the name looks like a subset
+// name.  doc is used to fetch the font object.
+FontInfo::FontInfo(GfxFont *font, PDFDoc *doc) {
+  Ref embRef;
+  Object fontObj, toUnicodeObj;
+  GooString *origName;
+  int i;
+
+  fontRef = *font->getID();
+
+  // font name: check the pointer before copying, since the subset test
+  // below already allows for a missing name
+  origName = font->getOrigName();
+  name = origName ? origName->copy() : NULL;
+
+  // check for an embedded font
+  if (font->getType() == fontType3) {
+    emb = gTrue;
+  } else {
+    emb = font->getEmbeddedFontID(&embRef);
+  }
+
+  // look for a ToUnicode map
+  hasToUnicode = gFalse;
+  if (doc->getXRef()->fetch(fontRef.num, fontRef.gen, &fontObj)->isDict()) {
+    hasToUnicode = fontObj.dictLookup("ToUnicode", &toUnicodeObj)->isStream();
+    toUnicodeObj.free();
+  }
+  fontObj.free();
+
+  // check for a font subset name: capital letters followed by a '+'
+  // sign
+  subset = gFalse;
+  if (name) {
+    for (i = 0; i < name->getLength(); ++i) {
+      if (name->getChar(i) < 'A' || name->getChar(i) > 'Z') {
+        break;
+      }
+    }
+    subset = i > 0 && i < name->getLength() && name->getChar(i) == '+';
+  }
+}
+
+// Copy constructor: duplicates the name string so that each FontInfo
+// deletes only its own copy; a missing (NULL) name stays NULL.
+FontInfo::FontInfo(FontInfo& f) {
+  name = f.name ? f.name->copy() : NULL;
+  emb = f.emb;
+  subset = f.subset;
+  hasToUnicode = f.hasToUnicode;
+  fontRef = f.fontRef;
+}
+
+// Deletes the name string; each constructor stores its own copy() of it.
+FontInfo::~FontInfo() {
+ delete name;
+}
--- /dev/null 2005-06-11 14:27:27.427003784 +0200
+++ poppler/FontInfo.h 2005-06-11 11:35:08.000000000 +0200
@@ -0,0 +1,51 @@
+#ifndef FONT_INFO_H
+#define FONT_INFO_H
+
+#include "goo/gtypes.h"
+#include "goo/GooList.h"
+
+// Snapshot of the properties of one font, taken when it is constructed.
+class FontInfo {
+public:
+
+  // Constructor.
+  FontInfo(GfxFont *fontA, PDFDoc *doc);
+  // Copy constructor
+  FontInfo(FontInfo& f);
+  // Destructor.
+  ~FontInfo();
+
+  // The returned string belongs to this object and is deleted with it.
+  GooString *getName() { return name; };
+  GBool getEmbedded() { return emb; };
+  GBool getSubset() { return subset; };
+  GBool getToUnicode() { return hasToUnicode; };
+
+private:
+  // Not implemented: the implicit assignment would copy the name pointer
+  // and both objects would then delete the same string.
+  FontInfo& operator=(const FontInfo&);
+
+  GooString *name;      // deleted in the destructor
+  GBool emb;
+  GBool subset;
+  GBool hasToUnicode;
+  Ref fontRef;
+};
+
+// Walks a document in batches of pages and reports each font once.
+class FontInfoScanner {
+public:
+
+  // Constructor.
+  FontInfoScanner(PDFDoc *doc);
+  // Destructor.
+  ~FontInfoScanner();
+
+  // Scans the next batch of pages.  Returns a new list of FontInfo
+  // objects for fonts not reported before, or NULL once the scanner has
+  // reached the end of the document.
+  GooList *scan(int nPages);
+
+private:
+
+  // Not implemented: a copy would share the fonts array and both
+  // destructors would free it.
+  FontInfoScanner(const FontInfoScanner&);
+  FontInfoScanner& operator=(const FontInfoScanner&);
+
+  PDFDoc *doc;
+  int currentPage;      // next page to scan; set to 1 by the constructor
+  Ref *fonts;           // references of the fonts already reported
+  int fontsLen;         // entries in use in fonts
+  int fontsSize;        // entries allocated for fonts
+
+  void scanFonts(Dict *resDict, GooList *fontsList);
+};
+
+#endif
More information about the poppler
mailing list