[poppler] Re: [Patch] Info about document fonts

Marco Pesenti Gritti mpgritti at gmail.com
Sat Jun 11 07:27:01 PDT 2005


Yet another update. Fixed a bug in the FontInfo code.

Marco

On 6/11/05, Marco Pesenti Gritti <mpgritti at gmail.com> wrote:
> Grrr the patch :)
> 
> On 6/11/05, Marco Pesenti Gritti <mpgritti at gmail.com> wrote:
> > Attached patch implements a scan api, as discussed yesterday on irc,
> > which can scan the document for fonts incrementally. I used it in
> > evince and it works pretty well.
> >
> > I'm not that sure about the _scan api yet:
> >
> > gboolean
> > poppler_font_info_scan (PopplerFontInfo    *font_info,
> >                                    int                         n_pages,
> >                                    PopplerFontsIter  **iter)
> >
> > *iter contains an iterator if fonts was found otherwise NULL, the
> > return value is FALSE when all the document pages have been scanned.
> >
> > I'm not yet 100% sure about the ideal UI. In evince I'm not adding
> > fonts incrementally as they are found atm. Instead I'm showing a
> > "Loading..." message until all fonts has been scanned. The reason is
> > that I'm not sure what sort of feedback to give if we incrementally
> > add items (how do you know scanning is still in progress?) and if that
> > is actually necessary anyway.
> >
> > Anyway, I guess this could be a decent start, we could improve api and
> > UI later...
> >
> > Marco
> >
> 
> 
>
-------------- next part --------------
? dump.txt
? slice.png
? glib/dump.txt
? glib/poppler-enums.c
? glib/poppler-enums.h
? glib/slice.png
? poppler/FontInfo.cc
? poppler/FontInfo.h
Index: glib/poppler-document.cc
===================================================================
RCS file: /cvs/poppler/poppler/glib/poppler-document.cc,v
retrieving revision 1.17
diff -u -r1.17 poppler-document.cc
--- glib/poppler-document.cc	29 May 2005 14:59:34 -0000	1.17
+++ glib/poppler-document.cc	11 Jun 2005 14:24:45 -0000
@@ -26,6 +26,7 @@
 #include <GfxState.h>
 #include <SplashOutputDev.h>
 #include <Stream.h>
+#include <FontInfo.h>
 
 #include "poppler.h"
 #include "poppler-private.h"
@@ -745,6 +746,136 @@
 	
 }
 
+struct _PopplerFontsIter
+{
+	GooList *items;	/* list whose entries are read back as FontInfo pointers */
+	int index;	/* position of the current entry within items */
+};
+
+/* Boxed GType for PopplerFontsIter; registered on the first call, cached after. */
+GType
+poppler_fonts_iter_get_type (void)
+{
+  static GType type_id = 0;
+  if (type_id != 0)
+    return type_id;
+  type_id = g_boxed_type_register_static ("PopplerFontsIter",
+					  (GBoxedCopyFunc) poppler_fonts_iter_copy,
+					  (GBoxedFreeFunc) poppler_fonts_iter_free);
+  return type_id;
+}
+
+/* Name of the font at @iter's current position; NULL if @iter is NULL or the font has no name. */
+const char *
+poppler_fonts_iter_get_name (PopplerFontsIter *iter)
+{
+	g_return_val_if_fail (iter != NULL, NULL);
+
+	FontInfo *info = (FontInfo *)iter->items->get (iter->index);
+	return (info && info->getName()) ? info->getName()->getCString() : NULL;
+}
+
+/*
+ * Advance @iter to the next font.  Returns TRUE when the new position is
+ * inside the list, FALSE once the end has been passed (or @iter is NULL).
+ */
+gboolean
+poppler_fonts_iter_next (PopplerFontsIter *iter)
+{
+	g_return_val_if_fail (iter != NULL, FALSE);
+
+	return (++iter->index < iter->items->getLength()) ? TRUE : FALSE;
+}
+
+/* Deep-copy @iter: the result has its own list of FontInfo copies and the same index. */
+PopplerFontsIter *
+poppler_fonts_iter_copy (PopplerFontsIter *iter)
+{
+	g_return_val_if_fail (iter != NULL, NULL);
+
+	PopplerFontsIter *dup = g_new0 (PopplerFontsIter, 1);
+	dup->index = iter->index;
+	dup->items = new GooList ();
+
+	const int count = iter->items->getLength();
+	for (int n = 0; n < count; n++) {
+		FontInfo *src = (FontInfo *)iter->items->get(n);
+		dup->items->append (new FontInfo (*src));
+	}
+
+	return dup;
+}
+
+/* Free @iter and release its item list through deleteGooList; NULL is ignored. */
+void
+poppler_fonts_iter_free (PopplerFontsIter *iter)
+{
+	if (iter != NULL) {
+		deleteGooList (iter->items, FontInfo);
+
+		g_free (iter);
+	}
+}
+
+/* Wrap @items in a newly allocated iterator at index 0; the list pointer is stored as-is. */
+static PopplerFontsIter *
+poppler_fonts_iter_new (GooList *items)
+{
+	PopplerFontsIter *fresh = g_new0 (PopplerFontsIter, 1);
+
+	fresh->index = 0;
+	fresh->items = items;
+
+	return fresh;
+}
+
+/* Create a font scanner for @document; the result holds a reference on the document. */
+PopplerFontInfo *
+poppler_font_info_new (PopplerDocument *document)
+{
+	g_return_val_if_fail (POPPLER_IS_DOCUMENT (document), NULL);
+
+	PopplerFontInfo *info = g_new0 (PopplerFontInfo, 1);
+
+	info->scanner = new FontInfoScanner(document->doc);
+	info->document = (PopplerDocument *) g_object_ref (document);
+
+	return info;
+}
+
+/* Scan the next @n_pages pages for fonts.  *@iter receives an iterator over the fonts
+ * found in them, or NULL if there were none.  Returns FALSE once no pages are left. */
+gboolean
+poppler_font_info_scan (PopplerFontInfo   *font_info,
+			int                n_pages,
+			PopplerFontsIter **iter)
+{
+	g_return_val_if_fail (font_info != NULL, FALSE);
+	g_return_val_if_fail (iter != NULL, FALSE);
+
+	*iter = NULL;
+	GooList *items = font_info->scanner->scan(n_pages);
+	if (items == NULL)
+		return FALSE;	/* scanner is already past the last page */
+
+	if (items->getLength() > 0)
+		*iter = poppler_fonts_iter_new(items);	/* iterator now holds the list */
+	else
+		delete items;	/* no new fonts on these pages */
+
+	return TRUE;
+}
+
+/* Free @font_info: delete its scanner, drop the document reference, free the struct. */
+void
+poppler_font_info_free (PopplerFontInfo *font_info)
+{
+	g_return_if_fail (font_info != NULL);
+	delete font_info->scanner;
+	g_object_unref (font_info->document);
+	g_free (font_info);	/* pairs with g_new0 in poppler_font_info_new */
+}
+
 /**
  * poppler_ps_file_new:
  * @document: a #PopplerDocument
Index: glib/poppler-document.h
===================================================================
RCS file: /cvs/poppler/poppler/glib/poppler-document.h,v
retrieving revision 1.11
diff -u -r1.11 poppler-document.h
--- glib/poppler-document.h	23 May 2005 04:23:53 -0000	1.11
+++ glib/poppler-document.h	11 Jun 2005 14:24:46 -0000
@@ -103,6 +103,19 @@
 PopplerAction    *poppler_index_iter_get_action (PopplerIndexIter  *iter);
 gboolean          poppler_index_iter_next       (PopplerIndexIter  *iter);
 
+/* Interface for getting the Fonts of a poppler_document */
+PopplerFontInfo  *poppler_font_info_new         (PopplerDocument   *document);
+gboolean          poppler_font_info_scan        (PopplerFontInfo   *font_info,
+						 int                n_pages,
+						 PopplerFontsIter **iter);
+void		  poppler_font_info_free	(PopplerFontInfo   *font_info);
+
+GType             poppler_fonts_iter_get_type   (void) G_GNUC_CONST;
+PopplerFontsIter *poppler_fonts_iter_copy       (PopplerFontsIter  *iter);
+void              poppler_fonts_iter_free       (PopplerFontsIter  *iter);
+const char       *poppler_fonts_iter_get_name   (PopplerFontsIter  *iter);
+gboolean          poppler_fonts_iter_next       (PopplerFontsIter  *iter);
+
 /* Export to ps */
 PopplerPSFile *poppler_ps_file_new   (PopplerDocument *document,
 				      const char      *filename,
Index: glib/poppler-private.h
===================================================================
RCS file: /cvs/poppler/poppler/glib/poppler-private.h,v
retrieving revision 1.6
diff -u -r1.6 poppler-private.h
--- glib/poppler-private.h	21 Apr 2005 05:20:25 -0000	1.6
+++ glib/poppler-private.h	11 Jun 2005 14:24:46 -0000
@@ -5,6 +5,7 @@
 #include <PDFDoc.h>
 #include <PSOutputDev.h>
 #include <Link.h>
+#include <FontInfo.h>
 
 #if defined (HAVE_CAIRO)
 #include <CairoOutputDevImage.h>
@@ -30,6 +31,12 @@
   PSOutputDev *out;
 };
 
+struct _PopplerFontInfo
+{
+  PopplerDocument *document;	/* referenced in poppler_font_info_new, unreferenced in poppler_font_info_free */
+  FontInfoScanner *scanner;	/* created in poppler_font_info_new, deleted in poppler_font_info_free */
+};
+
 struct _PopplerPage
 {
   GObject parent_instance;
Index: glib/poppler.h
===================================================================
RCS file: /cvs/poppler/poppler/glib/poppler.h,v
retrieving revision 1.7
diff -u -r1.7 poppler.h
--- glib/poppler.h	11 May 2005 20:01:43 -0000	1.7
+++ glib/poppler.h	11 Jun 2005 14:24:46 -0000
@@ -45,9 +45,11 @@
 
 typedef struct _PopplerDocument    PopplerDocument;
 typedef struct _PopplerIndexIter   PopplerIndexIter;
+typedef struct _PopplerFontsIter   PopplerFontsIter;
 typedef struct _PopplerRectangle   PopplerRectangle;
 typedef struct _PopplerLinkMapping PopplerLinkMapping;
 typedef struct _PopplerPage        PopplerPage;
+typedef struct _PopplerFontInfo    PopplerFontInfo;
 typedef struct _PopplerPSFile      PopplerPSFile;
 typedef union  _PopplerAction      PopplerAction;
 
Index: glib/test-poppler-glib.c
===================================================================
RCS file: /cvs/poppler/poppler/glib/test-poppler-glib.c,v
retrieving revision 1.11
diff -u -r1.11 test-poppler-glib.c
--- glib/test-poppler-glib.c	29 May 2005 14:59:34 -0000	1.11
+++ glib/test-poppler-glib.c	11 Jun 2005 14:24:47 -0000
@@ -15,6 +15,8 @@
   PopplerPageLayout layout;
   PopplerPageMode mode;
   PopplerViewerPreferences view_prefs;
+  PopplerFontInfo *font_info;
+  PopplerFontsIter *fonts_iter;
   GEnumValue *enum_value;
 
   g_object_get (document,
@@ -53,6 +55,18 @@
   g_print ("\tcreation date:\t%d\n", creation_date);
   g_print ("\tmodified date:\t%d\n", mod_date);
 
+  g_print ("\tfonts:\n");
+  font_info = poppler_font_info_new (document);
+  while (poppler_font_info_scan (font_info, 20, &fonts_iter)) {
+    if (fonts_iter) {
+      do {
+        g_print ("\t\t\t%s\n", poppler_fonts_iter_get_name (fonts_iter));
+      } while (poppler_fonts_iter_next (fonts_iter));
+      poppler_fonts_iter_free (fonts_iter);
+    }
+  }
+  poppler_font_info_free (font_info);
+
   /* FIXME: print out the view prefs when we support it */
 
   g_free (title);
Index: poppler/Makefile.am
===================================================================
RCS file: /cvs/poppler/poppler/poppler/Makefile.am,v
retrieving revision 1.6
diff -u -r1.6 Makefile.am
--- poppler/Makefile.am	27 Apr 2005 20:56:18 -0000	1.6
+++ poppler/Makefile.am	11 Jun 2005 14:24:48 -0000
@@ -92,6 +92,7 @@
 	Dict.h			\
 	Error.h			\
 	FontEncodingTables.h	\
+	FontInfo.h		\
 	Function.cc		\
 	Function.h		\
 	Gfx.h			\
@@ -143,6 +144,7 @@
 	Dict.cc 		\
 	Error.cc 		\
 	FontEncodingTables.cc	\
+	FontInfo.cc		\
 	Function.cc		\
 	Gfx.cc 			\
 	GfxFont.cc 		\
Index: poppler/XRef.cc
===================================================================
RCS file: /cvs/poppler/poppler/poppler/XRef.cc,v
retrieving revision 1.2
diff -u -r1.2 XRef.cc
--- poppler/XRef.cc	16 Mar 2005 15:51:36 -0000	1.2
+++ poppler/XRef.cc	11 Jun 2005 14:24:51 -0000
@@ -397,6 +397,8 @@
         error(-1, "Invalid 'obj' parameters'");
         goto err1;
       }
+
+printf ("%p %d\n", entries, newSize); 
  
       entries = (XRefEntry *)grealloc(entries, newSize * sizeof(XRefEntry));
       for (i = size; i < newSize; ++i) {
--- /dev/null	2005-06-11 14:27:27.427003784 +0200
+++ poppler/FontInfo.cc	2005-06-11 16:22:15.000000000 +0200
@@ -0,0 +1,197 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <math.h>
+#include "GlobalParams.h"
+#include "Error.h"
+#include "Object.h"
+#include "Dict.h"
+#include "GfxFont.h"
+#include "Annot.h"
+#include "PDFDoc.h"
+#include "config.h"
+#include "FontInfo.h"
+
+static const char *fontTypeNames[] = {  // string literals, hence const; presumably indexed by font type -- TODO confirm
+  "unknown",
+  "Type 1",
+  "Type 1C",
+  "Type 3",
+  "TrueType",
+  "CID Type 0",
+  "CID Type 0C",
+  "CID TrueType"
+};
+
+// Set up a scanner over docA: scanning starts at page 1 and the table of
+// already-seen font references starts out empty.
+FontInfoScanner::FontInfoScanner(PDFDoc *docA)
+  : doc(docA), currentPage(1), fonts(NULL), fontsLen(0), fontsSize(0)
+{
+}
+
+FontInfoScanner::~FontInfoScanner() {
+  gfree(fonts);  // release the table of seen font refs that scanFonts grows with grealloc
+}
+
+// Scan the next nPages pages (at least one) for fonts that no earlier call reported.
+// Returns a new list of FontInfo (possibly empty), or NULL when no pages remain.
+GooList *FontInfoScanner::scan(int nPages) {
+  GooList *result;
+  Page *page;
+  Dict *resDict;
+  Annots *annots;
+  Object obj1, obj2;
+  int pg, i, lastPage, pagesLeft;
+
+  pagesLeft = doc->getNumPages() - currentPage + 1;
+  if (pagesLeft <= 0) {
+    return NULL;
+  }
+
+  // clamp the request to [1, pagesLeft]; the page range below is inclusive
+  if (nPages < 1) nPages = 1;
+  if (nPages > pagesLeft) nPages = pagesLeft;
+  lastPage = currentPage + nPages - 1;
+
+  result = new GooList();
+  for (pg = currentPage; pg <= lastPage; ++pg) {
+    page = doc->getCatalog()->getPage(pg);
+    if ((resDict = page->getResourceDict())) {
+      scanFonts(resDict, result);
+    }
+    annots = new Annots(doc->getXRef(), page->getAnnots(&obj1));
+    obj1.free();
+    for (i = 0; i < annots->getNumAnnots(); ++i) {
+      if (annots->getAnnot(i)->getAppearance(&obj1)->isStream()) {
+	obj1.streamGetDict()->lookup("Resources", &obj2);
+	if (obj2.isDict()) {
+	  scanFonts(obj2.getDict(), result);
+	}
+	obj2.free();
+      }
+      obj1.free();
+    }
+    delete annots;
+  }
+  currentPage = lastPage + 1;
+  return result;
+}
+
+// Append to fontsList a FontInfo for each font in resDict whose ref is not yet
+// in the seen-fonts table, then recurse into the resources of its XObjects.
+void FontInfoScanner::scanFonts(Dict *resDict, GooList *fontsList) {
+  Object obj1, obj2, xObjDict, xObj, resObj;
+  Ref r;
+  GfxFontDict *gfxFontDict;
+  GfxFont *font;
+  int i, j;
+
+  // scan the fonts in this resource dictionary
+  gfxFontDict = NULL;
+  resDict->lookupNF("Font", &obj1);
+  if (obj1.isRef()) {
+    obj1.fetch(doc->getXRef(), &obj2);
+    if (obj2.isDict()) {
+      r = obj1.getRef();
+      gfxFontDict = new GfxFontDict(doc->getXRef(), &r, obj2.getDict());
+    }
+    obj2.free();
+  } else if (obj1.isDict()) {
+    gfxFontDict = new GfxFontDict(doc->getXRef(), NULL, obj1.getDict());
+  }
+  if (gfxFontDict) {
+    for (i = 0; i < gfxFontDict->getNumFonts(); ++i) {
+      if ((font = gfxFontDict->getFont(i))) {
+        Ref fontRef = *font->getID();
+        GBool alreadySeen = gFalse;
+
+        // check for an already-seen font (own index j: i must keep the dictionary position)
+        for (j = 0; j < fontsLen && !alreadySeen; ++j) {
+          alreadySeen = fontRef.num == fonts[j].num && fontRef.gen == fonts[j].gen;
+        }
+
+        // add this font to the list
+        if (!alreadySeen) {
+          fontsList->append(new FontInfo(font, doc));
+          if (fontsLen == fontsSize) {
+            fontsSize += 32;
+            fonts = (Ref *)grealloc(fonts, fontsSize * sizeof(Ref));
+          }
+          fonts[fontsLen++] = fontRef;
+        }
+      }
+    }
+    delete gfxFontDict;
+  }
+  obj1.free();
+
+  // recursively scan any resource dictionaries in objects in this
+  // resource dictionary
+  resDict->lookup("XObject", &xObjDict);
+  if (xObjDict.isDict()) {
+    for (i = 0; i < xObjDict.dictGetLength(); ++i) {
+      xObjDict.dictGetVal(i, &xObj);
+      if (xObj.isStream()) {
+	xObj.streamGetDict()->lookup("Resources", &resObj);
+	if (resObj.isDict()) {
+	  scanFonts(resObj.getDict(), fontsList);
+	}
+	resObj.free();
+      }
+      xObj.free();
+    }
+  }
+  xObjDict.free();
+}
+
+// Summarise font: its name, embedded / subset flags and ToUnicode presence.
+FontInfo::FontInfo(GfxFont *font, PDFDoc *doc) {
+  Ref embRef;
+  Object fontObj, toUnicodeObj;
+  int i;
+
+  fontRef = *font->getID();
+
+  // font name (left NULL if the font has none; the subset check allows for that)
+  name = font->getOrigName() ? font->getOrigName()->copy() : NULL;
+
+  // check for an embedded font
+  if (font->getType() == fontType3) {
+    emb = gTrue;
+  } else {
+    emb = font->getEmbeddedFontID(&embRef);
+  }
+
+  // look for a ToUnicode map
+  hasToUnicode = gFalse;
+  if (doc->getXRef()->fetch(fontRef.num, fontRef.gen, &fontObj)->isDict()) {
+    hasToUnicode = fontObj.dictLookup("ToUnicode", &toUnicodeObj)->isStream();
+    toUnicodeObj.free();
+  }
+  fontObj.free();
+
+  // check for a font subset name: capital letters followed by a '+' sign
+  subset = gFalse;
+  if (name) {
+    for (i = 0; i < name->getLength(); ++i) {
+      if (name->getChar(i) < 'A' || name->getChar(i) > 'Z') {
+	break;
+      }
+    }
+    subset = i > 0 && i < name->getLength() && name->getChar(i) == '+';
+  }
+}
+
+// Copy constructor: clones the name string (a NULL name stays NULL) and
+// copies the flags and the font reference.
+FontInfo::FontInfo(FontInfo& f)
+  : name(f.name ? f.name->copy() : NULL), emb(f.emb), subset(f.subset),
+    hasToUnicode(f.hasToUnicode), fontRef(f.fontRef)
+{
+}
+
+FontInfo::~FontInfo() {
+  delete name;  // name is a copy() made by the constructors, not a borrowed pointer
+}
--- /dev/null	2005-06-11 14:27:27.427003784 +0200
+++ poppler/FontInfo.h	2005-06-11 11:35:08.000000000 +0200
@@ -0,0 +1,51 @@
+#ifndef FONT_INFO_H
+#define FONT_INFO_H
+
+#include "goo/gtypes.h"
+#include "goo/GooList.h"
+
+// Summary of one font: its name plus embedded / subset / ToUnicode flags.
+class FontInfo {
+public:
+  // Build the summary for fontA; doc is used to fetch the font object.
+  FontInfo(GfxFont *fontA, PDFDoc *doc);
+  FontInfo(FontInfo& f);  // the copy gets its own duplicate of the name
+  ~FontInfo();            // deletes the name
+
+  GooString *getName() const      { return name; }          // the font's original name
+  GBool      getEmbedded() const  { return emb; }           // Type 3, or an embedded font was found
+  GBool      getSubset() const    { return subset; }        // name is capitals followed by '+'
+  GBool      getToUnicode() const { return hasToUnicode; }  // font dict has a ToUnicode stream
+
+private:
+  // Declared, not defined: default assignment would delete one name twice.
+  FontInfo& operator=(const FontInfo&);
+  GooString *name;
+  GBool emb;
+  GBool subset;
+  GBool hasToUnicode;
+  Ref fontRef;
+};
+
+class FontInfoScanner {
+public:
+
+  // Constructor: scanning of doc starts at page 1.
+  FontInfoScanner(PDFDoc *doc);
+  // Destructor: frees the table of seen font refs.
+  ~FontInfoScanner();
+
+  GooList *scan(int nPages);  // scan the next batch of pages; NULL once all pages are done
+
+private:
+
+  PDFDoc *doc;        // document being scanned
+  int currentPage;    // first page the next scan() call will look at
+  Ref *fonts;         // refs of fonts already reported, grown with grealloc
+  int fontsLen;       // number of entries used in fonts
+  int fontsSize;      // number of entries allocated for fonts
+
+  void scanFonts(Dict *resDict, GooList *fontsList);  // add unseen fonts of one resource dict to fontsList
+};
+
+#endif


More information about the poppler mailing list