[poppler] Re: [Patch] Info about document fonts

Marco Pesenti Gritti mpgritti at gmail.com
Sat Jun 4 10:04:36 PDT 2005


Slightly updated patch; I implemented the evince font properties page with this.

Marco

On 6/4/05, Marco Pesenti Gritti <mpgritti at gmail.com> wrote:
> Hi,
> 
> here is a patch that allows retrieving font information from the
> document. It's based on the xpdf pdffonts code. It's not complete yet, but
> if it makes sense I'd like to check it in so that we can start playing with
> the user interface in evince and see what more we need...
> Suggestions welcome; my C++/xpdf knowledge is not that great.
> 
> Marco
>
-------------- next part --------------
? dump.txt
? slice.png
? glib/poppler-enums.c
? glib/poppler-enums.h
? poppler/FontInfo.cc
? poppler/FontInfo.h
Index: glib/poppler-document.cc
===================================================================
RCS file: /cvs/poppler/poppler/glib/poppler-document.cc,v
retrieving revision 1.17
diff -u -r1.17 poppler-document.cc
--- glib/poppler-document.cc	29 May 2005 14:59:34 -0000	1.17
+++ glib/poppler-document.cc	4 Jun 2005 17:02:14 -0000
@@ -26,6 +26,7 @@
 #include <GfxState.h>
 #include <SplashOutputDev.h>
 #include <Stream.h>
+#include <FontInfo.h>
 
 #include "poppler.h"
 #include "poppler-private.h"
@@ -745,6 +746,166 @@
 	
 }
 
+/* Iterator over the fonts used by a document. */
+struct _PopplerFontsIter
+{
+	PopplerDocument *document;	/* referenced for the iter's lifetime */
+	GooList *items;			/* FontInfo objects, owned by the iter */
+	int index;			/* position of the current item in items */
+};
+
+GType
+poppler_fonts_iter_get_type (void)
+{
+  static GType our_type = 0;
+
+  /* Register the boxed type once, on first use. */
+  if (our_type == 0)
+    our_type = g_boxed_type_register_static ("PopplerFontsIter",
+					     (GBoxedCopyFunc) poppler_fonts_iter_copy,
+					     (GBoxedFreeFunc) poppler_fonts_iter_free);
+
+  return our_type;
+}
+
+/**
+ * poppler_fonts_iter_copy:
+ * @iter: a #PopplerFontsIter
+ *
+ * Creates an independent copy of @iter, positioned on the same item.
+ *
+ * Return value: a new #PopplerFontsIter to be released with
+ * poppler_fonts_iter_free(), or %NULL on failure
+ **/
+PopplerFontsIter *
+poppler_fonts_iter_copy (PopplerFontsIter *iter)
+{
+	PopplerFontsIter *new_iter;
+	GooList *items;
+
+	g_return_val_if_fail (iter != NULL, NULL);
+
+	/* The font list must not be shared between iters:
+	 * poppler_fonts_iter_free() deletes it, so a plain struct copy
+	 * would lead to a double free.  Rescan the document so that the
+	 * copy owns a list of its own. */
+	items = FontInfo::scan (iter->document->doc);
+	if (items == NULL)
+		return NULL;
+
+	new_iter = g_new0 (PopplerFontsIter, 1);
+	new_iter->document = (PopplerDocument *) g_object_ref (iter->document);
+	new_iter->items = items;
+	new_iter->index = iter->index;
+
+	return new_iter;
+}
+
+/**
+ * poppler_fonts_iter_new:
+ * @document: a #PopplerDocument
+ *
+ * Scans @document for fonts and returns an iterator positioned on the
+ * first one found.
+ *
+ * Return value: a new #PopplerFontsIter to be released with
+ * poppler_fonts_iter_free(), or %NULL if no fonts were found
+ **/
+PopplerFontsIter *
+poppler_fonts_iter_new (PopplerDocument *document)
+{
+	PopplerFontsIter *iter;
+	GooList *items;
+
+	g_return_val_if_fail (document != NULL, NULL);
+
+	items = FontInfo::scan(document->doc);
+	if (items == NULL)
+		return NULL;
+
+	/* An iter always designates an item, so an empty list cannot be
+	 * iterated; report it as "no fonts" instead. */
+	if (items->getLength() == 0) {
+		delete items;
+		return NULL;
+	}
+
+	iter = g_new0 (PopplerFontsIter, 1);
+	iter->document = (PopplerDocument *) g_object_ref (document);
+	iter->items = items;
+	iter->index = 0;
+
+	return iter;
+}
+
+/**
+ * poppler_fonts_iter_get_name:
+ * @iter: a #PopplerFontsIter
+ *
+ * Returns the name of the font @iter currently points at.  The string
+ * is not a copy and must not be freed by the caller.
+ *
+ * Return value: the font name, or %NULL if the font has no name or
+ * @iter has been moved past the last font
+ **/
+const char *
+poppler_fonts_iter_get_name (PopplerFontsIter *iter)
+{
+	FontInfo *info;
+	GooString *name;
+
+	g_return_val_if_fail (iter != NULL, NULL);
+
+	/* index runs past the list once poppler_fonts_iter_next() fails. */
+	if (iter->index < 0 || iter->index >= iter->items->getLength())
+		return NULL;
+
+	info = (FontInfo *)iter->items->get (iter->index);
+	name = info->getName();
+
+	return name != NULL ? name->getCString() : NULL;
+}
+
+/**
+ * poppler_fonts_iter_next:
+ * @iter: a #PopplerFontsIter
+ *
+ * Moves @iter to the next font.
+ *
+ * Return value: %TRUE if @iter now points at a font, %FALSE if there
+ * are no more fonts
+ **/
+gboolean
+poppler_fonts_iter_next (PopplerFontsIter *iter)
+{
+	g_return_val_if_fail (iter != NULL, FALSE);
+
+	iter->index++;
+	if (iter->index >= iter->items->getLength())
+		return FALSE;
+
+	return TRUE;
+}
+
+/**
+ * poppler_fonts_iter_free:
+ * @iter: a #PopplerFontsIter, or %NULL
+ *
+ * Frees @iter together with its font list and drops its reference on
+ * the document.  Does nothing if @iter is %NULL.
+ **/
+void
+poppler_fonts_iter_free (PopplerFontsIter *iter)
+{
+	if (iter == NULL)
+		return;
+
+	deleteGooList (iter->items, FontInfo);
+
+	g_object_unref (iter->document);
+	g_free (iter);
+}
+
 /**
  * poppler_ps_file_new:
  * @document: a #PopplerDocument
Index: glib/poppler-document.h
===================================================================
RCS file: /cvs/poppler/poppler/glib/poppler-document.h,v
retrieving revision 1.11
diff -u -r1.11 poppler-document.h
--- glib/poppler-document.h	23 May 2005 04:23:53 -0000	1.11
+++ glib/poppler-document.h	4 Jun 2005 17:02:15 -0000
@@ -103,6 +103,15 @@
 PopplerAction    *poppler_index_iter_get_action (PopplerIndexIter  *iter);
 gboolean          poppler_index_iter_next       (PopplerIndexIter  *iter);
 
+/* Interface for getting the Fonts of a poppler_document */
+GType             poppler_fonts_iter_get_type   (void) G_GNUC_CONST;
+PopplerFontsIter *poppler_fonts_iter_new        (PopplerDocument   *document);
+PopplerFontsIter *poppler_fonts_iter_copy       (PopplerFontsIter  *iter);
+void              poppler_fonts_iter_free       (PopplerFontsIter  *iter);
+
+const char       *poppler_fonts_iter_get_name   (PopplerFontsIter  *iter);
+gboolean          poppler_fonts_iter_next       (PopplerFontsIter  *iter);
+
 /* Export to ps */
 PopplerPSFile *poppler_ps_file_new   (PopplerDocument *document,
 				      const char      *filename,
Index: glib/poppler.h
===================================================================
RCS file: /cvs/poppler/poppler/glib/poppler.h,v
retrieving revision 1.7
diff -u -r1.7 poppler.h
--- glib/poppler.h	11 May 2005 20:01:43 -0000	1.7
+++ glib/poppler.h	4 Jun 2005 17:02:15 -0000
@@ -45,6 +45,7 @@
 
 typedef struct _PopplerDocument    PopplerDocument;
 typedef struct _PopplerIndexIter   PopplerIndexIter;
+typedef struct _PopplerFontsIter   PopplerFontsIter;
 typedef struct _PopplerRectangle   PopplerRectangle;
 typedef struct _PopplerLinkMapping PopplerLinkMapping;
 typedef struct _PopplerPage        PopplerPage;
Index: glib/test-poppler-glib.c
===================================================================
RCS file: /cvs/poppler/poppler/glib/test-poppler-glib.c,v
retrieving revision 1.11
diff -u -r1.11 test-poppler-glib.c
--- glib/test-poppler-glib.c	29 May 2005 14:59:34 -0000	1.11
+++ glib/test-poppler-glib.c	4 Jun 2005 17:02:16 -0000
@@ -15,6 +15,7 @@
   PopplerPageLayout layout;
   PopplerPageMode mode;
   PopplerViewerPreferences view_prefs;
+  PopplerFontsIter *fonts_iter;
   GEnumValue *enum_value;
 
   g_object_get (document,
@@ -53,6 +54,16 @@
   g_print ("\tcreation date:\t%d\n", creation_date);
   g_print ("\tmodified date:\t%d\n", mod_date);
 
+  g_print ("\tfonts:\n");
+  fonts_iter = poppler_fonts_iter_new (document);
+  /* The iter may be NULL; only walk and free it when one was returned. */
+  if (fonts_iter != NULL) {
+    do {
+      g_print ("\t\t\t%s\n", poppler_fonts_iter_get_name (fonts_iter));
+    } while (poppler_fonts_iter_next (fonts_iter));
+    poppler_fonts_iter_free (fonts_iter);
+  }
+
   /* FIXME: print out the view prefs when we support it */
 
   g_free (title);
Index: poppler/Makefile.am
===================================================================
RCS file: /cvs/poppler/poppler/poppler/Makefile.am,v
retrieving revision 1.6
diff -u -r1.6 Makefile.am
--- poppler/Makefile.am	27 Apr 2005 20:56:18 -0000	1.6
+++ poppler/Makefile.am	4 Jun 2005 17:02:17 -0000
@@ -92,6 +92,7 @@
 	Dict.h			\
 	Error.h			\
 	FontEncodingTables.h	\
+	FontInfo.h		\
 	Function.cc		\
 	Function.h		\
 	Gfx.h			\
@@ -143,6 +144,7 @@
 	Dict.cc 		\
 	Error.cc 		\
 	FontEncodingTables.cc	\
+	FontInfo.cc		\
 	Function.cc		\
 	Gfx.cc 			\
 	GfxFont.cc 		\
--- /dev/null	2005-06-04 19:35:04.067058504 +0200
+++ poppler/FontInfo.cc	2005-06-04 14:16:21.000000000 +0200
@@ -0,0 +1,180 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <math.h>
+#include "GlobalParams.h"
+#include "Error.h"
+#include "Object.h"
+#include "Dict.h"
+#include "GfxFont.h"
+#include "Annot.h"
+#include "PDFDoc.h"
+#include "config.h"
+#include "FontInfo.h"
+
+// Printable font type names.  Nothing in this file reads the table yet;
+// NOTE(review): the order presumably follows GfxFontType -- confirm
+// before indexing it with a font type.
+static const char *fontTypeNames[] = {
+  "unknown",
+  "Type 1",
+  "Type 1C",
+  "Type 3",
+  "TrueType",
+  "CID Type 0",
+  "CID Type 0C",
+  "CID TrueType"
+};
+
+// Collect the fonts used by <doc>.  Every page is visited: its resource
+// dictionary is scanned, and so are the resources of the appearance
+// stream of each of its annotations.  Returns a new list of FontInfo
+// objects; the list and its items are left for the caller to delete.
+GooList *FontInfo::scan(PDFDoc *doc) {
+  GooList *result;
+  Page *page;
+  Dict *resDict;
+  Annots *annots;
+  Object obj1, obj2;
+  int pg, i;
+
+  result = new GooList();
+
+  for (pg = 1; pg <= doc->getNumPages(); ++pg) {
+    page = doc->getCatalog()->getPage(pg);
+    if ((resDict = page->getResourceDict())) {
+      scanFonts(resDict, doc, result);
+    }
+    annots = new Annots(doc->getXRef(), page->getAnnots(&obj1));
+    obj1.free();
+    for (i = 0; i < annots->getNumAnnots(); ++i) {
+      if (annots->getAnnot(i)->getAppearance(&obj1)->isStream()) {
+	obj1.streamGetDict()->lookup("Resources", &obj2);
+	if (obj2.isDict()) {
+	  scanFonts(obj2.getDict(), doc, result);
+	}
+	obj2.free();
+      }
+      obj1.free();
+    }
+    delete annots;
+  }
+
+  return result;
+}
+
+// Append to <fontsList> a FontInfo for every font of <resDict> that the
+// list does not hold yet (fonts are told apart by their object
+// reference), then do the same for the resources of each XObject
+// stream in <resDict>.
+void FontInfo::scanFonts(Dict *resDict, PDFDoc *doc, GooList *fontsList) {
+  Object obj1, obj2, xObjDict, xObj, resObj;
+  Ref r;
+  GfxFontDict *gfxFontDict;
+  GfxFont *font;
+  int i;
+
+  // scan the fonts in this resource dictionary
+  gfxFontDict = NULL;
+  resDict->lookupNF("Font", &obj1);
+  if (obj1.isRef()) {
+    obj1.fetch(doc->getXRef(), &obj2);
+    if (obj2.isDict()) {
+      r = obj1.getRef();
+      gfxFontDict = new GfxFontDict(doc->getXRef(), &r, obj2.getDict());
+    }
+    obj2.free();
+  } else if (obj1.isDict()) {
+    gfxFontDict = new GfxFontDict(doc->getXRef(), NULL, obj1.getDict());
+  }
+  if (gfxFontDict) {
+    for (i = 0; i < gfxFontDict->getNumFonts(); ++i) {
+      if ((font = gfxFontDict->getFont(i))) {
+        Ref fontRef = *font->getID();
+        GBool already_seen = gFalse;
+
+        // check for an already-seen font
+        for (int k = 0; k < fontsList->getLength(); ++k) {
+          FontInfo *info = (FontInfo *)fontsList->get(k);
+          if (fontRef.num == info->fontRef.num && fontRef.gen == info->fontRef.gen) {
+            already_seen = gTrue;
+            break;
+          }
+        }
+
+        // add this font to the list
+        if (!already_seen) {
+          fontsList->append(new FontInfo(font, doc));
+        }
+      }
+    }
+    delete gfxFontDict;
+  }
+  obj1.free();
+
+  // recursively scan any resource dictionaries in objects in this
+  // resource dictionary
+  resDict->lookup("XObject", &xObjDict);
+  if (xObjDict.isDict()) {
+    for (i = 0; i < xObjDict.dictGetLength(); ++i) {
+      xObjDict.dictGetVal(i, &xObj);
+      if (xObj.isStream()) {
+	xObj.streamGetDict()->lookup("Resources", &resObj);
+	if (resObj.isDict()) {
+	  scanFonts(resObj.getDict(), doc, fontsList);
+	}
+	resObj.free();
+      }
+      xObj.free();
+    }
+  }
+  xObjDict.free();
+}
+
+// Record the name, embedding state, ToUnicode presence and subset
+// flag of <font>; <doc> supplies the xref used to fetch the font object.
+FontInfo::FontInfo(GfxFont *font, PDFDoc *doc) {
+  Ref embRef;
+  Object fontObj, toUnicodeObj;
+  GooString *origName;
+  int i;
+
+  fontRef = *font->getID();
+
+  // font name; guard against a missing one (the subset check below
+  // already expects name to be possibly NULL)
+  origName = font->getOrigName();
+  name = origName ? origName->copy() : NULL;
+
+  // check for an embedded font
+  if (font->getType() == fontType3) {
+    emb = gTrue;
+  } else {
+    emb = font->getEmbeddedFontID(&embRef);
+  }
+
+  // look for a ToUnicode map
+  hasToUnicode = gFalse;
+  if (doc->getXRef()->fetch(fontRef.num, fontRef.gen, &fontObj)->isDict()) {
+    hasToUnicode = fontObj.dictLookup("ToUnicode", &toUnicodeObj)->isStream();
+    toUnicodeObj.free();
+  }
+  fontObj.free();
+
+  // check for a font subset name: capital letters followed by a '+'
+  // sign
+  subset = gFalse;
+  if (name) {
+    for (i = 0; i < name->getLength(); ++i) {
+      if (name->getChar(i) < 'A' || name->getChar(i) > 'Z') {
+	break;
+      }
+    }
+    subset = i > 0 && i < name->getLength() && name->getChar(i) == '+';
+  }
+}
+
+FontInfo::~FontInfo() {
+  delete name;
+}
--- /dev/null	2005-06-04 19:35:04.067058504 +0200
+++ poppler/FontInfo.h	2005-06-04 14:20:43.000000000 +0200
@@ -0,0 +1,55 @@
+#ifndef FONT_INFO_H
+#define FONT_INFO_H
+
+#include "goo/gtypes.h"
+#include "goo/GooList.h"
+#include "goo/GooString.h"
+#include "Object.h"
+
+class Dict;
+class GfxFont;
+class PDFDoc;
+
+// Properties of one font used by a document, as gathered by
+// FontInfo::scan().
+class FontInfo {
+public:
+
+  // Constructor.
+  FontInfo(GfxFont *fontA, PDFDoc *doc);
+  // Copy constructor.  The name is duplicated because the destructor
+  // deletes it, so two objects must never share one string.
+  FontInfo(const FontInfo &other)
+    : name(other.name ? other.name->copy() : NULL),
+      emb(other.emb), subset(other.subset),
+      hasToUnicode(other.hasToUnicode), fontRef(other.fontRef) {}
+  // Destructor.
+  ~FontInfo();
+
+  // Scan <doc> and return a new list holding one FontInfo per font
+  // found.  The caller deletes the list and its items.
+  static GooList *scan(PDFDoc *doc);
+
+  // Font name; callers should be prepared for NULL.
+  GooString *getName()      { return name; }
+  // Type 3 font, or getEmbeddedFontID() reported an embedded font.
+  GBool      getEmbedded()  { return emb; }
+  // Name starts with capital letters followed by a '+' sign.
+  GBool      getSubset()    { return subset; }
+  // Font dictionary has a ToUnicode stream.
+  GBool      getToUnicode() { return hasToUnicode; }
+
+private:
+  GooString *name;
+  GBool emb;
+  GBool subset;
+  GBool hasToUnicode;
+  Ref fontRef;
+
+  // Not implemented: assignment would leave two objects owning <name>.
+  FontInfo &operator=(const FontInfo &);
+
+  static void scanFonts(Dict *resDict, PDFDoc *doc, GooList *fontsList);
+};
+
+#endif


More information about the poppler mailing list