[poppler] utils/HtmlOutputDev.cc utils/HtmlOutputDev.h utils/pdf2xml.dtd utils/pdftohtml.cc

Albert Astals Cid aacid at kemper.freedesktop.org
Thu Feb 23 14:09:45 PST 2012


 utils/HtmlOutputDev.cc |  130 +++++++++++++++++++++++++++++++++++--------------
 utils/HtmlOutputDev.h  |    7 ++
 utils/pdf2xml.dtd      |    7 ++
 utils/pdftohtml.cc     |    5 -
 4 files changed, 108 insertions(+), 41 deletions(-)

New commits:
commit 7705e65c231cc3af296bf19f5cba110cabb72e7d
Author: Albert Astals Cid <aacid at kde.org>
Date:   Thu Feb 23 23:09:23 2012 +0100

    Generate outlines in pdftohtml in -xml mode.
    
    Bug 56993

diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index 6ab7b9d..9e113eb 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -1564,7 +1564,7 @@ GBool HtmlOutputDev::dumpDocOutline(PDFDoc* doc)
 	GBool bClose = gFalse;
 	Catalog *catalog = doc->getCatalog();
 
-	if (!ok || xml)
+	if (!ok)
                 return gFalse;
   
 	Outline *outline = doc->getOutline();
@@ -1575,7 +1575,7 @@ GBool HtmlOutputDev::dumpDocOutline(PDFDoc* doc)
 	if (!outlines)
 		return gFalse;
   
-	if (!complexMode && !xml)
+	if (!complexMode || xml)
   	{
 		output = page;
   	}
@@ -1610,21 +1610,30 @@ GBool HtmlOutputDev::dumpDocOutline(PDFDoc* doc)
 		}
 	}
  
-  	GBool done = newOutlineLevel(output, outlines, catalog);
-  	if (done && !complexMode)
-    	fputs("<hr>\n", output);
-	
-	if (bClose)
+	if (!xml)
 	{
-		fputs("</BODY>\n</HTML>\n", output);
-		fclose(output);
+		GBool done = newHtmlOutlineLevel(output, outlines, catalog);
+		if (done && !complexMode)
+			fputs("<hr>\n", output);
+	
+		if (bClose)
+		{
+			fputs("</BODY>\n</HTML>\n", output);
+			fclose(output);
+		}
 	}
-  	return done;
+	else
+		newXmlOutlineLevel(output, outlines, catalog);
+
+	return gTrue;
 #endif
 }
 
-GBool HtmlOutputDev::newOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level)
+GBool HtmlOutputDev::newHtmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level)
 {
+#ifdef DISABLE_OUTLINE
+	return gFalse;
+#else
 	GBool atLeastOne = gFalse;
 
 	if (level == 1)
@@ -1640,29 +1649,10 @@ GBool HtmlOutputDev::newOutlineLevel(FILE *output, GooList *outlines, Catalog* c
 		GooString *titleStr = HtmlFont::HtmlFilter(item->getTitle(),
 							   item->getTitleLength());
 
-		// get corresponding link
 		GooString *linkName = NULL;;
-		LinkAction *action = item->getAction();
-		LinkGoTo *link = NULL;
-		if (action && action->getKind() == actionGoTo)
-			link = dynamic_cast<LinkGoTo*>(action);
-		if (link && link->isOk()) {
-			LinkDest *linkdest=NULL;
-			if (link->getDest()!=NULL)
-				linkdest=link->getDest()->copy();
-			else if (link->getNamedDest()!=NULL)
-				linkdest=catalog->findDest(link->getNamedDest());
-
-			if (linkdest) {
-				int page;
-				if (linkdest->isPageRef()) {
-					Ref pageref=linkdest->getPageRef();
-					page=catalog->findPage(pageref.num,pageref.gen);
-				} else {
-					page=linkdest->getPageNum();
-				}
-				delete linkdest;
-
+        int page = getOutlinePageNum(item);
+        if (page > 0)
+        {
 				/*		complex		simple
 				frames		file-4.html	files.html#4
 				noframes	file.html#4	file.html#4
@@ -1683,7 +1673,6 @@ GBool HtmlOutputDev::newOutlineLevel(FILE *output, GooList *outlines, Catalog* c
 					}
 				}
 				delete str;
-			}
 		}
 
 		fputs("<li>",output);
@@ -1701,7 +1690,7 @@ GBool HtmlOutputDev::newOutlineLevel(FILE *output, GooList *outlines, Catalog* c
 		if (item->hasKids())
 		{
 			fputs("\n",output);
-			newOutlineLevel(output, item->getKids(), catalog, level+1);
+			newHtmlOutlineLevel(output, item->getKids(), catalog, level+1);
 		}
 		item->close();
 		fputs("</li>\n",output);
@@ -1709,4 +1698,75 @@ GBool HtmlOutputDev::newOutlineLevel(FILE *output, GooList *outlines, Catalog* c
 	fputs("</ul>\n",output);
 
 	return atLeastOne;
+#endif
+}
+
+void HtmlOutputDev::newXmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog)
+{
+#ifndef DISABLE_OUTLINE
+    fputs("<outline>\n", output);
+
+    for (int i = 0; i < outlines->getLength(); i++)
+    {
+        OutlineItem *item     = (OutlineItem*)outlines->get(i);
+        GooString   *titleStr = HtmlFont::HtmlFilter(item->getTitle(),
+                                                     item->getTitleLength());
+        int page = getOutlinePageNum(item);
+        if (page > 0)
+        {
+            fprintf(output, "<item page=\"%d\">%s</item>\n",
+                    page, titleStr->getCString());
+        }
+        else
+        {
+            fprintf(output, "<item>%s</item>\n", titleStr->getCString());
+        }
+        delete titleStr;
+
+        item->open();
+        if (item->hasKids())
+        {
+            newXmlOutlineLevel(output, item->getKids(), catalog);
+        }
+        item->close();
+    }    
+
+    fputs("</outline>\n", output);
+#endif
+}
+
+#ifndef DISABLE_OUTLINE
+int HtmlOutputDev::getOutlinePageNum(OutlineItem *item)
+{
+    LinkAction *action   = item->getAction();
+    LinkGoTo   *link     = NULL;
+    LinkDest   *linkdest = NULL;
+    int         pagenum  = -1;
+
+    if (!action || action->getKind() != actionGoTo)
+        return pagenum;
+
+    link = dynamic_cast<LinkGoTo*>(action);
+
+    if (!link || !link->isOk())
+        return pagenum;
+
+    if (link->getDest())
+        linkdest = link->getDest()->copy();
+    else if (link->getNamedDest())
+        linkdest = catalog->findDest(link->getNamedDest());
+
+    if (!linkdest)
+        return pagenum;
+
+    if (linkdest->isPageRef()) {
+        Ref pageref = linkdest->getPageRef();
+        pagenum = catalog->findPage(pageref.num, pageref.gen);
+    } else {
+        pagenum = linkdest->getPageNum();
+    }
+
+    delete linkdest;
+    return pagenum;
 }
+#endif
diff --git a/utils/HtmlOutputDev.h b/utils/HtmlOutputDev.h
index a3db998..b730ead 100644
--- a/utils/HtmlOutputDev.h
+++ b/utils/HtmlOutputDev.h
@@ -60,6 +60,7 @@
 class GfxState;
 class GooString;
 class PDFDoc;
+class OutlineItem;
 //------------------------------------------------------------------------
 // HtmlString
 //------------------------------------------------------------------------
@@ -316,7 +317,11 @@ private:
   GooString* getLinkDest(AnnotLink *link);
   void dumpMetaVars(FILE *);
   void doFrame(int firstPage);
-  GBool newOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level = 1);
+  GBool newHtmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level = 1);
+  void newXmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog);
+#ifndef DISABLE_OUTLINE
+  int getOutlinePageNum(OutlineItem *item);
+#endif
   void drawJpegImage(GfxState *state, Stream *str);
 
   FILE *fContentsFrame;
diff --git a/utils/pdf2xml.dtd b/utils/pdf2xml.dtd
index 9cd3880..389676c 100644
--- a/utils/pdf2xml.dtd
+++ b/utils/pdf2xml.dtd
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<!ELEMENT pdf2xml (page+)>
+<!ELEMENT pdf2xml (page+, outline?)>
 <!ELEMENT page (fontspec*, image*, text*)>
 <!ATTLIST page
 	number CDATA #REQUIRED
@@ -38,3 +38,8 @@
 	height CDATA #REQUIRED
 	src CDATA #REQUIRED
 >
+<!ELEMENT outline (item | outline)*>
+<!ELEMENT item (#PCDATA)>
+<!ATTLIST item
+	page CDATA #IMPLIED
+>
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index f7d3f14..7347161 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -410,10 +410,7 @@ int main(int argc, char *argv[]) {
   {
     doc->displayPages(htmlOut, firstPage, lastPage, 72 * scale, 72 * scale, 0,
 		      gTrue, gFalse, gFalse);
-  	if (!xml)
-	{
-		htmlOut->dumpDocOutline(doc);
-	}
+    htmlOut->dumpDocOutline(doc);
   }
   
   if ((complexMode || singleHtml) && !xml && !ignore) {


More information about the poppler mailing list