[poppler] Branch 'poppler-0.18' - utils/HtmlOutputDev.cc utils/HtmlOutputDev.h utils/pdftohtml.cc

Albert Astals Cid aacid at kemper.freedesktop.org
Sun Feb 5 06:57:55 PST 2012


 utils/HtmlOutputDev.cc |  196 ++++++++++++++++++++++++++-----------------------
 utils/HtmlOutputDev.h  |    6 +
 utils/pdftohtml.cc     |    3 
 3 files changed, 112 insertions(+), 93 deletions(-)

New commits:
commit 40f7289ab04787734b856c53d5c0139445b52635
Author: Igor Slepchin <igor.redhat at gmail.com>
Date:   Sun Feb 5 15:55:39 2012 +0100

    Proper Unicode support when dumping PDF outline.
    
    Also use the already existing Outline class rather than parsing the outline anew.
    
    Bug 45572

diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index 43e4ec4..5561528 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -30,6 +30,7 @@
 // Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
 // Copyright (C) 2011 Joshua Richardson <jric at chegg.com>
 // Copyright (C) 2011 Stephen Reichling <sreichling at chegg.com>
+// Copyright (C) 2012 Igor Slepchin <igor.redhat at gmail.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -61,6 +62,8 @@
 #include "HtmlOutputDev.h"
 #include "HtmlFonts.h"
 #include "HtmlUtils.h"
+#include "Outline.h"
+#include "PDFDoc.h"
 
 #define DEBUG __FILE__ << ": " << __LINE__ << ": DEBUG: "
 
@@ -1551,17 +1554,25 @@ void HtmlOutputDev::dumpMetaVars(FILE *file)
   }
 }
 
-GBool HtmlOutputDev::dumpDocOutline(Catalog* catalog)
+GBool HtmlOutputDev::dumpDocOutline(PDFDoc* doc)
 { 
+#ifdef DISABLE_OUTLINE
+	return gFalse;
+#else
 	FILE * output = NULL;
 	GBool bClose = gFalse;
+	Catalog *catalog = doc->getCatalog();
 
 	if (!ok || xml)
-    	return gFalse;
+                return gFalse;
   
-	Object *outlines = catalog->getOutline();
-  	if (!outlines->isDict())
-    	return gFalse;
+	Outline *outline = doc->getOutline();
+	if (!outline)
+		return gFalse;
+
+	GooList *outlines = outline->getItems();
+	if (!outlines)
+		return gFalse;
   
 	if (!complexMode && !xml)
   	{
@@ -1583,7 +1594,17 @@ GBool HtmlOutputDev::dumpDocOutline(Catalog* catalog)
 				return gFalse;
 			delete str;
 			bClose = gTrue;
-     		fputs("<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>Document Outline</TITLE>\n</HEAD>\n<BODY>\n", output);
+
+			char *htmlEncoding =
+				HtmlOutputDev::mapEncodingToHtml(globalParams->getTextEncodingName());
+
+			fprintf(output, "<HTML xmlns=\"http://www.w3.org/1999/xhtml\" " \
+                                "lang=\"\" xml:lang=\"\">\n"            \
+                                "<HEAD>\n"                              \
+                                "<TITLE>Document Outline</TITLE>\n"     \
+                                "<META http-equiv=\"Content-Type\" content=\"text/html; " \
+                                "charset=%s\"/>\n"                      \
+                                "</HEAD>\n<BODY>\n", htmlEncoding);
 		}
 	}
  
@@ -1597,97 +1618,92 @@ GBool HtmlOutputDev::dumpDocOutline(Catalog* catalog)
 		fclose(output);
 	}
   	return done;
+#endif
 }
 
-GBool HtmlOutputDev::newOutlineLevel(FILE *output, Object *node, Catalog* catalog, int level)
+GBool HtmlOutputDev::newOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level)
 {
-  Object curr, next;
-  GBool atLeastOne = gFalse;
-  
-  if (node->dictLookup("First", &curr)->isDict()) {
-    if (level == 1)
+	GBool atLeastOne = gFalse;
+
+	if (level == 1)
 	{
 		fputs("<A name=\"outline\"></a>", output);
 		fputs("<h1>Document Outline</h1>\n", output);
 	}
-    fputs("<ul>",output);
-    do {
-      // get title, give up if not found
-      Object title;
-      if (curr.dictLookup("Title", &title)->isNull()) {
-		title.free();
-		break;
-      }
-      GooString *titleStr = new GooString(title.getString());
-      title.free();
-
-      // get corresponding link
-      // Note: some code duplicated from HtmlOutputDev::getLinkDest().
-      GooString *linkName = NULL;;
-      Object dest;
-      if (!curr.dictLookup("Dest", &dest)->isNull()) {
-		LinkGoTo *link = new LinkGoTo(&dest);
-		LinkDest *linkdest=NULL;
-		if (link->getDest()!=NULL)
-			linkdest=link->getDest()->copy();
-		else if (link->getNamedDest()!=NULL)
-			linkdest=catalog->findDest(link->getNamedDest());
-			
-		delete link;
-		if (linkdest) { 
-	  		int page;
-	  		if (linkdest->isPageRef()) {
-	    		Ref pageref=linkdest->getPageRef();
-	    		page=catalog->findPage(pageref.num,pageref.gen);
-	  		} else {
-	    		page=linkdest->getPageNum();
-	  		}
-	  		delete linkdest;
-
-			/* 			complex 	simple
-			frames		file-4.html	files.html#4
-			noframes	file.html#4	file.html#4
-	   		*/
-	  		linkName=basename(Docname);
-	  		GooString *str=GooString::fromInt(page);
-	  		if (noframes) {
-	    		linkName->append(".html#");
-				linkName->append(str);
-	  		} else {
-    			if( complexMode ) {
-	   		   		linkName->append("-");
-	      			linkName->append(str);
-	      			linkName->append(".html");
-	    		} else {
-	      			linkName->append("s.html#");
-	      			linkName->append(str);
-	    		}
-	  		}
-			delete str;
+	fputs("<ul>",output);
+
+	for (int i = 0; i < outlines->getLength(); i++)
+	{
+		OutlineItem *item = (OutlineItem*)outlines->get(i);
+		GooString *titleStr = HtmlFont::HtmlFilter(item->getTitle(),
+							   item->getTitleLength());
+
+		// get corresponding link
+		GooString *linkName = NULL;;
+		LinkAction *action = item->getAction();
+		LinkGoTo *link = NULL;
+		Object dest;
+		if (action && action->getKind() == actionGoTo)
+			link = dynamic_cast<LinkGoTo*>(action);
+		if (link && link->isOk()) {
+			LinkDest *linkdest=NULL;
+			if (link->getDest()!=NULL)
+				linkdest=link->getDest()->copy();
+			else if (link->getNamedDest()!=NULL)
+				linkdest=catalog->findDest(link->getNamedDest());
+
+			if (linkdest) {
+				int page;
+				if (linkdest->isPageRef()) {
+					Ref pageref=linkdest->getPageRef();
+					page=catalog->findPage(pageref.num,pageref.gen);
+				} else {
+					page=linkdest->getPageNum();
+				}
+				delete linkdest;
+
+				/*		complex		simple
+				frames		file-4.html	files.html#4
+				noframes	file.html#4	file.html#4
+				*/
+				linkName=basename(Docname);
+				GooString *str=GooString::fromInt(page);
+				if (noframes) {
+					linkName->append(".html#");
+					linkName->append(str);
+				} else {
+					if( complexMode ) {
+						linkName->append("-");
+						linkName->append(str);
+						linkName->append(".html");
+					} else {
+						linkName->append("s.html#");
+						linkName->append(str);
+					}
+				}
+				delete str;
+			}
 		}
-      }
-      dest.free();
-
-      fputs("<li>",output);
-      if (linkName)
-		fprintf(output,"<A href=\"%s\">", linkName->getCString());
-      fputs(titleStr->getCString(),output);
-      if (linkName) {
-		fputs("</A>",output);
-		delete linkName;
-      }
-      fputs("\n",output);
-      delete titleStr;
-      atLeastOne = gTrue;
-
-      newOutlineLevel(output, &curr, catalog, level+1);
-      curr.dictLookup("Next", &next);
-      curr.free();
-      curr = next;
-    } while(curr.isDict());
-    fputs("</ul>",output);
-  }
-  curr.free();
+		dest.free();
+
+		fputs("<li>",output);
+		if (linkName)
+			fprintf(output,"<A href=\"%s\">", linkName->getCString());
+		fputs(titleStr->getCString(),output);
+		if (linkName) {
+			fputs("</A>",output);
+			delete linkName;
+		}
+		fputs("\n",output);
+		delete titleStr;
+		atLeastOne = gTrue;
+
+		item->open();
+		if (item->hasKids())
+			newOutlineLevel(output, item->getKids(), catalog, level+1);
+		item->close();
+	}
+	fputs("</ul>",output);
 
-  return atLeastOne;
+	return atLeastOne;
 }
diff --git a/utils/HtmlOutputDev.h b/utils/HtmlOutputDev.h
index 474e3af..acf3c1e 100644
--- a/utils/HtmlOutputDev.h
+++ b/utils/HtmlOutputDev.h
@@ -21,6 +21,7 @@
 // Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
 // Copyright (C) 2011 Joshua Richardson <jric at chegg.com>
 // Copyright (C) 2011 Stephen Reichling <sreichling at chegg.com>
+// Copyright (C) 2012 Igor Slepchin <igor.redhat at gmail.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -58,6 +59,7 @@
 
 class GfxState;
 class GooString;
+class PDFDoc;
 //------------------------------------------------------------------------
 // HtmlString
 //------------------------------------------------------------------------
@@ -298,7 +300,7 @@ public:
   int getPageWidth() { return maxPageWidth; }
   int getPageHeight() { return maxPageHeight; }
 
-  GBool dumpDocOutline(Catalog* catalog);
+  GBool dumpDocOutline(PDFDoc* catalog);
 
 private:
   // convert encoding into a HTML standard, or encoding->getCString if not
@@ -308,7 +310,7 @@ private:
   GooString* getLinkDest(AnnotLink *link,Catalog *catalog);
   void dumpMetaVars(FILE *);
   void doFrame(int firstPage);
-  GBool newOutlineLevel(FILE *output, Object *node, Catalog* catalog, int level = 1);
+  GBool newOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level = 1);
 
   FILE *fContentsFrame;
   FILE *page;                   // html file
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index fa00ae1..0bc1ca7 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -19,6 +19,7 @@
 // Copyright (C) 2010 Suzuki Toshiya <mpsuzuki at hiroshima-u.ac.jp>
 // Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
 // Copyright (C) 2011 Steven Murdoch <Steven.Murdoch at cl.cam.ac.uk>
+// Copyright (C) 2012 Igor Slepchin <igor.redhat at gmail.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -404,7 +405,7 @@ int main(int argc, char *argv[]) {
 		      gTrue, gFalse, gFalse);
   	if (!xml)
 	{
-		htmlOut->dumpDocOutline(doc->getCatalog());
+		htmlOut->dumpDocOutline(doc);
 	}
   }
   


More information about the poppler mailing list