[poppler] utils/HtmlOutputDev.cc utils/HtmlOutputDev.h utils/pdftohtml.cc
Albert Astals Cid
aacid at kemper.freedesktop.org
Sun Feb 5 06:59:59 PST 2012
utils/HtmlOutputDev.cc | 197 ++++++++++++++++++++++++++-----------------------
utils/HtmlOutputDev.h | 6 -
utils/pdftohtml.cc | 3
3 files changed, 112 insertions(+), 94 deletions(-)
New commits:
commit 92ce79f47f929392f48737612a9690088573f63d
Author: Igor Slepchin <igor.redhat at gmail.com>
Date: Sun Feb 5 15:55:39 2012 +0100
Proper Unicode support when dumping the PDF outline.
Also use the already existing Outline class rather than parsing the outline anew.
Bug 45572
(cherry picked from commit 40f7289ab04787734b856c53d5c0139445b52635)
Conflicts:
utils/HtmlOutputDev.cc
diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index c1447ad..50ceefe 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -30,7 +30,7 @@
// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
// Copyright (C) 2011 Joshua Richardson <jric at chegg.com>
// Copyright (C) 2011 Stephen Reichling <sreichling at chegg.com>
-// Copyright (C) 2011 Igor Slepchin <igor.slepchin at gmail.com>
+// Copyright (C) 2011, 2012 Igor Slepchin <igor.slepchin at gmail.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -62,6 +62,8 @@
#include "HtmlOutputDev.h"
#include "HtmlFonts.h"
#include "HtmlUtils.h"
+#include "Outline.h"
+#include "PDFDoc.h"
#define DEBUG __FILE__ << ": " << __LINE__ << ": DEBUG: "
@@ -1583,17 +1585,25 @@ void HtmlOutputDev::dumpMetaVars(FILE *file)
}
}
-GBool HtmlOutputDev::dumpDocOutline(Catalog* catalog)
+GBool HtmlOutputDev::dumpDocOutline(PDFDoc* doc)
{
+#ifdef DISABLE_OUTLINE
+ return gFalse;
+#else
FILE * output = NULL;
GBool bClose = gFalse;
+ Catalog *catalog = doc->getCatalog();
if (!ok || xml)
- return gFalse;
+ return gFalse;
- Object *outlines = catalog->getOutline();
- if (!outlines->isDict())
- return gFalse;
+ Outline *outline = doc->getOutline();
+ if (!outline)
+ return gFalse;
+
+ GooList *outlines = outline->getItems();
+ if (!outlines)
+ return gFalse;
if (!complexMode && !xml)
{
@@ -1615,7 +1625,17 @@ GBool HtmlOutputDev::dumpDocOutline(Catalog* catalog)
return gFalse;
delete str;
bClose = gTrue;
- fputs("<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>Document Outline</TITLE>\n</HEAD>\n<BODY>\n", output);
+
+ char *htmlEncoding =
+ HtmlOutputDev::mapEncodingToHtml(globalParams->getTextEncodingName());
+
+ fprintf(output, "<HTML xmlns=\"http://www.w3.org/1999/xhtml\" " \
+ "lang=\"\" xml:lang=\"\">\n" \
+ "<HEAD>\n" \
+ "<TITLE>Document Outline</TITLE>\n" \
+ "<META http-equiv=\"Content-Type\" content=\"text/html; " \
+ "charset=%s\"/>\n" \
+ "</HEAD>\n<BODY>\n", htmlEncoding);
}
}
@@ -1629,97 +1649,92 @@ GBool HtmlOutputDev::dumpDocOutline(Catalog* catalog)
fclose(output);
}
return done;
+#endif
}
-GBool HtmlOutputDev::newOutlineLevel(FILE *output, Object *node, Catalog* catalog, int level)
+GBool HtmlOutputDev::newOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level)
{
- Object curr, next;
- GBool atLeastOne = gFalse;
-
- if (node->dictLookup("First", &curr)->isDict()) {
- if (level == 1)
+ GBool atLeastOne = gFalse;
+
+ if (level == 1)
{
fputs("<A name=\"outline\"></a>", output);
fputs("<h1>Document Outline</h1>\n", output);
}
- fputs("<ul>",output);
- do {
- // get title, give up if not found
- Object title;
- if (curr.dictLookup("Title", &title)->isNull()) {
- title.free();
- break;
- }
- GooString *titleStr = new GooString(title.getString());
- title.free();
-
- // get corresponding link
- // Note: some code duplicated from HtmlOutputDev::getLinkDest().
- GooString *linkName = NULL;;
- Object dest;
- if (!curr.dictLookup("Dest", &dest)->isNull()) {
- LinkGoTo *link = new LinkGoTo(&dest);
- LinkDest *linkdest=NULL;
- if (link->getDest()!=NULL)
- linkdest=link->getDest()->copy();
- else if (link->getNamedDest()!=NULL)
- linkdest=catalog->findDest(link->getNamedDest());
-
- delete link;
- if (linkdest) {
- int page;
- if (linkdest->isPageRef()) {
- Ref pageref=linkdest->getPageRef();
- page=catalog->findPage(pageref.num,pageref.gen);
- } else {
- page=linkdest->getPageNum();
- }
- delete linkdest;
-
- /* complex simple
- frames file-4.html files.html#4
- noframes file.html#4 file.html#4
- */
- linkName=basename(Docname);
- GooString *str=GooString::fromInt(page);
- if (noframes) {
- linkName->append(".html#");
- linkName->append(str);
- } else {
- if( complexMode ) {
- linkName->append("-");
- linkName->append(str);
- linkName->append(".html");
- } else {
- linkName->append("s.html#");
- linkName->append(str);
- }
- }
- delete str;
+ fputs("<ul>",output);
+
+ for (int i = 0; i < outlines->getLength(); i++)
+ {
+ OutlineItem *item = (OutlineItem*)outlines->get(i);
+ GooString *titleStr = HtmlFont::HtmlFilter(item->getTitle(),
+ item->getTitleLength());
+
+ // get corresponding link
+ GooString *linkName = NULL;;
+ LinkAction *action = item->getAction();
+ LinkGoTo *link = NULL;
+ Object dest;
+ if (action && action->getKind() == actionGoTo)
+ link = dynamic_cast<LinkGoTo*>(action);
+ if (link && link->isOk()) {
+ LinkDest *linkdest=NULL;
+ if (link->getDest()!=NULL)
+ linkdest=link->getDest()->copy();
+ else if (link->getNamedDest()!=NULL)
+ linkdest=catalog->findDest(link->getNamedDest());
+
+ if (linkdest) {
+ int page;
+ if (linkdest->isPageRef()) {
+ Ref pageref=linkdest->getPageRef();
+ page=catalog->findPage(pageref.num,pageref.gen);
+ } else {
+ page=linkdest->getPageNum();
+ }
+ delete linkdest;
+
+ /* complex simple
+ frames file-4.html files.html#4
+ noframes file.html#4 file.html#4
+ */
+ linkName=basename(Docname);
+ GooString *str=GooString::fromInt(page);
+ if (noframes) {
+ linkName->append(".html#");
+ linkName->append(str);
+ } else {
+ if( complexMode ) {
+ linkName->append("-");
+ linkName->append(str);
+ linkName->append(".html");
+ } else {
+ linkName->append("s.html#");
+ linkName->append(str);
+ }
+ }
+ delete str;
+ }
}
- }
- dest.free();
-
- fputs("<li>",output);
- if (linkName)
- fprintf(output,"<A href=\"%s\">", linkName->getCString());
- fputs(titleStr->getCString(),output);
- if (linkName) {
- fputs("</A>",output);
- delete linkName;
- }
- fputs("\n",output);
- delete titleStr;
- atLeastOne = gTrue;
-
- newOutlineLevel(output, &curr, catalog, level+1);
- curr.dictLookup("Next", &next);
- curr.free();
- curr = next;
- } while(curr.isDict());
- fputs("</ul>",output);
- }
- curr.free();
+ dest.free();
+
+ fputs("<li>",output);
+ if (linkName)
+ fprintf(output,"<A href=\"%s\">", linkName->getCString());
+ fputs(titleStr->getCString(),output);
+ if (linkName) {
+ fputs("</A>",output);
+ delete linkName;
+ }
+ fputs("\n",output);
+ delete titleStr;
+ atLeastOne = gTrue;
+
+ item->open();
+ if (item->hasKids())
+ newOutlineLevel(output, item->getKids(), catalog, level+1);
+ item->close();
+ }
+ fputs("</ul>",output);
- return atLeastOne;
+ return atLeastOne;
}
diff --git a/utils/HtmlOutputDev.h b/utils/HtmlOutputDev.h
index 474e3af..acf3c1e 100644
--- a/utils/HtmlOutputDev.h
+++ b/utils/HtmlOutputDev.h
@@ -21,6 +21,7 @@
// Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
// Copyright (C) 2011 Joshua Richardson <jric at chegg.com>
// Copyright (C) 2011 Stephen Reichling <sreichling at chegg.com>
+// Copyright (C) 2012 Igor Slepchin <igor.redhat at gmail.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -58,6 +59,7 @@
class GfxState;
class GooString;
+class PDFDoc;
//------------------------------------------------------------------------
// HtmlString
//------------------------------------------------------------------------
@@ -298,7 +300,7 @@ public:
int getPageWidth() { return maxPageWidth; }
int getPageHeight() { return maxPageHeight; }
- GBool dumpDocOutline(Catalog* catalog);
+ GBool dumpDocOutline(PDFDoc* catalog);
private:
// convert encoding into a HTML standard, or encoding->getCString if not
@@ -308,7 +310,7 @@ private:
GooString* getLinkDest(AnnotLink *link,Catalog *catalog);
void dumpMetaVars(FILE *);
void doFrame(int firstPage);
- GBool newOutlineLevel(FILE *output, Object *node, Catalog* catalog, int level = 1);
+ GBool newOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level = 1);
FILE *fContentsFrame;
FILE *page; // html file
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index fa00ae1..0bc1ca7 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -19,6 +19,7 @@
// Copyright (C) 2010 Suzuki Toshiya <mpsuzuki at hiroshima-u.ac.jp>
// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
// Copyright (C) 2011 Steven Murdoch <Steven.Murdoch at cl.cam.ac.uk>
+// Copyright (C) 2012 Igor Slepchin <igor.redhat at gmail.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -404,7 +405,7 @@ int main(int argc, char *argv[]) {
gTrue, gFalse, gFalse);
if (!xml)
{
- htmlOut->dumpDocOutline(doc->getCatalog());
+ htmlOut->dumpDocOutline(doc);
}
}
More information about the poppler
mailing list