[poppler] utils/HtmlOutputDev.cc utils/HtmlOutputDev.h utils/pdf2xml.dtd utils/pdftohtml.cc
Albert Astals Cid
aacid at kemper.freedesktop.org
Thu Feb 23 14:09:45 PST 2012
utils/HtmlOutputDev.cc | 130 +++++++++++++++++++++++++++++++++++--------------
utils/HtmlOutputDev.h | 7 ++
utils/pdf2xml.dtd | 7 ++
utils/pdftohtml.cc | 5 -
4 files changed, 108 insertions(+), 41 deletions(-)
New commits:
commit 7705e65c231cc3af296bf19f5cba110cabb72e7d
Author: Albert Astals Cid <aacid at kde.org>
Date: Thu Feb 23 23:09:23 2012 +0100
Generate outlines in pdftohtml in -xml mode.
Bug 56993
diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index 6ab7b9d..9e113eb 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -1564,7 +1564,7 @@ GBool HtmlOutputDev::dumpDocOutline(PDFDoc* doc)
GBool bClose = gFalse;
Catalog *catalog = doc->getCatalog();
- if (!ok || xml)
+ if (!ok)
return gFalse;
Outline *outline = doc->getOutline();
@@ -1575,7 +1575,7 @@ GBool HtmlOutputDev::dumpDocOutline(PDFDoc* doc)
if (!outlines)
return gFalse;
- if (!complexMode && !xml)
+ if (!complexMode || xml)
{
output = page;
}
@@ -1610,21 +1610,30 @@ GBool HtmlOutputDev::dumpDocOutline(PDFDoc* doc)
}
}
- GBool done = newOutlineLevel(output, outlines, catalog);
- if (done && !complexMode)
- fputs("<hr>\n", output);
-
- if (bClose)
+ if (!xml)
{
- fputs("</BODY>\n</HTML>\n", output);
- fclose(output);
+ GBool done = newHtmlOutlineLevel(output, outlines, catalog);
+ if (done && !complexMode)
+ fputs("<hr>\n", output);
+
+ if (bClose)
+ {
+ fputs("</BODY>\n</HTML>\n", output);
+ fclose(output);
+ }
}
- return done;
+ else
+ newXmlOutlineLevel(output, outlines, catalog);
+
+ return gTrue;
#endif
}
-GBool HtmlOutputDev::newOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level)
+GBool HtmlOutputDev::newHtmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level)
{
+#ifdef DISABLE_OUTLINE
+ return gFalse;
+#else
GBool atLeastOne = gFalse;
if (level == 1)
@@ -1640,29 +1649,10 @@ GBool HtmlOutputDev::newOutlineLevel(FILE *output, GooList *outlines, Catalog* c
GooString *titleStr = HtmlFont::HtmlFilter(item->getTitle(),
item->getTitleLength());
- // get corresponding link
GooString *linkName = NULL;;
- LinkAction *action = item->getAction();
- LinkGoTo *link = NULL;
- if (action && action->getKind() == actionGoTo)
- link = dynamic_cast<LinkGoTo*>(action);
- if (link && link->isOk()) {
- LinkDest *linkdest=NULL;
- if (link->getDest()!=NULL)
- linkdest=link->getDest()->copy();
- else if (link->getNamedDest()!=NULL)
- linkdest=catalog->findDest(link->getNamedDest());
-
- if (linkdest) {
- int page;
- if (linkdest->isPageRef()) {
- Ref pageref=linkdest->getPageRef();
- page=catalog->findPage(pageref.num,pageref.gen);
- } else {
- page=linkdest->getPageNum();
- }
- delete linkdest;
-
+ int page = getOutlinePageNum(item);
+ if (page > 0)
+ {
/* complex simple
frames file-4.html files.html#4
noframes file.html#4 file.html#4
@@ -1683,7 +1673,6 @@ GBool HtmlOutputDev::newOutlineLevel(FILE *output, GooList *outlines, Catalog* c
}
}
delete str;
- }
}
fputs("<li>",output);
@@ -1701,7 +1690,7 @@ GBool HtmlOutputDev::newOutlineLevel(FILE *output, GooList *outlines, Catalog* c
if (item->hasKids())
{
fputs("\n",output);
- newOutlineLevel(output, item->getKids(), catalog, level+1);
+ newHtmlOutlineLevel(output, item->getKids(), catalog, level+1);
}
item->close();
fputs("</li>\n",output);
@@ -1709,4 +1698,75 @@ GBool HtmlOutputDev::newOutlineLevel(FILE *output, GooList *outlines, Catalog* c
fputs("</ul>\n",output);
return atLeastOne;
+#endif
+}
+
+void HtmlOutputDev::newXmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog)
+{
+#ifndef DISABLE_OUTLINE
+ fputs("<outline>\n", output);
+
+ for (int i = 0; i < outlines->getLength(); i++)
+ {
+ OutlineItem *item = (OutlineItem*)outlines->get(i);
+ GooString *titleStr = HtmlFont::HtmlFilter(item->getTitle(),
+ item->getTitleLength());
+ int page = getOutlinePageNum(item);
+ if (page > 0)
+ {
+ fprintf(output, "<item page=\"%d\">%s</item>\n",
+ page, titleStr->getCString());
+ }
+ else
+ {
+ fprintf(output, "<item>%s</item>\n", titleStr->getCString());
+ }
+ delete titleStr;
+
+ item->open();
+ if (item->hasKids())
+ {
+ newXmlOutlineLevel(output, item->getKids(), catalog);
+ }
+ item->close();
+ }
+
+ fputs("</outline>\n", output);
+#endif
+}
+
+#ifndef DISABLE_OUTLINE
+int HtmlOutputDev::getOutlinePageNum(OutlineItem *item)
+{
+ LinkAction *action = item->getAction();
+ LinkGoTo *link = NULL;
+ LinkDest *linkdest = NULL;
+ int pagenum = -1;
+
+ if (!action || action->getKind() != actionGoTo)
+ return pagenum;
+
+ link = dynamic_cast<LinkGoTo*>(action);
+
+ if (!link || !link->isOk())
+ return pagenum;
+
+ if (link->getDest())
+ linkdest = link->getDest()->copy();
+ else if (link->getNamedDest())
+ linkdest = catalog->findDest(link->getNamedDest());
+
+ if (!linkdest)
+ return pagenum;
+
+ if (linkdest->isPageRef()) {
+ Ref pageref = linkdest->getPageRef();
+ pagenum = catalog->findPage(pageref.num, pageref.gen);
+ } else {
+ pagenum = linkdest->getPageNum();
+ }
+
+ delete linkdest;
+ return pagenum;
}
+#endif
diff --git a/utils/HtmlOutputDev.h b/utils/HtmlOutputDev.h
index a3db998..b730ead 100644
--- a/utils/HtmlOutputDev.h
+++ b/utils/HtmlOutputDev.h
@@ -60,6 +60,7 @@
class GfxState;
class GooString;
class PDFDoc;
+class OutlineItem;
//------------------------------------------------------------------------
// HtmlString
//------------------------------------------------------------------------
@@ -316,7 +317,11 @@ private:
GooString* getLinkDest(AnnotLink *link);
void dumpMetaVars(FILE *);
void doFrame(int firstPage);
- GBool newOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level = 1);
+ GBool newHtmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level = 1);
+ void newXmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog);
+#ifndef DISABLE_OUTLINE
+ int getOutlinePageNum(OutlineItem *item);
+#endif
void drawJpegImage(GfxState *state, Stream *str);
FILE *fContentsFrame;
diff --git a/utils/pdf2xml.dtd b/utils/pdf2xml.dtd
index 9cd3880..389676c 100644
--- a/utils/pdf2xml.dtd
+++ b/utils/pdf2xml.dtd
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
-<!ELEMENT pdf2xml (page+)>
+<!ELEMENT pdf2xml (page+, outline?)>
<!ELEMENT page (fontspec*, image*, text*)>
<!ATTLIST page
number CDATA #REQUIRED
@@ -38,3 +38,8 @@
height CDATA #REQUIRED
src CDATA #REQUIRED
>
+<!ELEMENT outline (item | outline)*>
+<!ELEMENT item (#PCDATA)>
+<!ATTLIST item
+ page CDATA #IMPLIED
+>
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index f7d3f14..7347161 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -410,10 +410,7 @@ int main(int argc, char *argv[]) {
{
doc->displayPages(htmlOut, firstPage, lastPage, 72 * scale, 72 * scale, 0,
gTrue, gFalse, gFalse);
- if (!xml)
- {
- htmlOut->dumpDocOutline(doc);
- }
+ htmlOut->dumpDocOutline(doc);
}
if ((complexMode || singleHtml) && !xml && !ignore) {
More information about the poppler mailing list