[poppler] utils/HtmlFonts.cc utils/HtmlFonts.h utils/HtmlOutputDev.cc utils/pdftohtml.1 utils/pdftohtml.cc
Albert Astals Cid
aacid at kemper.freedesktop.org
Thu Sep 16 15:24:00 PDT 2010
utils/HtmlFonts.cc | 11 +++++-
utils/HtmlFonts.h | 18 +++++++++-
utils/HtmlOutputDev.cc | 84 ++++++++++++++++++++++++++++++-------------------
utils/pdftohtml.1 | 3 +
utils/pdftohtml.cc | 15 +++++++-
5 files changed, 93 insertions(+), 38 deletions(-)
New commits:
commit 3b4a901a4431814590449b6cd5ea418f4d6c1172
Author: Albert Astals Cid <aacid at kde.org>
Date: Thu Sep 16 23:23:03 2010 +0100
Add -s option
Writes a single html file
Since git does not allow multiple authors, I'm adding them here:
OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
diff --git a/utils/HtmlFonts.cc b/utils/HtmlFonts.cc
index d2cbfd5..4b592d5 100644
--- a/utils/HtmlFonts.cc
+++ b/utils/HtmlFonts.cc
@@ -20,6 +20,7 @@
// Copyright (C) 2007 Albert Astals Cid <aacid at kde.org>
// Copyright (C) 2008 Boris Toloknov <tlknv at yandex.ru>
// Copyright (C) 2008 Tomas Are Haavet <tomasare at gmail.com>
+// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -288,12 +289,14 @@ int HtmlFontAccu::AddFont(const HtmlFont& font){
}
// get CSS font name for font #i
-GooString* HtmlFontAccu::getCSStyle(int i, GooString* content){
+GooString* HtmlFontAccu::getCSStyle(int i, GooString* content, int j){
GooString *tmp;
GooString *iStr=GooString::fromInt(i);
+ GooString *jStr=GooString::fromInt(j);
if (!xml) {
tmp = new GooString("<span class=\"ft");
+ tmp->append(jStr);
tmp->append(iStr);
tmp->append("\">");
tmp->append(content);
@@ -303,14 +306,16 @@ GooString* HtmlFontAccu::getCSStyle(int i, GooString* content){
tmp->append(content);
}
+ delete jStr;
delete iStr;
return tmp;
}
// get CSS font definition for font #i
-GooString* HtmlFontAccu::CSStyle(int i){
+GooString* HtmlFontAccu::CSStyle(int i, int j){
GooString *tmp=new GooString();
GooString *iStr=GooString::fromInt(i);
+ GooString *jStr=GooString::fromInt(j);
GooVector<HtmlFont>::iterator g=accu->begin();
g+=i;
@@ -322,6 +327,7 @@ GooString* HtmlFontAccu::CSStyle(int i){
if(!xml){
tmp->append(".ft");
+ tmp->append(jStr);
tmp->append(iStr);
tmp->append("{font-size:");
tmp->append(Size);
@@ -352,6 +358,7 @@ GooString* HtmlFontAccu::CSStyle(int i){
delete fontName;
delete colorStr;
+ delete jStr;
delete iStr;
delete Size;
return tmp;
diff --git a/utils/HtmlFonts.h b/utils/HtmlFonts.h
index df2b570..54deaf8 100644
--- a/utils/HtmlFonts.h
+++ b/utils/HtmlFonts.h
@@ -10,6 +10,20 @@
//
//========================================================================
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
#ifndef _HTML_FONTS_H
#define _HTML_FONTS_H
#include "goo/GooVector.h"
@@ -89,8 +103,8 @@ public:
g+=i;
return g;
}
- GooString* getCSStyle (int i, GooString* content);
- GooString* CSStyle(int i);
+ GooString* getCSStyle (int i,GooString* content, int j = 0);
+ GooString* CSStyle(int i, int j = 0);
int size() const {return accu->size();}
};
diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index dbf677f..8ff8f08 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -27,6 +27,7 @@
// Copyright (C) 2009 Reece Dunn <msclrhd at gmail.com>
// Copyright (C) 2010 Adrian Johnson <ajohnson at redneon.com>
// Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
+// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -65,6 +66,7 @@ GooList *HtmlOutputDev::imgList=new GooList();
extern double scale;
extern GBool complexMode;
+extern GBool singleHtml;
extern GBool ignore;
extern GBool printCommands;
extern GBool printHtml;
@@ -670,22 +672,33 @@ void HtmlPage::dumpComplex(FILE *file, int page){
{
GooString* pgNum=GooString::fromInt(page);
tmp = new GooString(DocName);
- tmp->append('-')->append(pgNum)->append(".html");
+ if (!singleHtml){
+ tmp->append('-')->append(pgNum)->append(".html");
+ pageFile = fopen(tmp->getCString(), "w");
+ } else {
+ tmp->append("-html")->append(".html");
+ pageFile = fopen(tmp->getCString(), "a");
+ }
delete pgNum;
-
- if (!(pageFile = fopen(tmp->getCString(), "w"))) {
+ if (!pageFile) {
error(-1, "Couldn't open html file '%s'", tmp->getCString());
delete tmp;
return;
}
- delete tmp;
- fprintf(pageFile,"%s\n<HTML>\n<HEAD>\n<TITLE>Page %d</TITLE>\n\n",
- DOCTYPE, page);
+ if (!singleHtml)
+ fprintf(pageFile,"%s\n<HTML>\n<HEAD>\n<TITLE>Page %d</TITLE>\n\n", DOCTYPE, page);
+ else
+ fprintf(pageFile,"%s\n<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n\n", DOCTYPE, tmp->getCString());
+
+ delete tmp;
htmlEncoding = HtmlOutputDev::mapEncodingToHtml
(globalParams->getTextEncodingName());
- fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding);
+ if (!singleHtml)
+ fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding);
+ else
+ fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n <br>\n", htmlEncoding);
}
else
{
@@ -701,7 +714,11 @@ void HtmlPage::dumpComplex(FILE *file, int page){
fputs("<STYLE type=\"text/css\">\n<!--\n",pageFile);
for(int i=fontsPageMarker;i!=fonts->size();i++) {
- GooString *fontCSStyle = fonts->CSStyle(i);
+ GooString *fontCSStyle;
+ if (!singleHtml)
+ fontCSStyle = fonts->CSStyle(i);
+ else
+ fontCSStyle = fonts->CSStyle(i,page);
fprintf(pageFile,"\t%s\n",fontCSStyle->getCString());
delete fontCSStyle;
}
@@ -732,7 +749,10 @@ void HtmlPage::dumpComplex(FILE *file, int page){
xoutRound(tmp1->yMin),
xoutRound(tmp1->xMin));
fputs("<nobr>",pageFile);
- str1=fonts->getCSStyle(tmp1->fontpos, str);
+ if (!singleHtml)
+ str1=fonts->getCSStyle(tmp1->fontpos, str);
+ else
+ str1=fonts->getCSStyle(tmp1->fontpos, str, page);
fputs(str1->getCString(),pageFile);
delete str;
delete str1;
@@ -752,7 +772,7 @@ void HtmlPage::dumpComplex(FILE *file, int page){
void HtmlPage::dump(FILE *f, int pageNum)
{
- if (complexMode)
+ if (complexMode || singleHtml)
{
if (xml) dumpAsXML(f, pageNum);
if (!xml) dumpComplex(f, pageNum);
@@ -943,28 +963,30 @@ HtmlOutputDev::HtmlOutputDev(char *fileName, char *title,
// for non-xml output (complex or simple) with frames generate the left frame
if(!xml && !noframes)
{
- GooString* left=new GooString(fileName);
- left->append("_ind.html");
-
- doFrame(firstPage);
-
- if (!(fContentsFrame = fopen(left->getCString(), "w")))
- {
- error(-1, "Couldn't open html file '%s'", left->getCString());
- delete left;
- return;
+ if (!singleHtml)
+ {
+ GooString* left=new GooString(fileName);
+ left->append("_ind.html");
+
+ doFrame(firstPage);
+
+ if (!(fContentsFrame = fopen(left->getCString(), "w")))
+ {
+ error(-1, "Couldn't open html file '%s'", left->getCString());
+ delete left;
+ return;
+ }
+ delete left;
+ fputs(DOCTYPE, fContentsFrame);
+ fputs("<HTML>\n<HEAD>\n<TITLE></TITLE>\n</HEAD>\n<BODY>\n",fContentsFrame);
+
+ if (doOutline)
+ {
+ GooString *str = basename(Docname);
+ fprintf(fContentsFrame, "<A href=\"%s%s\" target=\"contents\">Outline</a><br>", str->getCString(), complexMode ? "-outline.html" : "s.html#outline");
+ delete str;
+ }
}
- delete left;
- fputs(DOCTYPE, fContentsFrame);
- fputs("<HTML>\n<HEAD>\n<TITLE></TITLE>\n</HEAD>\n<BODY>\n",fContentsFrame);
-
- if (doOutline)
- {
- GooString *str = basename(Docname);
- fprintf(fContentsFrame, "<A href=\"%s%s\" target=\"contents\">Outline</a><br>", str->getCString(), complexMode ? "-outline.html" : "s.html#outline");
- delete str;
- }
-
if (!complexMode)
{ /* not in complex mode */
diff --git a/utils/pdftohtml.1 b/utils/pdftohtml.1
index 6cdc6c6..bbdfa56 100644
--- a/utils/pdftohtml.1
+++ b/utils/pdftohtml.1
@@ -40,6 +40,9 @@ exchange .pdf links with .html
.B \-c
generate complex output
.TP
+.B \-s
+generate single html that includes all pages
+.TP
.B \-i
ignore images
.TP
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index 5762f90..5323b6e 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -17,6 +17,7 @@
// Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
// Copyright (C) 2010 Mike Slegeir <tehpola at yahoo.com>
// Copyright (C) 2010 Suzuki Toshiya <mpsuzuki at hiroshima-u.ac.jp>
+// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -67,6 +68,7 @@ GBool printCommands = gTrue;
static GBool printHelp = gFalse;
GBool printHtml = gFalse;
GBool complexMode=gFalse;
+GBool singleHtml=gFalse; // singleHtml
GBool ignore=gFalse;
GBool useSplash=gTrue;
char extension[5]="png";
@@ -107,6 +109,8 @@ static const ArgDesc argDesc[] = {
"exchange .pdf links by .html"},
{"-c", argFlag, &complexMode, 0,
"generate complex document"},
+ {"-s", argFlag, &singleHtml, 0,
+ "generate single document that includes all pages"},
{"-i", argFlag, &ignore, 0,
"ignore images"},
{"-noframes", argFlag, &noframes, 0,
@@ -293,7 +297,7 @@ int main(int argc, char *argv[]) {
if (scale>3.0) scale=3.0;
if (scale<0.5) scale=0.5;
- if (complexMode) {
+ if (complexMode || singleHtml) {
//noframes=gFalse;
stout=gFalse;
}
@@ -301,11 +305,13 @@ int main(int argc, char *argv[]) {
if (stout) {
noframes=gTrue;
complexMode=gFalse;
+ singleHtml=gFalse;
}
if (xml)
{
complexMode = gTrue;
+ singleHtml = gFalse;
noframes = gTrue;
noMerge = gTrue;
}
@@ -359,7 +365,10 @@ int main(int argc, char *argv[]) {
}
#endif
- rawOrder = complexMode; // todo: figure out what exactly rawOrder do :)
+ if (!singleHtml)
+ rawOrder = complexMode; // todo: figure out what exactly rawOrder do :)
+ else
+ rawOrder = singleHtml;
// write text file
htmlOut = new HtmlOutputDev(htmlFileName->getCString(),
@@ -400,7 +409,7 @@ int main(int argc, char *argv[]) {
}
}
- if( complexMode && !xml && !ignore ) {
+ if ((complexMode || singleHtml) && !xml && !ignore) {
if(useSplash) {
#ifdef HAVE_SPLASH
GooString *imgFileName = NULL;
More information about the poppler
mailing list