[poppler] utils/pdftohtml.1 utils/pdftohtml.cc
Albert Astals Cid
aacid at kemper.freedesktop.org
Sun Aug 22 14:14:11 PDT 2010
utils/pdftohtml.1 | 8 +-
utils/pdftohtml.cc | 210 ++++++++++++++++++++++++++++++++++++++---------------
2 files changed, 158 insertions(+), 60 deletions(-)
New commits:
commit 8591c804598576556c6d24a66b6648de8ed1c4eb
Author: Mike Slegeir <tehpola at yahoo.com>
Date: Sun Aug 22 22:01:03 2010 +0100
Use splash instead of external gs invocation to render the background
Patch in bug 19404, should fix 9746 too
diff --git a/utils/pdftohtml.1 b/utils/pdftohtml.1
index 850aa84..fbcc077 100644
--- a/utils/pdftohtml.1
+++ b/utils/pdftohtml.1
@@ -68,7 +68,13 @@ user password (for encrypted files)
force hidden text extraction
.TP
.B \-dev
-output device name for Ghostscript (png16m, jpeg etc)
+output device name for Ghostscript (png16m, jpeg etc).
+Unless this option is specified, Splash will be used
+.TP
+.B \-fmt
+image file format for Splash output (png or jpg).
+If complex is selected, but neither -fmt nor -dev are specified,
+-fmt png will be assumed
.TP
.B \-nomerge
do not merge paragraphs
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index 3c74c6e..87a4d69 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -15,6 +15,7 @@
//
// Copyright (C) 2007-2008, 2010 Albert Astals Cid <aacid at kde.org>
// Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
+// Copyright (C) 2010 Mike Slegeir <tehpola at yahoo.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -44,6 +45,10 @@
#include "PDFDoc.h"
#include "PDFDocFactory.h"
#include "HtmlOutputDev.h"
+#ifdef HAVE_SPLASH
+#include "SplashOutputDev.h"
+#include "splash/SplashBitmap.h"
+#endif
#include "PSOutputDev.h"
#include "GlobalParams.h"
#include "Error.h"
@@ -62,7 +67,8 @@ static GBool printHelp = gFalse;
GBool printHtml = gFalse;
GBool complexMode=gFalse;
GBool ignore=gFalse;
-//char extension[5]=".png";
+GBool useSplash=gTrue;
+char extension[5]="png";
double scale=1.5;
GBool noframes=gFalse;
GBool stout=gFalse;
@@ -74,7 +80,7 @@ GBool showHidden = gFalse;
GBool noMerge = gFalse;
static char ownerPassword[33] = "";
static char userPassword[33] = "";
-static char gsDevice[33] = "png16m";
+static char gsDevice[33] = "none";
static GBool printVersion = gFalse;
static GooString* getInfoString(Dict *infoDict, char *key);
@@ -117,6 +123,8 @@ static const ArgDesc argDesc[] = {
"output text encoding name"},
{"-dev", argString, gsDevice, sizeof(gsDevice),
"output device name for Ghostscript (png16m, jpeg etc)"},
+ {"-fmt", argString, extension, sizeof(extension),
+ "image file format for Splash output (png or jpg)"},
{"-v", argFlag, &printVersion, 0,
"print copyright and version info"},
{"-opw", argString, ownerPassword, sizeof(ownerPassword),
@@ -128,6 +136,32 @@ static const ArgDesc argDesc[] = {
{NULL}
};
+#ifdef HAVE_SPLASH
+class SplashOutputDevNoText : public SplashOutputDev { // SplashOutputDev variant whose text hooks below do nothing; main() uses it to render the page background image
+public:
+ SplashOutputDevNoText(SplashColorMode colorModeA, int bitmapRowPadA,
+ GBool reverseVideoA, SplashColorPtr paperColorA,
+ GBool bitmapTopDownA = gTrue,
+ GBool allowAntialiasA = gTrue) : SplashOutputDev(colorModeA,
+ bitmapRowPadA, reverseVideoA, paperColorA, bitmapTopDownA,
+ allowAntialiasA) { } // forwards every argument unchanged to the SplashOutputDev constructor
+ virtual ~SplashOutputDevNoText() { }
+
+ void drawChar(GfxState *state, double x, double y,
+ double dx, double dy,
+ double originX, double originY,
+ CharCode code, int nBytes, Unicode *u, int uLen) { } // empty body: characters are not drawn
+ GBool beginType3Char(GfxState *state, double x, double y,
+ double dx, double dy,
+ CharCode code, Unicode *u, int uLen) { return false; } // does nothing and returns false
+ void endType3Char(GfxState *state) { } // no-op
+ void beginTextObject(GfxState *state) { } // no-op
+ GBool deviceHasTextClip(GfxState *state) { return false; } // always reports false
+ void endTextObject(GfxState *state) { } // no-op
+ GBool interpretType3Chars() { return gFalse; } // always gFalse; NOTE(review): presumably keeps Type 3 glyphs from being rendered as drawing operators - confirm against OutputDev
+};
+#endif
+
int main(int argc, char *argv[]) {
PDFDoc *doc = NULL;
GooString *fileName = NULL;
@@ -136,10 +170,12 @@ int main(int argc, char *argv[]) {
GooString *htmlFileName = NULL;
GooString *psFileName = NULL;
HtmlOutputDev *htmlOut = NULL;
+#ifdef HAVE_SPLASH
+ SplashOutputDev *splashOut = NULL;
+#endif
PSOutputDev *psOut = NULL;
GBool ok;
char *p;
- char extension[16] = "png";
GooString *ownerPW, *userPW;
Object info;
char * extsList[] = {"png", "jpeg", "bmp", "pcx", "tiff", "pbm", NULL};
@@ -289,16 +325,35 @@ int main(int argc, char *argv[]) {
info.free();
if( !docTitle ) docTitle = new GooString(htmlFileName);
- /* determine extensions of output backgroun images */
- {int i;
- for(i = 0; extsList[i]; i++)
- {
- if( strstr(gsDevice, extsList[i]) != (char *) NULL )
- {
- strncpy(extension, extsList[i], sizeof(extension));
- break;
- }
- }}
+ if( strcmp("none", gsDevice) ) {
+ useSplash = gFalse;
+ /* determine extensions of output background images */
+ int i;
+ for(i = 0; extsList[i]; i++)
+ {
+ if( strstr(gsDevice, extsList[i]) != (char *) NULL )
+ {
+ strncpy(extension, extsList[i], sizeof(extension));
+ break;
+ }
+ }
+ }
+
+#ifndef HAVE_SPLASH
+ if( useSplash ) {
+ fprintf(stderr, "You are trying to use the -fmt option but your pdftohtml was built without support for it. Please use the -dev option\n");
+ delete docTitle;
+ delete author;
+ delete keywords;
+ delete subject;
+ delete date;
+ delete htmlFileName;
+ delete globalParams;
+ delete fileName;
+ delete doc;
+ return -1;
+ }
+#endif
rawOrder = complexMode; // todo: figure out what exactly rawOrder do :)
@@ -342,55 +397,92 @@ int main(int argc, char *argv[]) {
}
if( complexMode && !xml && !ignore ) {
- int h=xoutRound(htmlOut->getPageHeight()/scale);
- int w=xoutRound(htmlOut->getPageWidth()/scale);
- //int h=xoutRound(doc->getPageHeight(1)/scale);
- //int w=xoutRound(doc->getPageWidth(1)/scale);
-
- psFileName = new GooString(htmlFileName->getCString());
- psFileName->append(".ps");
-
- psOut = new PSOutputDev(psFileName->getCString(), doc->getXRef(),
- doc->getCatalog(), NULL, firstPage, lastPage, psModePS, w, h);
- psOut->setDisplayText(gFalse);
- doc->displayPages(psOut, firstPage, lastPage, 72, 72, 0,
- gTrue, gFalse, gFalse);
- delete psOut;
+ if(useSplash) {
+#ifdef HAVE_SPLASH
+ GooString *imgFileName = NULL;
+ // White paper color
+ SplashColor color;
+ color[0] = color[1] = color[2] = 255;
+ // If the user specified "jpg" use JPEG, otherwise PNG
+ SplashImageFileFormat format = strcmp(extension, "jpg") ?
+ splashFormatPng : splashFormatJpeg;
+
+ splashOut = new SplashOutputDevNoText(splashModeRGB8, 4, gFalse, color);
+ splashOut->startDoc(doc->getXRef());
+
+ for (int pg = firstPage; pg <= lastPage; ++pg) {
+ int pg_w = doc->getPageMediaWidth(pg) / scale;
+ int pg_h = doc->getPageMediaHeight(pg) / scale;
+ if ((doc->getPageRotate(pg) == 90) || (doc->getPageRotate(pg) == 270)) {
+ int tmp = pg_w;
+ pg_w = pg_h;
+ pg_h = tmp;
+ }
+
+ doc->displayPage(splashOut, pg, 72, 72, 0, gTrue, gFalse, gFalse);
+ SplashBitmap *bitmap = splashOut->getBitmap();
+
+ imgFileName = GooString::format("{0:s}{1:03d}.{2:s}",
+ htmlFileName->getCString(), pg, extension);
+
+ bitmap->writeImgFile(format, imgFileName->getCString(), 72, 72);
- /*sprintf(buf, "%s -sDEVICE=png16m -dBATCH -dNOPROMPT -dNOPAUSE -r72 -sOutputFile=%s%%03d.png -g%dx%d -q %s", GHOSTSCRIPT, htmlFileName->getCString(), w, h,
+ delete imgFileName;
+ }
+
+ delete splashOut;
+#endif
+ } else {
+ int h=xoutRound(htmlOut->getPageHeight()/scale);
+ int w=xoutRound(htmlOut->getPageWidth()/scale);
+ //int h=xoutRound(doc->getPageHeight(1)/scale);
+ //int w=xoutRound(doc->getPageWidth(1)/scale);
+
+ psFileName = new GooString(htmlFileName->getCString());
+ psFileName->append(".ps");
+
+ psOut = new PSOutputDev(psFileName->getCString(), doc->getXRef(),
+ doc->getCatalog(), NULL, firstPage, lastPage, psModePS, w, h);
+ psOut->setDisplayText(gFalse);
+ doc->displayPages(psOut, firstPage, lastPage, 72, 72, 0,
+ gTrue, gFalse, gFalse);
+ delete psOut;
+
+ /*sprintf(buf, "%s -sDEVICE=png16m -dBATCH -dNOPROMPT -dNOPAUSE -r72 -sOutputFile=%s%%03d.png -g%dx%d -q %s", GHOSTSCRIPT, htmlFileName->getCString(), w, h,
psFileName->getCString());*/
-
- GooString *gsCmd = new GooString(GHOSTSCRIPT);
- GooString *tw, *th, *sc;
- gsCmd->append(" -sDEVICE=");
- gsCmd->append(gsDevice);
- gsCmd->append(" -dBATCH -dNOPROMPT -dNOPAUSE -r");
- sc = GooString::fromInt(static_cast<int>(72*scale));
- gsCmd->append(sc);
- gsCmd->append(" -sOutputFile=");
- gsCmd->append("\"");
- gsCmd->append(htmlFileName);
- gsCmd->append("%03d.");
- gsCmd->append(extension);
- gsCmd->append("\" -g");
- tw = GooString::fromInt(static_cast<int>(scale*w));
- gsCmd->append(tw);
- gsCmd->append("x");
- th = GooString::fromInt(static_cast<int>(scale*h));
- gsCmd->append(th);
- gsCmd->append(" -q \"");
- gsCmd->append(psFileName);
- gsCmd->append("\"");
-// printf("running: %s\n", gsCmd->getCString());
- if( !executeCommand(gsCmd->getCString()) && !errQuiet) {
- error(-1, "Failed to launch Ghostscript!\n");
+
+ GooString *gsCmd = new GooString(GHOSTSCRIPT);
+ GooString *tw, *th, *sc;
+ gsCmd->append(" -sDEVICE=");
+ gsCmd->append(gsDevice);
+ gsCmd->append(" -dBATCH -dNOPROMPT -dNOPAUSE -r");
+ sc = GooString::fromInt(static_cast<int>(72*scale));
+ gsCmd->append(sc);
+ gsCmd->append(" -sOutputFile=");
+ gsCmd->append("\"");
+ gsCmd->append(htmlFileName);
+ gsCmd->append("%03d.");
+ gsCmd->append(extension);
+ gsCmd->append("\" -g");
+ tw = GooString::fromInt(static_cast<int>(scale*w));
+ gsCmd->append(tw);
+ gsCmd->append("x");
+ th = GooString::fromInt(static_cast<int>(scale*h));
+ gsCmd->append(th);
+ gsCmd->append(" -q \"");
+ gsCmd->append(psFileName);
+ gsCmd->append("\"");
+ // printf("running: %s\n", gsCmd->getCString());
+ if( !executeCommand(gsCmd->getCString()) && !errQuiet) {
+ error(-1, "Failed to launch Ghostscript!\n");
+ }
+ unlink(psFileName->getCString());
+ delete tw;
+ delete th;
+ delete sc;
+ delete gsCmd;
+ delete psFileName;
}
- unlink(psFileName->getCString());
- delete tw;
- delete th;
- delete sc;
- delete gsCmd;
- delete psFileName;
}
delete htmlOut;
More information about the poppler
mailing list