[poppler] utils/pdftohtml.1 utils/pdftohtml.cc

Albert Astals Cid aacid at kemper.freedesktop.org
Sun Aug 22 14:14:11 PDT 2010


 utils/pdftohtml.1  |    8 +-
 utils/pdftohtml.cc |  210 ++++++++++++++++++++++++++++++++++++++---------------
 2 files changed, 158 insertions(+), 60 deletions(-)

New commits:
commit 8591c804598576556c6d24a66b6648de8ed1c4eb
Author: Mike Slegeir <tehpola at yahoo.com>
Date:   Sun Aug 22 22:01:03 2010 +0100

    Use splash instead of external gs invocation to render the background
    
    Patch in bug 19404, should fix 9746 too

diff --git a/utils/pdftohtml.1 b/utils/pdftohtml.1
index 850aa84..fbcc077 100644
--- a/utils/pdftohtml.1
+++ b/utils/pdftohtml.1
@@ -68,7 +68,13 @@ user password (for encrypted files)
 force hidden text extraction
 .TP
 .B \-dev 
-output device name for Ghostscript (png16m, jpeg etc)
+output device name for Ghostscript (png16m, jpeg etc).
+Unless this option is specified, Splash will be used
+.TP
+.B \-fmt
+image file format for Splash output (png or jpg).
+If complex is selected, but neither -fmt nor -dev is specified,
+-fmt png will be assumed
 .TP
 .B \-nomerge
 do not merge paragraphs
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index 3c74c6e..87a4d69 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -15,6 +15,7 @@
 //
 // Copyright (C) 2007-2008, 2010 Albert Astals Cid <aacid at kde.org>
 // Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
+// Copyright (C) 2010 Mike Slegeir <tehpola at yahoo.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -44,6 +45,10 @@
 #include "PDFDoc.h"
 #include "PDFDocFactory.h"
 #include "HtmlOutputDev.h"
+#ifdef HAVE_SPLASH
+#include "SplashOutputDev.h"
+#include "splash/SplashBitmap.h"
+#endif
 #include "PSOutputDev.h"
 #include "GlobalParams.h"
 #include "Error.h"
@@ -62,7 +67,8 @@ static GBool printHelp = gFalse;
 GBool printHtml = gFalse;
 GBool complexMode=gFalse;
 GBool ignore=gFalse;
-//char extension[5]=".png";
+GBool useSplash=gTrue;
+char extension[5]="png";
 double scale=1.5;
 GBool noframes=gFalse;
 GBool stout=gFalse;
@@ -74,7 +80,7 @@ GBool showHidden = gFalse;
 GBool noMerge = gFalse;
 static char ownerPassword[33] = "";
 static char userPassword[33] = "";
-static char gsDevice[33] = "png16m";
+static char gsDevice[33] = "none";
 static GBool printVersion = gFalse;
 
 static GooString* getInfoString(Dict *infoDict, char *key);
@@ -117,6 +123,8 @@ static const ArgDesc argDesc[] = {
    "output text encoding name"},
   {"-dev",    argString,   gsDevice,       sizeof(gsDevice),
    "output device name for Ghostscript (png16m, jpeg etc)"},
+  {"-fmt",    argString,   extension,      sizeof(extension),
+   "image file format for Splash output (png or jpg)"},
   {"-v",      argFlag,     &printVersion,  0,
    "print copyright and version info"},
   {"-opw",    argString,   ownerPassword,  sizeof(ownerPassword),
@@ -128,6 +136,32 @@ static const ArgDesc argDesc[] = {
   {NULL}
 };
 
+#ifdef HAVE_SPLASH
+class SplashOutputDevNoText : public SplashOutputDev { // SplashOutputDev variant whose text-related hooks are stubbed out
+public:
+  SplashOutputDevNoText(SplashColorMode colorModeA, int bitmapRowPadA,
+        GBool reverseVideoA, SplashColorPtr paperColorA,
+        GBool bitmapTopDownA = gTrue,
+        GBool allowAntialiasA = gTrue) : SplashOutputDev(colorModeA,
+            bitmapRowPadA, reverseVideoA, paperColorA, bitmapTopDownA,
+            allowAntialiasA) { } // forwards every argument unchanged to the SplashOutputDev constructor
+  virtual ~SplashOutputDevNoText() { }
+  // Text hooks below have empty bodies or return false; presumably they override the SplashOutputDev virtuals of the same name -- confirm against SplashOutputDev.h
+  void drawChar(GfxState *state, double x, double y,
+      double dx, double dy,
+      double originX, double originY,
+      CharCode code, int nBytes, Unicode *u, int uLen) { } // no-op: nothing is drawn for the character
+  GBool beginType3Char(GfxState *state, double x, double y,
+      double dx, double dy,
+      CharCode code, Unicode *u, int uLen) { return false; } // always false; NOTE(review): confirm how the caller interprets false
+  void endType3Char(GfxState *state) { } // no-op
+  void beginTextObject(GfxState *state) { } // no-op
+  GBool deviceHasTextClip(GfxState *state) { return false; } // always reports no text clip
+  void endTextObject(GfxState *state) { } // no-op
+  GBool interpretType3Chars() { return gFalse; } // always gFalse; presumably keeps Type 3 glyph programs from being interpreted -- TODO confirm against OutputDev.h
+};
+#endif
+
 int main(int argc, char *argv[]) {
   PDFDoc *doc = NULL;
   GooString *fileName = NULL;
@@ -136,10 +170,12 @@ int main(int argc, char *argv[]) {
   GooString *htmlFileName = NULL;
   GooString *psFileName = NULL;
   HtmlOutputDev *htmlOut = NULL;
+#ifdef HAVE_SPLASH
+  SplashOutputDev *splashOut = NULL;
+#endif
   PSOutputDev *psOut = NULL;
   GBool ok;
   char *p;
-  char extension[16] = "png";
   GooString *ownerPW, *userPW;
   Object info;
   char * extsList[] = {"png", "jpeg", "bmp", "pcx", "tiff", "pbm", NULL};
@@ -289,16 +325,35 @@ int main(int argc, char *argv[]) {
   info.free();
   if( !docTitle ) docTitle = new GooString(htmlFileName);
 
-  /* determine extensions of output backgroun images */
-  {int i;
-  for(i = 0; extsList[i]; i++)
-  {
-	  if( strstr(gsDevice, extsList[i]) != (char *) NULL )
-	  {
-		  strncpy(extension, extsList[i], sizeof(extension));
-		  break;
-	  }
-  }}
+  if( strcmp("none", gsDevice) ) {
+    useSplash = gFalse;
+    /* determine extensions of output background images */
+    int i;
+    for(i = 0; extsList[i]; i++)
+    {
+      if( strstr(gsDevice, extsList[i]) != (char *) NULL )
+      {
+        strncpy(extension, extsList[i], sizeof(extension));
+        break;
+      }
+    }
+  }
+
+#ifndef HAVE_SPLASH
+  if( useSplash ) {
+    fprintf(stderr, "You are trying to use the -fmt option but your pdftohtml was built without support for it. Please use the -dev option\n");
+    delete docTitle;
+    delete author;
+    delete keywords;
+    delete subject;
+    delete date;
+    delete htmlFileName;
+    delete globalParams;
+    delete fileName;
+    delete doc;
+    return -1;
+  }
+#endif
 
   rawOrder = complexMode; // todo: figure out what exactly rawOrder do :)
 
@@ -342,55 +397,92 @@ int main(int argc, char *argv[]) {
   }
   
   if( complexMode && !xml && !ignore ) {
-    int h=xoutRound(htmlOut->getPageHeight()/scale);
-    int w=xoutRound(htmlOut->getPageWidth()/scale);
-    //int h=xoutRound(doc->getPageHeight(1)/scale);
-    //int w=xoutRound(doc->getPageWidth(1)/scale);
-
-    psFileName = new GooString(htmlFileName->getCString());
-    psFileName->append(".ps");
-
-    psOut = new PSOutputDev(psFileName->getCString(), doc->getXRef(),
-			    doc->getCatalog(), NULL, firstPage, lastPage, psModePS, w, h);
-    psOut->setDisplayText(gFalse);
-    doc->displayPages(psOut, firstPage, lastPage, 72, 72, 0,
-		      gTrue, gFalse, gFalse);
-    delete psOut;
+    if(useSplash) {
+#ifdef HAVE_SPLASH
+      GooString *imgFileName = NULL;
+      // White paper color
+      SplashColor color;
+      color[0] = color[1] = color[2] = 255;
+      // If the user specified "jpg" use JPEG, otherwise PNG
+      SplashImageFileFormat format = strcmp(extension, "jpg") ?
+          splashFormatPng : splashFormatJpeg;
+
+      splashOut = new SplashOutputDevNoText(splashModeRGB8, 4, gFalse, color);
+      splashOut->startDoc(doc->getXRef());
+
+      for (int pg = firstPage; pg <= lastPage; ++pg) {
+        int pg_w = doc->getPageMediaWidth(pg) / scale;
+        int pg_h = doc->getPageMediaHeight(pg) / scale;
+        if ((doc->getPageRotate(pg) == 90) || (doc->getPageRotate(pg) == 270)) {
+          int tmp = pg_w;
+          pg_w = pg_h;
+          pg_h = tmp;
+        }
+
+        doc->displayPage(splashOut, pg, 72, 72, 0, gTrue, gFalse, gFalse);
+        SplashBitmap *bitmap = splashOut->getBitmap();
+
+        imgFileName = GooString::format("{0:s}{1:03d}.{2:s}", 
+            htmlFileName->getCString(), pg, extension);
+
+        bitmap->writeImgFile(format, imgFileName->getCString(), 72, 72);
 
-    /*sprintf(buf, "%s -sDEVICE=png16m -dBATCH -dNOPROMPT -dNOPAUSE -r72 -sOutputFile=%s%%03d.png -g%dx%d -q %s", GHOSTSCRIPT, htmlFileName->getCString(), w, h,
+        delete imgFileName;
+      }
+
+      delete splashOut;
+#endif
+    } else {
+      int h=xoutRound(htmlOut->getPageHeight()/scale);
+      int w=xoutRound(htmlOut->getPageWidth()/scale);
+      //int h=xoutRound(doc->getPageHeight(1)/scale);
+      //int w=xoutRound(doc->getPageWidth(1)/scale);
+
+      psFileName = new GooString(htmlFileName->getCString());
+      psFileName->append(".ps");
+
+      psOut = new PSOutputDev(psFileName->getCString(), doc->getXRef(),
+          doc->getCatalog(), NULL, firstPage, lastPage, psModePS, w, h);
+      psOut->setDisplayText(gFalse);
+      doc->displayPages(psOut, firstPage, lastPage, 72, 72, 0,
+          gTrue, gFalse, gFalse);
+      delete psOut;
+
+      /*sprintf(buf, "%s -sDEVICE=png16m -dBATCH -dNOPROMPT -dNOPAUSE -r72 -sOutputFile=%s%%03d.png -g%dx%d -q %s", GHOSTSCRIPT, htmlFileName->getCString(), w, h,
       psFileName->getCString());*/
-    
-    GooString *gsCmd = new GooString(GHOSTSCRIPT);
-    GooString *tw, *th, *sc;
-    gsCmd->append(" -sDEVICE=");
-	gsCmd->append(gsDevice);
-	gsCmd->append(" -dBATCH -dNOPROMPT -dNOPAUSE -r");
-    sc = GooString::fromInt(static_cast<int>(72*scale));
-    gsCmd->append(sc);
-    gsCmd->append(" -sOutputFile=");
-    gsCmd->append("\"");
-    gsCmd->append(htmlFileName);
-    gsCmd->append("%03d.");
-	gsCmd->append(extension);
-	gsCmd->append("\" -g");
-    tw = GooString::fromInt(static_cast<int>(scale*w));
-    gsCmd->append(tw);
-    gsCmd->append("x");
-    th = GooString::fromInt(static_cast<int>(scale*h));
-    gsCmd->append(th);
-    gsCmd->append(" -q \"");
-    gsCmd->append(psFileName);
-    gsCmd->append("\"");
-//    printf("running: %s\n", gsCmd->getCString());
-    if( !executeCommand(gsCmd->getCString()) && !errQuiet) {
-      error(-1, "Failed to launch Ghostscript!\n");
+
+      GooString *gsCmd = new GooString(GHOSTSCRIPT);
+      GooString *tw, *th, *sc;
+      gsCmd->append(" -sDEVICE=");
+      gsCmd->append(gsDevice);
+      gsCmd->append(" -dBATCH -dNOPROMPT -dNOPAUSE -r");
+      sc = GooString::fromInt(static_cast<int>(72*scale));
+      gsCmd->append(sc);
+      gsCmd->append(" -sOutputFile=");
+      gsCmd->append("\"");
+      gsCmd->append(htmlFileName);
+      gsCmd->append("%03d.");
+      gsCmd->append(extension);
+      gsCmd->append("\" -g");
+      tw = GooString::fromInt(static_cast<int>(scale*w));
+      gsCmd->append(tw);
+      gsCmd->append("x");
+      th = GooString::fromInt(static_cast<int>(scale*h));
+      gsCmd->append(th);
+      gsCmd->append(" -q \"");
+      gsCmd->append(psFileName);
+      gsCmd->append("\"");
+      //    printf("running: %s\n", gsCmd->getCString());
+      if( !executeCommand(gsCmd->getCString()) && !errQuiet) {
+        error(-1, "Failed to launch Ghostscript!\n");
+      }
+      unlink(psFileName->getCString());
+      delete tw;
+      delete th;
+      delete sc;
+      delete gsCmd;
+      delete psFileName;
     }
-    unlink(psFileName->getCString());
-    delete tw;
-    delete th;
-    delete sc;
-    delete gsCmd;
-    delete psFileName;
   }
   
   delete htmlOut;


More information about the poppler mailing list