[poppler] utils/HtmlFonts.cc utils/HtmlFonts.h utils/HtmlOutputDev.cc utils/pdftohtml.1 utils/pdftohtml.cc

Albert Astals Cid aacid at kemper.freedesktop.org
Thu Sep 16 15:24:00 PDT 2010


 utils/HtmlFonts.cc     |   11 +++++-
 utils/HtmlFonts.h      |   18 +++++++++-
 utils/HtmlOutputDev.cc |   84 ++++++++++++++++++++++++++++++-------------------
 utils/pdftohtml.1      |    3 +
 utils/pdftohtml.cc     |   15 +++++++-
 5 files changed, 93 insertions(+), 38 deletions(-)

New commits:
commit 3b4a901a4431814590449b6cd5ea418f4d6c1172
Author: Albert Astals Cid <aacid at kde.org>
Date:   Thu Sep 16 23:23:03 2010 +0100

    Add -s option
    
    Writes a single HTML file.
    Since git does not allow multiple authors, I'm adding them here:
    OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)

diff --git a/utils/HtmlFonts.cc b/utils/HtmlFonts.cc
index d2cbfd5..4b592d5 100644
--- a/utils/HtmlFonts.cc
+++ b/utils/HtmlFonts.cc
@@ -20,6 +20,7 @@
 // Copyright (C) 2007 Albert Astals Cid <aacid at kde.org>
 // Copyright (C) 2008 Boris Toloknov <tlknv at yandex.ru>
 // Copyright (C) 2008 Tomas Are Haavet <tomasare at gmail.com>
+// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -288,12 +289,14 @@ int HtmlFontAccu::AddFont(const HtmlFont& font){
 }
 
 // get CSS font name for font #i 
-GooString* HtmlFontAccu::getCSStyle(int i, GooString* content){
+GooString* HtmlFontAccu::getCSStyle(int i, GooString* content, int j){
   GooString *tmp;
   GooString *iStr=GooString::fromInt(i);
+  GooString *jStr=GooString::fromInt(j);
   
   if (!xml) {
     tmp = new GooString("<span class=\"ft");
+    tmp->append(jStr);
     tmp->append(iStr);
     tmp->append("\">");
     tmp->append(content);
@@ -303,14 +306,16 @@ GooString* HtmlFontAccu::getCSStyle(int i, GooString* content){
     tmp->append(content);
   }
 
+  delete jStr;
   delete iStr;
   return tmp;
 }
 
 // get CSS font definition for font #i 
-GooString* HtmlFontAccu::CSStyle(int i){
+GooString* HtmlFontAccu::CSStyle(int i, int j){
    GooString *tmp=new GooString();
    GooString *iStr=GooString::fromInt(i);
+   GooString *jStr=GooString::fromInt(j);
 
    GooVector<HtmlFont>::iterator g=accu->begin();
    g+=i;
@@ -322,6 +327,7 @@ GooString* HtmlFontAccu::CSStyle(int i){
    
    if(!xml){
      tmp->append(".ft");
+     tmp->append(jStr);
      tmp->append(iStr);
      tmp->append("{font-size:");
      tmp->append(Size);
@@ -352,6 +358,7 @@ GooString* HtmlFontAccu::CSStyle(int i){
 
    delete fontName;
    delete colorStr;
+   delete jStr;
    delete iStr;
    delete Size;
    return tmp;
diff --git a/utils/HtmlFonts.h b/utils/HtmlFonts.h
index df2b570..54deaf8 100644
--- a/utils/HtmlFonts.h
+++ b/utils/HtmlFonts.h
@@ -10,6 +10,20 @@
 //
 //========================================================================
 
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
 #ifndef _HTML_FONTS_H
 #define _HTML_FONTS_H
 #include "goo/GooVector.h"
@@ -89,8 +103,8 @@ public:
     g+=i;  
     return g;
   } 
-  GooString* getCSStyle (int i, GooString* content);
-  GooString* CSStyle(int i);
+  GooString* getCSStyle (int i,GooString* content, int j = 0);
+  GooString* CSStyle(int i, int j = 0);
   int size() const {return accu->size();}
   
 };  
diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index dbf677f..8ff8f08 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -27,6 +27,7 @@
 // Copyright (C) 2009 Reece Dunn <msclrhd at gmail.com>
 // Copyright (C) 2010 Adrian Johnson <ajohnson at redneon.com>
 // Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
+// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -65,6 +66,7 @@ GooList *HtmlOutputDev::imgList=new GooList();
 
 extern double scale;
 extern GBool complexMode;
+extern GBool singleHtml;
 extern GBool ignore;
 extern GBool printCommands;
 extern GBool printHtml;
@@ -670,22 +672,33 @@ void HtmlPage::dumpComplex(FILE *file, int page){
   {
       GooString* pgNum=GooString::fromInt(page);
       tmp = new GooString(DocName);
-      tmp->append('-')->append(pgNum)->append(".html");
+      if (!singleHtml){
+            tmp->append('-')->append(pgNum)->append(".html");
+            pageFile = fopen(tmp->getCString(), "w");
+      } else {
+            tmp->append("-html")->append(".html");
+            pageFile = fopen(tmp->getCString(), "a");
+      }
       delete pgNum;
-  
-      if (!(pageFile = fopen(tmp->getCString(), "w"))) {
+      if (!pageFile) {
 	  error(-1, "Couldn't open html file '%s'", tmp->getCString());
 	  delete tmp;
 	  return;
       } 
-      delete tmp;
 
-      fprintf(pageFile,"%s\n<HTML>\n<HEAD>\n<TITLE>Page %d</TITLE>\n\n",
-	      DOCTYPE, page);
+      if (!singleHtml)
+          fprintf(pageFile,"%s\n<HTML>\n<HEAD>\n<TITLE>Page %d</TITLE>\n\n", DOCTYPE, page);
+      else
+          fprintf(pageFile,"%s\n<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n\n", DOCTYPE, tmp->getCString());
+
+      delete tmp;
 
       htmlEncoding = HtmlOutputDev::mapEncodingToHtml
 	  (globalParams->getTextEncodingName());
-      fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding);
+      if (!singleHtml)
+          fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding);
+      else
+          fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n <br>\n", htmlEncoding);
   }
   else 
   {
@@ -701,7 +714,11 @@ void HtmlPage::dumpComplex(FILE *file, int page){
    
   fputs("<STYLE type=\"text/css\">\n<!--\n",pageFile);
   for(int i=fontsPageMarker;i!=fonts->size();i++) {
-    GooString *fontCSStyle = fonts->CSStyle(i);
+    GooString *fontCSStyle;
+    if (!singleHtml)
+         fontCSStyle = fonts->CSStyle(i);
+    else
+         fontCSStyle = fonts->CSStyle(i,page);
     fprintf(pageFile,"\t%s\n",fontCSStyle->getCString());
     delete fontCSStyle;
   }
@@ -732,7 +749,10 @@ void HtmlPage::dumpComplex(FILE *file, int page){
 	      xoutRound(tmp1->yMin),
 	      xoutRound(tmp1->xMin));
       fputs("<nobr>",pageFile); 
-      str1=fonts->getCSStyle(tmp1->fontpos, str);  
+      if (!singleHtml)
+          str1=fonts->getCSStyle(tmp1->fontpos, str);
+      else
+          str1=fonts->getCSStyle(tmp1->fontpos, str, page);
       fputs(str1->getCString(),pageFile);
       delete str;      
       delete str1;
@@ -752,7 +772,7 @@ void HtmlPage::dumpComplex(FILE *file, int page){
 
 void HtmlPage::dump(FILE *f, int pageNum) 
 {
-  if (complexMode)
+  if (complexMode || singleHtml)
   {
     if (xml) dumpAsXML(f, pageNum);
     if (!xml) dumpComplex(f, pageNum);  
@@ -943,28 +963,30 @@ HtmlOutputDev::HtmlOutputDev(char *fileName, char *title,
   // for non-xml output (complex or simple) with frames generate the left frame
   if(!xml && !noframes)
   {
-     GooString* left=new GooString(fileName);
-     left->append("_ind.html");
-
-     doFrame(firstPage);
-   
-     if (!(fContentsFrame = fopen(left->getCString(), "w")))
-	 {
-        error(-1, "Couldn't open html file '%s'", left->getCString());
-		delete left;
-        return;
+     if (!singleHtml)
+     {
+         GooString* left=new GooString(fileName);
+         left->append("_ind.html");
+
+         doFrame(firstPage);
+
+         if (!(fContentsFrame = fopen(left->getCString(), "w")))
+         {
+             error(-1, "Couldn't open html file '%s'", left->getCString());
+             delete left;
+             return;
+         }
+         delete left;
+         fputs(DOCTYPE, fContentsFrame);
+         fputs("<HTML>\n<HEAD>\n<TITLE></TITLE>\n</HEAD>\n<BODY>\n",fContentsFrame);
+
+         if (doOutline)
+         {
+             GooString *str = basename(Docname);
+             fprintf(fContentsFrame, "<A href=\"%s%s\" target=\"contents\">Outline</a><br>", str->getCString(), complexMode ? "-outline.html" : "s.html#outline");
+             delete str;
+         }
      }
-     delete left;
-     fputs(DOCTYPE, fContentsFrame);
-     fputs("<HTML>\n<HEAD>\n<TITLE></TITLE>\n</HEAD>\n<BODY>\n",fContentsFrame);
-     
-  	if (doOutline)
-	{
-		GooString *str = basename(Docname);
-		fprintf(fContentsFrame, "<A href=\"%s%s\" target=\"contents\">Outline</a><br>", str->getCString(), complexMode ? "-outline.html" : "s.html#outline");
-		delete str;
-	}
-  	
 	if (!complexMode)
 	{	/* not in complex mode */
 		
diff --git a/utils/pdftohtml.1 b/utils/pdftohtml.1
index 6cdc6c6..bbdfa56 100644
--- a/utils/pdftohtml.1
+++ b/utils/pdftohtml.1
@@ -40,6 +40,9 @@ exchange .pdf links with .html
 .B \-c
 generate complex output
 .TP
+.B \-s
+generate single html that includes all pages
+.TP
 .B \-i
 ignore images
 .TP
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index 5762f90..5323b6e 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -17,6 +17,7 @@
 // Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
 // Copyright (C) 2010 Mike Slegeir <tehpola at yahoo.com>
 // Copyright (C) 2010 Suzuki Toshiya <mpsuzuki at hiroshima-u.ac.jp>
+// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac at cdacmumbai.in) and Onkar Potdar (onkar at cdacmumbai.in)
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -67,6 +68,7 @@ GBool printCommands = gTrue;
 static GBool printHelp = gFalse;
 GBool printHtml = gFalse;
 GBool complexMode=gFalse;
+GBool singleHtml=gFalse; // singleHtml
 GBool ignore=gFalse;
 GBool useSplash=gTrue;
 char extension[5]="png";
@@ -107,6 +109,8 @@ static const ArgDesc argDesc[] = {
    "exchange .pdf links by .html"}, 
   {"-c",      argFlag,     &complexMode,          0,
    "generate complex document"},
+  {"-s",      argFlag,     &singleHtml,          0,
+   "generate single document that includes all pages"},
   {"-i",      argFlag,     &ignore,        0,
    "ignore images"},
   {"-noframes", argFlag,   &noframes,      0,
@@ -293,7 +297,7 @@ int main(int argc, char *argv[]) {
    if (scale>3.0) scale=3.0;
    if (scale<0.5) scale=0.5;
    
-   if (complexMode) {
+   if (complexMode || singleHtml) {
      //noframes=gFalse;
      stout=gFalse;
    } 
@@ -301,11 +305,13 @@ int main(int argc, char *argv[]) {
    if (stout) {
      noframes=gTrue;
      complexMode=gFalse;
+     singleHtml=gFalse;
    }
 
    if (xml)
    { 
        complexMode = gTrue;
+       singleHtml = gFalse;
        noframes = gTrue;
        noMerge = gTrue;
    }
@@ -359,7 +365,10 @@ int main(int argc, char *argv[]) {
   }
 #endif
 
-  rawOrder = complexMode; // todo: figure out what exactly rawOrder do :)
+  if (!singleHtml)
+      rawOrder = complexMode; // todo: figure out what exactly rawOrder do :)
+  else
+      rawOrder = singleHtml;
 
   // write text file
   htmlOut = new HtmlOutputDev(htmlFileName->getCString(), 
@@ -400,7 +409,7 @@ int main(int argc, char *argv[]) {
 	}
   }
   
-  if( complexMode && !xml && !ignore ) {
+  if ((complexMode || singleHtml) && !xml && !ignore) {
     if(useSplash) {
 #ifdef HAVE_SPLASH
       GooString *imgFileName = NULL;


More information about the poppler mailing list