[poppler] 2 commits - utils/HtmlFonts.cc utils/HtmlOutputDev.cc utils/pdftohtml.1 utils/pdftohtml.cc

Albert Astals Cid aacid at kemper.freedesktop.org
Mon May 28 14:46:12 PDT 2012


 utils/HtmlFonts.cc     |    4 +
 utils/HtmlOutputDev.cc |  105 ++++++++++++++++++++++++-------------------------
 utils/pdftohtml.1      |    3 +
 utils/pdftohtml.cc     |    4 +
 4 files changed, 63 insertions(+), 53 deletions(-)

New commits:
commit 20210fbb6117649b20f6930031f24b8fc97b773d
Author: Luis Parravicini <lparravi at gmail.com>
Date:   Mon May 28 23:44:17 2012 +0200

    pdftohtml: Add -fontfullname
    
    Outputs the font name without any substitutions. Bug #49872

diff --git a/utils/HtmlFonts.cc b/utils/HtmlFonts.cc
index be02c5f..e0ea8a7 100644
--- a/utils/HtmlFonts.cc
+++ b/utils/HtmlFonts.cc
@@ -24,6 +24,7 @@
 // Copyright (C) 2011 Joshua Richardson <jric at chegg.com>
 // Copyright (C) 2011 Stephen Reichling <sreichling at chegg.com>
 // Copyright (C) 2012 Igor Slepchin <igor.slepchin at gmail.com>
+// Copyright (C) 2012 Luis Parravicini <lparravi at gmail.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -63,6 +64,7 @@ static Fonts fonts[font_num+1]={
 
 #define xoutRound(x) ((int)(x + 0.5))
 extern GBool xml;
+extern GBool fontFullName;
 
 GooString* HtmlFont::DefaultFont=new GooString("Times"); // Arial,Helvetica,sans-serif
 
@@ -318,7 +320,7 @@ GooString* HtmlFontAccu::CSStyle(int i, int j){
    HtmlFont font=*g;
    GooString *Size=GooString::fromInt(font.getSize());
    GooString *colorStr=font.getColor().toString();
-   GooString *fontName=font.getFontName();
+   GooString *fontName=(fontFullName ? font.getFullName() : font.getFontName());
    GooString *lSize;
    
    if(!xml){
diff --git a/utils/pdftohtml.1 b/utils/pdftohtml.1
index fe9ec67..f08fccb 100644
--- a/utils/pdftohtml.1
+++ b/utils/pdftohtml.1
@@ -85,6 +85,9 @@ override document DRM settings
 adjust the word break threshold percent. Default is 10.
 Word break occurs when distance between two adjacent characters is
 greater than this percent of character height.
+.TP
+.B \-fontfullname
+outputs the font name without any substitutions.
 
 .SH AUTHOR
 
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index cff0964..97372be 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -21,6 +21,7 @@
 // Copyright (C) 2011 Steven Murdoch <Steven.Murdoch at cl.cam.ac.uk>
 // Copyright (C) 2012 Igor Slepchin <igor.redhat at gmail.com>
 // Copyright (C) 2012 Ihar Filipau <thephilips at gmail.com>
+// Copyright (C) 2012 Luis Parravicini <lparravi at gmail.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -82,6 +83,7 @@ double wordBreakThreshold=10;  // 10%, below converted into a coefficient - 0.1
 
 GBool showHidden = gFalse;
 GBool noMerge = gFalse;
+GBool fontFullName = gFalse;
 static char ownerPassword[33] = "";
 static char userPassword[33] = "";
 static GBool printVersion = gFalse;
@@ -138,6 +140,8 @@ static const ArgDesc argDesc[] = {
    "override document DRM settings"},
   {"-wbt",    argFP,    &wordBreakThreshold, 0,
    "word break threshold (default 10 percent)"},
+  {"-fontfullname", argFlag, &fontFullName, 0,
+   "outputs font full name"},   
   {NULL}
 };
 
commit 03f979a7e59c4eb5ecb8acc324c7faf700144589
Author: Gerald Schmidt <solahcin at gmail.com>
Date:   Sat May 26 17:46:59 2012 +0200

    Make the output more XHTML-compliant

diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index 1d1b628..83f65d5 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -32,6 +32,7 @@
 // Copyright (C) 2011 Stephen Reichling <sreichling at chegg.com>
 // Copyright (C) 2011, 2012 Igor Slepchin <igor.slepchin at gmail.com>
 // Copyright (C) 2012 Ihar Filipau <thephilips at gmail.com>
+// Copyright (C) 2012 Gerald Schmidt <solahcin at gmail.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -782,7 +783,7 @@ static void printCSS(FILE *f)
   // http://stackoverflow.com/questions/1309055/cross-browser-way-to-flip-html-image-via-javascript-css
   // tested in Chrome, Fx (Linux) and IE9 (W7)
   static const char css[] = 
-    "<STYLE type=\"text/css\">" "\n"
+    "<style type=\"text/css\">" "\n"
     "<!--" "\n"
     ".xflip {" "\n"
     "    -moz-transform: scaleX(-1);" "\n"
@@ -806,7 +807,7 @@ static void printCSS(FILE *f)
     "    filter: fliph + flipv;" "\n"
     "}" "\n"
     "-->" "\n"
-    "</STYLE>" "\n";
+    "</style>" "\n";
 
   fwrite( css, sizeof(css)-1, 1, f );
 }
@@ -833,17 +834,17 @@ int HtmlPage::dumpComplexHeaders(FILE * const file, FILE *& pageFile, int page)
       } 
 
       if (!singleHtml)
-        fprintf(pageFile,"%s\n<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>Page %d</TITLE>\n\n", DOCTYPE, page);
+        fprintf(pageFile,"%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>Page %d</title>\n\n", DOCTYPE, page);
       else
-        fprintf(pageFile,"%s\n<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>%s</TITLE>\n\n", DOCTYPE, tmp->getCString());
+        fprintf(pageFile,"%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>%s</title>\n\n", DOCTYPE, tmp->getCString());
 
       delete tmp;
 
       GooString *htmlEncoding = HtmlOutputDev::mapEncodingToHtml(globalParams->getTextEncodingName());
       if (!singleHtml)
-        fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding->getCString());
+        fprintf(pageFile, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding->getCString());
       else
-        fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n <br/>\n", htmlEncoding->getCString());
+        fprintf(pageFile, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n <br/>\n", htmlEncoding->getCString());
       delete htmlEncoding;
   }
   else 
@@ -866,7 +867,7 @@ void HtmlPage::dumpComplex(FILE *file, int page){
 
   tmp=basename(DocName);
    
-  fputs("<STYLE type=\"text/css\">\n<!--\n",pageFile);
+  fputs("<style type=\"text/css\">\n<!--\n",pageFile);
   fputs("\tp {margin: 0; padding: 0;}",pageFile);
   for(int i=fontsPageMarker;i!=fonts->size();i++) {
     GooString *fontCSStyle;
@@ -878,20 +879,20 @@ void HtmlPage::dumpComplex(FILE *file, int page){
     delete fontCSStyle;
   }
  
-  fputs("-->\n</STYLE>\n",pageFile);
+  fputs("-->\n</style>\n",pageFile);
   
   if( !noframes )
   {  
-      fputs("</HEAD>\n<BODY bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n",pageFile); 
+      fputs("</head>\n<body bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n",pageFile); 
   }
   
-  fprintf(pageFile,"<DIV id=\"page%d-div\" style=\"position:relative;width:%dpx;height:%dpx;\">\n",
+  fprintf(pageFile,"<div id=\"page%d-div\" style=\"position:relative;width:%dpx;height:%dpx;\">\n",
       page, pageWidth, pageHeight);
 
   if( !ignore ) 
   {
     fprintf(pageFile,
-	    "<IMG width=\"%d\" height=\"%d\" src=\"%s%03d.%s\" alt=\"background image\"/>\n",
+	    "<img width=\"%d\" height=\"%d\" src=\"%s%03d.%s\" alt=\"background image\"/>\n",
 	    pageWidth, pageHeight, tmp->getCString(), 
 		(page-firstPage+1), imgExt->getCString());
   }
@@ -901,7 +902,7 @@ void HtmlPage::dumpComplex(FILE *file, int page){
   for(HtmlString *tmp1=yxStrings;tmp1;tmp1=tmp1->yxNext){
     if (tmp1->htext){
       fprintf(pageFile,
-	      "<P style=\"position:absolute;top:%dpx;left:%dpx;white-space:nowrap\" class=\"ft",
+	      "<p style=\"position:absolute;top:%dpx;left:%dpx;white-space:nowrap\" class=\"ft",
 	      xoutRound(tmp1->yMin),
 	      xoutRound(tmp1->xMin));
       if (!singleHtml) {
@@ -911,15 +912,15 @@ void HtmlPage::dumpComplex(FILE *file, int page){
       }
       fprintf(pageFile,"%d\">", tmp1->fontpos);
       fputs(tmp1->htext->getCString(), pageFile);
-      fputs("</P>\n", pageFile);
+      fputs("</p>\n", pageFile);
     }
   }
 
-  fputs("</DIV>\n", pageFile);
+  fputs("</div>\n", pageFile);
   
   if( !noframes )
   {
-      fputs("</BODY>\n</HTML>\n",pageFile);
+      fputs("</body>\n</html>\n",pageFile);
       fclose(pageFile);
   }
 }
@@ -934,7 +935,7 @@ void HtmlPage::dump(FILE *f, int pageNum)
   }
   else
   {
-    fprintf(f,"<A name=%d></a>",pageNum);
+    fprintf(f,"<a name=%d></a>",pageNum);
     // Loop over the list of image names on this page
     int listlen=imgList->getLength();
     for (int i = 0; i < listlen; i++) {
@@ -946,7 +947,7 @@ void HtmlPage::dump(FILE *f, int pageNum)
       if (img->xMin > img->xMax) style_index += 1; // xFlip
       if (img->yMin > img->yMax) style_index += 2; // yFlip
 
-      fprintf(f,"<IMG%s src=\"%s\"/><br/>\n",styles[style_index],img->fName->getCString());
+      fprintf(f,"<img%s src=\"%s\"/><br/>\n",styles[style_index],img->fName->getCString());
       delete img;
     }
 
@@ -959,7 +960,7 @@ void HtmlPage::dump(FILE *f, int pageNum)
 		fputs("<br/>\n",f);
       }
     }
-	fputs("<hr>\n",f);  
+	fputs("<hr/>\n",f);  
   }
 }
 
@@ -1024,7 +1025,7 @@ HtmlMetaVar::~HtmlMetaVar()
     
 GooString* HtmlMetaVar::toString()	
 {
-    GooString *result = new GooString("<META name=\"");
+    GooString *result = new GooString("<meta name=\"");
     result->append(name);
     result->append("\" content=\"");
     result->append(content);
@@ -1070,22 +1071,22 @@ void HtmlOutputDev::doFrame(int firstPage){
     
   fName=basename(Docname);
   fputs(DOCTYPE, fContentsFrame);
-  fputs("\n<HTML>",fContentsFrame);
-  fputs("\n<HEAD>",fContentsFrame);
-  fprintf(fContentsFrame,"\n<TITLE>%s</TITLE>",docTitle->getCString());
+  fputs("\n<html>",fContentsFrame);
+  fputs("\n<head>",fContentsFrame);
+  fprintf(fContentsFrame,"\n<title>%s</title>",docTitle->getCString());
   htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName());
-  fprintf(fContentsFrame, "\n<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding->getCString());
+  fprintf(fContentsFrame, "\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding->getCString());
   dumpMetaVars(fContentsFrame);
-  fprintf(fContentsFrame, "</HEAD>\n");
-  fputs("<FRAMESET cols=\"100,*\">\n",fContentsFrame);
-  fprintf(fContentsFrame,"<FRAME name=\"links\" src=\"%s_ind.html\">\n",fName->getCString());
-  fputs("<FRAME name=\"contents\" src=",fContentsFrame); 
+  fprintf(fContentsFrame, "</head>\n");
+  fputs("<frameset cols=\"100,*\">\n",fContentsFrame);
+  fprintf(fContentsFrame,"<frame name=\"links\" src=\"%s_ind.html\"/>\n",fName->getCString());
+  fputs("<frame name=\"contents\" src=",fContentsFrame); 
   if (complexMode) 
       fprintf(fContentsFrame,"\"%s-%d.html\"",fName->getCString(), firstPage);
   else
       fprintf(fContentsFrame,"\"%ss.html\"",fName->getCString());
   
-  fputs(">\n</FRAMESET>\n</HTML>\n",fContentsFrame);
+  fputs("/>\n</frameset>\n</html>\n",fContentsFrame);
  
   delete fName;
   delete htmlEncoding;
@@ -1143,12 +1144,12 @@ HtmlOutputDev::HtmlOutputDev(Catalog *catalogA, char *fileName, char *title,
          }
          delete left;
          fputs(DOCTYPE, fContentsFrame);
-         fputs("<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE></TITLE>\n</HEAD>\n<BODY>\n", fContentsFrame);
+         fputs("<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title></title>\n</head>\n<body>\n", fContentsFrame);
 
          if (doOutline)
          {
              GooString *str = basename(Docname);
-             fprintf(fContentsFrame, "<A href=\"%s%s\" target=\"contents\">Outline</a><br/>", str->getCString(), complexMode ? "-outline.html" : "s.html#outline");
+             fprintf(fContentsFrame, "<a href=\"%s%s\" target=\"contents\">Outline</a><br/>", str->getCString(), complexMode ? "-outline.html" : "s.html#outline");
              delete str;
          }
      }
@@ -1165,9 +1166,9 @@ HtmlOutputDev::HtmlOutputDev(Catalog *catalogA, char *fileName, char *title,
        }
        delete right;
        fputs(DOCTYPE, page);
-       fputs("<HTML>\n<HEAD>\n<TITLE></TITLE>\n",page);
+       fputs("<html>\n<head>\n<title></title>\n",page);
        printCSS(page);
-       fputs("</HEAD>\n<BODY>\n",page);
+       fputs("</head>\n<body>\n",page);
      }
   }
 
@@ -1194,14 +1195,14 @@ HtmlOutputDev::HtmlOutputDev(Catalog *catalogA, char *fileName, char *title,
     } 
     else 
     {
-      fprintf(page,"%s\n<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>%s</TITLE>\n", DOCTYPE, docTitle->getCString());
+      fprintf(page,"%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>%s</title>\n", DOCTYPE, docTitle->getCString());
       
-      fprintf(page, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding->getCString());
+      fprintf(page, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding->getCString());
       
       dumpMetaVars(page);
       printCSS(page);
-      fprintf(page,"</HEAD>\n");
-      fprintf(page,"<BODY bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n");
+      fprintf(page,"</head>\n");
+      fprintf(page,"<body bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n");
     }
     delete htmlEncoding;
   }
@@ -1217,7 +1218,7 @@ HtmlOutputDev::~HtmlOutputDev() {
     deleteGooList(glMetaVars, HtmlMetaVar);
 
     if (fContentsFrame){
-      fputs("</BODY>\n</HTML>\n",fContentsFrame);  
+      fputs("</body>\n</html>\n",fContentsFrame);  
       fclose(fContentsFrame);
     }
     if (page != NULL) {
@@ -1227,7 +1228,7 @@ HtmlOutputDev::~HtmlOutputDev() {
       } else
       if ( !complexMode || xml || noframes )
       { 
-        fputs("</BODY>\n</HTML>\n",page);  
+        fputs("</body>\n</html>\n",page);  
         fclose(page);
       }
     }
@@ -1263,9 +1264,9 @@ void HtmlOutputDev::startPage(int pageNum, GfxState *state) {
     if (fContentsFrame)
 	{
       if (complexMode)
-		fprintf(fContentsFrame,"<A href=\"%s-%d.html\"",str->getCString(),pageNum);
+		fprintf(fContentsFrame,"<a href=\"%s-%d.html\"",str->getCString(),pageNum);
       else 
-		fprintf(fContentsFrame,"<A href=\"%ss.html#%d\"",str->getCString(),pageNum);
+		fprintf(fContentsFrame,"<a href=\"%ss.html#%d\"",str->getCString(),pageNum);
       fprintf(fContentsFrame," target=\"contents\" >Page %d</a><br/>\n",pageNum);
     }
   }
@@ -1295,7 +1296,7 @@ void HtmlOutputDev::endPage() {
   maxPageWidth = pages->pageWidth;
   maxPageHeight = pages->pageHeight;
   
-  //if(!noframes&&!xml) fputs("<br>\n", fContentsFrame);
+  //if(!noframes&&!xml) fputs("<br/>\n", fContentsFrame);
   if(!stout && !globalParams->getErrQuiet()) printf("Page-%d\n",(pageNum));
 }
 
@@ -1698,7 +1699,7 @@ GBool HtmlOutputDev::dumpDocOutline(PDFDoc* doc)
 		if (noframes)
 		{
 			output = page; 
-			fputs("<hr>\n", output);
+			fputs("<hr/>\n", output);
 		}
 		else
 		{
@@ -1713,13 +1714,13 @@ GBool HtmlOutputDev::dumpDocOutline(PDFDoc* doc)
 			GooString *htmlEncoding =
 				HtmlOutputDev::mapEncodingToHtml(globalParams->getTextEncodingName());
 
-			fprintf(output, "<HTML xmlns=\"http://www.w3.org/1999/xhtml\" " \
+			fprintf(output, "<html xmlns=\"http://www.w3.org/1999/xhtml\" " \
                                 "lang=\"\" xml:lang=\"\">\n"            \
-                                "<HEAD>\n"                              \
-                                "<TITLE>Document Outline</TITLE>\n"     \
-                                "<META http-equiv=\"Content-Type\" content=\"text/html; " \
+                                "<head>\n"                              \
+                                "<title>Document Outline</title>\n"     \
+                                "<meta http-equiv=\"Content-Type\" content=\"text/html; " \
                                 "charset=%s\"/>\n"                      \
-                                "</HEAD>\n<BODY>\n", htmlEncoding->getCString());
+                                "</head>\n<body>\n", htmlEncoding->getCString());
 			delete htmlEncoding;
 		}
 	}
@@ -1728,11 +1729,11 @@ GBool HtmlOutputDev::dumpDocOutline(PDFDoc* doc)
 	{
 		GBool done = newHtmlOutlineLevel(output, outlines, catalog);
 		if (done && !complexMode)
-			fputs("<hr>\n", output);
+			fputs("<hr/>\n", output);
 	
 		if (bClose)
 		{
-			fputs("</BODY>\n</HTML>\n", output);
+			fputs("</body>\n</html>\n", output);
 			fclose(output);
 		}
 	}
@@ -1752,7 +1753,7 @@ GBool HtmlOutputDev::newHtmlOutlineLevel(FILE *output, GooList *outlines, Catalo
 
 	if (level == 1)
 	{
-		fputs("<A name=\"outline\"></a>", output);
+		fputs("<a name=\"outline\"></a>", output);
 		fputs("<h1>Document Outline</h1>\n", output);
 	}
 	fputs("<ul>\n",output);
@@ -1791,10 +1792,10 @@ GBool HtmlOutputDev::newHtmlOutlineLevel(FILE *output, GooList *outlines, Catalo
 
 		fputs("<li>",output);
 		if (linkName)
-			fprintf(output,"<A href=\"%s\">", linkName->getCString());
+			fprintf(output,"<a href=\"%s\">", linkName->getCString());
 		fputs(titleStr->getCString(),output);
 		if (linkName) {
-			fputs("</A>",output);
+			fputs("</a>",output);
 			delete linkName;
 		}
 		delete titleStr;


More information about the poppler mailing list