[poppler] utils/HtmlOutputDev.cc utils/pdftohtml.1 utils/pdftohtml.cc

Albert Astals Cid aacid at kemper.freedesktop.org
Fri Jul 20 22:18:23 UTC 2018


 utils/HtmlOutputDev.cc |   22 ++++++++++++++++++----
 utils/pdftohtml.1      |    3 +++
 utils/pdftohtml.cc     |    4 ++++
 3 files changed, 25 insertions(+), 4 deletions(-)

New commits:
commit bcd89bc0abb2cc05d3dc428074bb24b450ab7cf0
Author: Thibaut Brard <thibaut.brard at gmail.com>
Date:   Sat Jul 21 00:17:58 2018 +0200

    pdftohtml: Add option to not round coordinates
    
    when outputting as XML

diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index ace303b3..8a088c22 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -41,6 +41,7 @@
 // Copyright (C) 2016 Vincent Le Garrec <legarrec.vincent at gmail.com>
 // Copyright (C) 2017 Caolán McNamara <caolanm at redhat.com>
 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info at kdab.com>. Work sponsored by the LiMux project of the city of Munich
+// Copyright (C) 2018 Thibaut Brard <thibaut.brard at gmail.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -109,6 +110,7 @@ extern GBool printHtml;
 extern GBool noframes;
 extern GBool stout;
 extern GBool xml;
+extern GBool noRoundedCoordinates;
 extern GBool showHidden;
 extern GBool noMerge;
 
@@ -760,16 +762,28 @@ void HtmlPage::dumpAsXML(FILE* f,int page){
   int listlen=imgList->getLength();
   for (int i = 0; i < listlen; i++) {
     HtmlImage *img = (HtmlImage*)imgList->del(0);
-    fprintf(f,"<image top=\"%d\" left=\"%d\" ",xoutRound(img->yMin),xoutRound(img->xMin));
-    fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(img->xMax-img->xMin),xoutRound(img->yMax-img->yMin));
+    if (!noRoundedCoordinates) {
+      fprintf(f, "<image top=\"%d\" left=\"%d\" ", xoutRound(img->yMin), xoutRound(img->xMin));
+      fprintf(f, "width=\"%d\" height=\"%d\" ", xoutRound(img->xMax - img->xMin), xoutRound(img->yMax - img->yMin));
+    }
+    else {
+      fprintf(f, "<image top=\"%f\" left=\"%f\" ", img->yMin, img->xMin);
+      fprintf(f, "width=\"%f\" height=\"%f\" ", img->xMax - img->xMin, img->yMax - img->yMin);
+    }
     fprintf(f,"src=\"%s\"/>\n",img->fName->getCString());
     delete img;
   }
 
   for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){
     if (tmp->htext){
-      fprintf(f,"<text top=\"%d\" left=\"%d\" ",xoutRound(tmp->yMin),xoutRound(tmp->xMin));
-      fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(tmp->xMax-tmp->xMin),xoutRound(tmp->yMax-tmp->yMin));
+      if (!noRoundedCoordinates) {
+        fprintf(f, "<text top=\"%d\" left=\"%d\" ", xoutRound(tmp->yMin), xoutRound(tmp->xMin));
+        fprintf(f, "width=\"%d\" height=\"%d\" ", xoutRound(tmp->xMax - tmp->xMin), xoutRound(tmp->yMax - tmp->yMin));
+      }
+      else {
+        fprintf(f, "<text top=\"%f\" left=\"%f\" ", tmp->yMin, tmp->xMin);
+        fprintf(f, "width=\"%f\" height=\"%f\" ", tmp->xMax - tmp->xMin, tmp->yMax - tmp->yMin);
+      }
       fprintf(f,"font=\"%d\">", tmp->fontpos);
       fputs(tmp->htext->getCString(),f);
       fputs("</text>\n",f);
diff --git a/utils/pdftohtml.1 b/utils/pdftohtml.1
index 5de42880..5d711ba9 100644
--- a/utils/pdftohtml.1
+++ b/utils/pdftohtml.1
@@ -58,6 +58,9 @@ zoom the PDF document (default 1.5)
 .B \-xml
 output for XML post-processing
 .TP
+.B \-noroundcoord
+do not round coordinates (with XML output only)
+.TP
 .B \-enc <string>
 output text encoding name
 .TP
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index 04aeb1bc..b82c2552 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -26,6 +26,7 @@
 // Copyright (C) 2015 William Bader <williambader at hotmail.com>
 // Copyright (C) 2017 Adrian Johnson <ajohnson at redneon.com>
 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info at kdab.com>. Work sponsored by the LiMux project of the city of Munich
+// Copyright (C) 2018 Thibaut Brard <thibaut.brard at gmail.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -81,6 +82,7 @@ static double scale=1.5;
 GBool noframes=gFalse;
 GBool stout=gFalse;
 GBool xml=gFalse;
+GBool noRoundedCoordinates = gFalse;
 static GBool errQuiet=gFalse;
 static GBool noDrm=gFalse;
 double wordBreakThreshold=10;  // 10%, below converted into a coefficient - 0.1
@@ -130,6 +132,8 @@ static const ArgDesc argDesc[] = {
    "zoom the pdf document (default 1.5)"},
   {"-xml",    argFlag,    &xml,         0,
    "output for XML post-processing"},
+  {"-noroundcoord", argFlag, &noRoundedCoordinates, 0,
+    "do not round coordinates (with XML output only)"},
   {"-hidden", argFlag,   &showHidden,   0,
    "output hidden text"},
   {"-nomerge", argFlag, &noMerge, 0,


More information about the poppler mailing list