[poppler] utils/HtmlOutputDev.cc utils/pdftohtml.1 utils/pdftohtml.cc
Albert Astals Cid
aacid at kemper.freedesktop.org
Fri Jul 20 22:18:23 UTC 2018
utils/HtmlOutputDev.cc | 22 ++++++++++++++++++----
utils/pdftohtml.1 | 3 +++
utils/pdftohtml.cc | 4 ++++
3 files changed, 25 insertions(+), 4 deletions(-)
New commits:
commit bcd89bc0abb2cc05d3dc428074bb24b450ab7cf0
Author: Thibaut Brard <thibaut.brard at gmail.com>
Date: Sat Jul 21 00:17:58 2018 +0200
pdftohtml: Add option to not round coordinates
when outputting as xml
diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index ace303b3..8a088c22 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -41,6 +41,7 @@
// Copyright (C) 2016 Vincent Le Garrec <legarrec.vincent at gmail.com>
// Copyright (C) 2017 Caolán McNamara <caolanm at redhat.com>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info at kdab.com>. Work sponsored by the LiMux project of the city of Munich
+// Copyright (C) 2018 Thibaut Brard <thibaut.brard at gmail.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -109,6 +110,7 @@ extern GBool printHtml;
extern GBool noframes;
extern GBool stout;
extern GBool xml;
+extern GBool noRoundedCoordinates;
extern GBool showHidden;
extern GBool noMerge;
@@ -760,16 +762,28 @@ void HtmlPage::dumpAsXML(FILE* f,int page){
int listlen=imgList->getLength();
for (int i = 0; i < listlen; i++) {
HtmlImage *img = (HtmlImage*)imgList->del(0);
- fprintf(f,"<image top=\"%d\" left=\"%d\" ",xoutRound(img->yMin),xoutRound(img->xMin));
- fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(img->xMax-img->xMin),xoutRound(img->yMax-img->yMin));
+ if (!noRoundedCoordinates) {
+ fprintf(f, "<image top=\"%d\" left=\"%d\" ", xoutRound(img->yMin), xoutRound(img->xMin));
+ fprintf(f, "width=\"%d\" height=\"%d\" ", xoutRound(img->xMax - img->xMin), xoutRound(img->yMax - img->yMin));
+ }
+ else {
+ fprintf(f, "<image top=\"%f\" left=\"%f\" ", img->yMin, img->xMin);
+ fprintf(f, "width=\"%f\" height=\"%f\" ", img->xMax - img->xMin, img->yMax - img->yMin);
+ }
fprintf(f,"src=\"%s\"/>\n",img->fName->getCString());
delete img;
}
for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){
if (tmp->htext){
- fprintf(f,"<text top=\"%d\" left=\"%d\" ",xoutRound(tmp->yMin),xoutRound(tmp->xMin));
- fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(tmp->xMax-tmp->xMin),xoutRound(tmp->yMax-tmp->yMin));
+ if (!noRoundedCoordinates) {
+ fprintf(f, "<text top=\"%d\" left=\"%d\" ", xoutRound(tmp->yMin), xoutRound(tmp->xMin));
+ fprintf(f, "width=\"%d\" height=\"%d\" ", xoutRound(tmp->xMax - tmp->xMin), xoutRound(tmp->yMax - tmp->yMin));
+ }
+ else {
+ fprintf(f, "<text top=\"%f\" left=\"%f\" ", tmp->yMin, tmp->xMin);
+ fprintf(f, "width=\"%f\" height=\"%f\" ", tmp->xMax - tmp->xMin, tmp->yMax - tmp->yMin);
+ }
fprintf(f,"font=\"%d\">", tmp->fontpos);
fputs(tmp->htext->getCString(),f);
fputs("</text>\n",f);
diff --git a/utils/pdftohtml.1 b/utils/pdftohtml.1
index 5de42880..5d711ba9 100644
--- a/utils/pdftohtml.1
+++ b/utils/pdftohtml.1
@@ -58,6 +58,9 @@ zoom the PDF document (default 1.5)
.B \-xml
output for XML post-processing
.TP
+.B \-noroundcoord
+do not round coordinates (with XML output only)
+.TP
.B \-enc <string>
output text encoding name
.TP
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index 04aeb1bc..b82c2552 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -26,6 +26,7 @@
// Copyright (C) 2015 William Bader <williambader at hotmail.com>
// Copyright (C) 2017 Adrian Johnson <ajohnson at redneon.com>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info at kdab.com>. Work sponsored by the LiMux project of the city of Munich
+// Copyright (C) 2018 Thibaut Brard <thibaut.brard at gmail.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -81,6 +82,7 @@ static double scale=1.5;
GBool noframes=gFalse;
GBool stout=gFalse;
GBool xml=gFalse;
+GBool noRoundedCoordinates = gFalse;
static GBool errQuiet=gFalse;
static GBool noDrm=gFalse;
double wordBreakThreshold=10; // 10%, below converted into a coefficient - 0.1
@@ -130,6 +132,8 @@ static const ArgDesc argDesc[] = {
"zoom the pdf document (default 1.5)"},
{"-xml", argFlag, &xml, 0,
"output for XML post-processing"},
+ {"-noroundcoord", argFlag, &noRoundedCoordinates, 0,
+ "do not round coordinates (with XML output only)"},
{"-hidden", argFlag, &showHidden, 0,
"output hidden text"},
{"-nomerge", argFlag, &noMerge, 0,
More information about the poppler
mailing list