[poppler] poppler/TextOutputDev.cc poppler/TextOutputDev.h utils/pdftotext.cc
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Sun Sep 2 11:26:19 UTC 2018
poppler/TextOutputDev.cc | 5 +++++
poppler/TextOutputDev.h | 5 +++++
utils/pdftotext.cc | 6 ++----
3 files changed, 12 insertions(+), 4 deletions(-)
New commits:
commit e8e95d2ca4f1c108cc69cab72c7c5ab31f80a597
Author: Sanchit Anand <sanxchit at gmail.com>
Date: Tue Aug 28 02:58:39 2018 -0400
pdftotext: Fix only outputs first page content with -bbox-layout option
Issue #88
diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index 4a3070af..7a92ff3b 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -37,6 +37,7 @@
// Copyright (C) 2013 Ed Catmur <ed at catmur.co.uk>
// Copyright (C) 2016 Khaled Hosny <khaledhosny at eglug.org>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info at kdab.com>. Work sponsored by the LiMux project of the city of Munich
+// Copyright (C) 2018 Sanchit Anand <sanxchit at gmail.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -5878,3 +5879,7 @@ TextPage *TextOutputDev::takeText() {
text = new TextPage(rawOrder);
return ret;
}
+
+TextFlow *TextOutputDev::getFlows() {
+ return text->getFlows();
+}
diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h
index 092acd67..f2435545 100644
--- a/poppler/TextOutputDev.h
+++ b/poppler/TextOutputDev.h
@@ -22,6 +22,7 @@
// Copyright (C) 2012, 2013, 2015, 2016 Jason Crain <jason at aquaticape.us>
// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag at alfa.de>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info at kdab.com>. Work sponsored by the LiMux project of the city of Munich
+// Copyright (C) 2018 Sanchit Anand <sanxchit at gmail.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -890,6 +891,10 @@ public:
// Turn extra processing for HTML conversion on or off.
void enableHTMLExtras(GBool doHTMLA) { doHTML = doHTMLA; }
+ // Get the head of the linked list of TextFlows for the
+ // last rasterized page.
+ TextFlow *getFlows();
+
private:
TextOutputFunc outputFunc; // output function
diff --git a/utils/pdftotext.cc b/utils/pdftotext.cc
index 3d8dc95f..9844ba00 100644
--- a/utils/pdftotext.cc
+++ b/utils/pdftotext.cc
@@ -28,6 +28,7 @@
// Copyright (C) 2017 Adrian Johnson <ajohnson at redneon.com>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info at kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Adam Reichold <adam.reichold at t-online.de>
+// Copyright (C) 2018 Sanchit Anand <sanxchit at gmail.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -511,7 +512,6 @@ static void printLine(FILE *f, TextLine *line) {
void printDocBBox(FILE *f, PDFDoc *doc, TextOutputDev *textOut, int first, int last) {
double xMin, yMin, xMax, yMax;
- TextPage *textPage;
TextFlow *flow;
TextBlock *blk;
TextLine *line;
@@ -520,8 +520,7 @@ void printDocBBox(FILE *f, PDFDoc *doc, TextOutputDev *textOut, int first, int l
for (int page = first; page <= last; ++page) {
fprintf(f, " <page width=\"%f\" height=\"%f\">\n",doc->getPageMediaWidth(page), doc->getPageMediaHeight(page));
doc->displayPage(textOut, page, resolution, resolution, 0, gTrue, gFalse, gFalse);
- textPage = textOut->takeText();
- for (flow = textPage->getFlows(); flow; flow = flow->getNext()) {
+ for (flow = textOut->getFlows(); flow; flow = flow->getNext()) {
fprintf(f, " <flow>\n");
for (blk = flow->getBlocks(); blk; blk = blk->getNext()) {
blk->getBBox(&xMin, &yMin, &xMax, &yMax);
@@ -534,7 +533,6 @@ void printDocBBox(FILE *f, PDFDoc *doc, TextOutputDev *textOut, int first, int l
fprintf(f, " </flow>\n");
}
fprintf(f, " </page>\n");
- textPage->decRefCnt();
}
fprintf(f, "</doc>\n");
}
More information about the poppler mailing list