[poppler] poppler/TextOutputDev.cc poppler/TextOutputDev.h utils/pdftotext.cc

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Sun Sep 2 11:26:19 UTC 2018


 poppler/TextOutputDev.cc |    5 +++++
 poppler/TextOutputDev.h  |    5 +++++
 utils/pdftotext.cc       |    6 ++----
 3 files changed, 12 insertions(+), 4 deletions(-)

New commits:
commit e8e95d2ca4f1c108cc69cab72c7c5ab31f80a597
Author: Sanchit Anand <sanxchit at gmail.com>
Date:   Tue Aug 28 02:58:39 2018 -0400

    pdftotext: Fix -bbox-layout option outputting only the first page's content
    
    Issue #88

diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index 4a3070af..7a92ff3b 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -37,6 +37,7 @@
 // Copyright (C) 2013 Ed Catmur <ed at catmur.co.uk>
 // Copyright (C) 2016 Khaled Hosny <khaledhosny at eglug.org>
 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info at kdab.com>. Work sponsored by the LiMux project of the city of Munich
+// Copyright (C) 2018 Sanchit Anand <sanxchit at gmail.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -5878,3 +5879,7 @@ TextPage *TextOutputDev::takeText() {
   text = new TextPage(rawOrder);
   return ret;
 }
+
+TextFlow *TextOutputDev::getFlows() {
+  return text->getFlows();
+}
diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h
index 092acd67..f2435545 100644
--- a/poppler/TextOutputDev.h
+++ b/poppler/TextOutputDev.h
@@ -22,6 +22,7 @@
 // Copyright (C) 2012, 2013, 2015, 2016 Jason Crain <jason at aquaticape.us>
 // Copyright (C) 2013 Thomas Freitag <Thomas.Freitag at alfa.de>
 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info at kdab.com>. Work sponsored by the LiMux project of the city of Munich
+// Copyright (C) 2018 Sanchit Anand <sanxchit at gmail.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -890,6 +891,10 @@ public:
   // Turn extra processing for HTML conversion on or off.
   void enableHTMLExtras(GBool doHTMLA) { doHTML = doHTMLA; }
 
+  // Get the head of the linked list of TextFlows for the
+  // most recently displayed page.
+  TextFlow *getFlows();
+
 private:
 
   TextOutputFunc outputFunc;	// output function
diff --git a/utils/pdftotext.cc b/utils/pdftotext.cc
index 3d8dc95f..9844ba00 100644
--- a/utils/pdftotext.cc
+++ b/utils/pdftotext.cc
@@ -28,6 +28,7 @@
 // Copyright (C) 2017 Adrian Johnson <ajohnson at redneon.com>
 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info at kdab.com>. Work sponsored by the LiMux project of the city of Munich
 // Copyright (C) 2018 Adam Reichold <adam.reichold at t-online.de>
+// Copyright (C) 2018 Sanchit Anand <sanxchit at gmail.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -511,7 +512,6 @@ static void printLine(FILE *f, TextLine *line) {
 
 void printDocBBox(FILE *f, PDFDoc *doc, TextOutputDev *textOut, int first, int last) {
   double xMin, yMin, xMax, yMax;
-  TextPage *textPage;
   TextFlow *flow;
   TextBlock *blk;
   TextLine *line;
@@ -520,8 +520,7 @@ void printDocBBox(FILE *f, PDFDoc *doc, TextOutputDev *textOut, int first, int l
   for (int page = first; page <= last; ++page) {
     fprintf(f, "  <page width=\"%f\" height=\"%f\">\n",doc->getPageMediaWidth(page), doc->getPageMediaHeight(page));
     doc->displayPage(textOut, page, resolution, resolution, 0, gTrue, gFalse, gFalse);
-    textPage = textOut->takeText();
-    for (flow = textPage->getFlows(); flow; flow = flow->getNext()) {
+    for (flow = textOut->getFlows(); flow; flow = flow->getNext()) {
       fprintf(f, "    <flow>\n");
       for (blk = flow->getBlocks(); blk; blk = blk->getNext()) {
         blk->getBBox(&xMin, &yMin, &xMax, &yMax);
@@ -534,7 +533,6 @@ void printDocBBox(FILE *f, PDFDoc *doc, TextOutputDev *textOut, int first, int l
       fprintf(f, "    </flow>\n");
     }
     fprintf(f, "  </page>\n");
-    textPage->decRefCnt();
   }
   fprintf(f, "</doc>\n");
 }


More information about the poppler mailing list