[poppler] poppler/GlobalParams.cc poppler/GlobalParams.h poppler/TextOutputDev.cc poppler/TextOutputDev.h utils/pdftotext.cc

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Dec 6 00:00:46 UTC 2019


 poppler/GlobalParams.cc  |   35 -----------------------------------
 poppler/GlobalParams.h   |   13 -------------
 poppler/TextOutputDev.cc |   18 ++++++++++--------
 poppler/TextOutputDev.h  |   22 ++++++++++++++++++++--
 utils/pdftotext.cc       |   26 +++++++++++++++++++-------
 5 files changed, 49 insertions(+), 65 deletions(-)

New commits:
commit ac485b0309f5e51d74fdb7484b9f6a7f79448f52
Author: Albert Astals Cid <aacid at kde.org>
Date:   Thu Nov 28 16:31:32 2019 +0100

    Move textEOL and textPageBreaks out of GlobalParams to TextOutputDev

diff --git a/poppler/GlobalParams.cc b/poppler/GlobalParams.cc
index 58364cb5..ea5d8125 100644
--- a/poppler/GlobalParams.cc
+++ b/poppler/GlobalParams.cc
@@ -403,12 +403,6 @@ GlobalParams::GlobalParams(const char *customPopplerDataDir)
   psShrinkLarger = true;
   psLevel = psLevel2;
   textEncoding = new GooString("UTF-8");
-#if defined(_WIN32)
-  textEOL = eolDOS;
-#else
-  textEOL = eolUnix;
-#endif
-  textPageBreaks = true;
   overprintPreview = false;
   printCommands = false;
   profileCommands = false;
@@ -1138,16 +1132,6 @@ std::string GlobalParams::getTextEncodingName() const {
   return textEncoding->toStr();
 }
 
-EndOfLineKind GlobalParams::getTextEOL() {
-  globalParamsLocker();
-  return textEOL;
-}
-
-bool GlobalParams::getTextPageBreaks() {
-  globalParamsLocker();
-  return textPageBreaks;
-}
-
 bool GlobalParams::getPrintCommands() {
   globalParamsLocker();
   return printCommands;
@@ -1246,25 +1230,6 @@ void GlobalParams::setTextEncoding(const char *encodingName) {
   textEncoding = new GooString(encodingName);
 }
 
-bool GlobalParams::setTextEOL(const char *s) {
-  globalParamsLocker();
-  if (!strcmp(s, "unix")) {
-    textEOL = eolUnix;
-  } else if (!strcmp(s, "dos")) {
-    textEOL = eolDOS;
-  } else if (!strcmp(s, "mac")) {
-    textEOL = eolMac;
-  } else {
-    return false;
-  }
-  return true;
-}
-
-void GlobalParams::setTextPageBreaks(bool pageBreaks) {
-  globalParamsLocker();
-  textPageBreaks = pageBreaks;
-}
-
 void GlobalParams::setOverprintPreview(bool overprintPreviewA) {
   globalParamsLocker();
   overprintPreview = overprintPreviewA;
diff --git a/poppler/GlobalParams.h b/poppler/GlobalParams.h
index 9363be34..755cdc1a 100644
--- a/poppler/GlobalParams.h
+++ b/poppler/GlobalParams.h
@@ -88,12 +88,6 @@ enum PSLevel {
 
 //------------------------------------------------------------------------
 
-enum EndOfLineKind {
-  eolUnix,			// LF
-  eolDOS,			// CR+LF
-  eolMac			// CR
-};
-
 //------------------------------------------------------------------------
 
 class GlobalParams {
@@ -134,8 +128,6 @@ public:
   bool getPSShrinkLarger();
   PSLevel getPSLevel();
   std::string getTextEncodingName() const;
-  EndOfLineKind getTextEOL();
-  bool getTextPageBreaks();
   bool getOverprintPreview() { return overprintPreview; }
   bool getPrintCommands();
   bool getProfileCommands();
@@ -154,8 +146,6 @@ public:
   void setPSShrinkLarger(bool shrink);
   void setPSLevel(PSLevel level);
   void setTextEncoding(const char *encodingName);
-  bool setTextEOL(const char *s);
-  void setTextPageBreaks(bool pageBreaks);
   void setOverprintPreview(bool overprintPreviewA);
   void setPrintCommands(bool printCommandsA);
   void setProfileCommands(bool profileCommandsA);
@@ -209,9 +199,6 @@ private:
   PSLevel psLevel;		// PostScript level to generate
   GooString *textEncoding;	// encoding (unicodeMap) to use for text
 				//   output
-  EndOfLineKind textEOL;	// type of EOL marker to use for text
-				//   output
-  bool textPageBreaks;		// insert end-of-page markers?
   bool overprintPreview;	// enable overprint preview
   bool printCommands;		// print the drawing commands
   bool profileCommands;	// profile the drawing commands
diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index 6447eedd..2c630403 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -4166,7 +4166,7 @@ bool TextPage::findText(const Unicode *s, int len,
 }
 
 GooString *TextPage::getText(double xMin, double yMin,
-			   double xMax, double yMax) const {
+			   double xMax, double yMax, EndOfLineKind textEOL) const {
   GooString *s;
   UnicodeMap *uMap;
   TextBlock *blk;
@@ -4209,7 +4209,7 @@ GooString *TextPage::getText(double xMin, double yMin,
 
   spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
   eolLen = 0; // make gcc happy
-  switch (globalParams->getTextEOL()) {
+  switch (textEOL) {
   case eolUnix:
     eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
     break;
@@ -5289,7 +5289,7 @@ bool TextPage::findCharRange(int pos, int length,
 }
 
 void TextPage::dump(void *outputStream, TextOutputFunc outputFunc,
-		    bool physLayout) {
+		    bool physLayout, EndOfLineKind textEOL, bool pageBreaks) {
   UnicodeMap *uMap;
   TextFlow *flow;
   TextBlock *blk;
@@ -5300,7 +5300,6 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc,
   TextLineFrag *frag;
   char space[8], eol[16], eop[8];
   int spaceLen, eolLen, eopLen;
-  bool pageBreaks;
   GooString *s;
   double delta;
   int col, i, j, d, n;
@@ -5311,7 +5310,7 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc,
   }
   spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
   eolLen = 0; // make gcc happy
-  switch (globalParams->getTextEOL()) {
+  switch (textEOL) {
   case eolUnix:
     eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
     break;
@@ -5324,7 +5323,6 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc,
     break;
   }
   eopLen = uMap->mapUnicode(0x0c, eop, sizeof(eop));
-  pageBreaks = globalParams->getTextPageBreaks();
 
   //~ writing mode (horiz/vert)
 
@@ -5674,6 +5672,8 @@ TextOutputDev::TextOutputDev(const char *fileName, bool physLayoutA,
   rawOrder = rawOrderA;
   discardDiag = discardDiagA;
   doHTML = false;
+  textEOL = defaultEndOfLine();
+  textPageBreaks = true;
   ok = true;
 
   // open file
@@ -5716,6 +5716,8 @@ TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream,
   doHTML = false;
   text = new TextPage(rawOrderA, discardDiagA);
   actualText = new ActualText(text);
+  textEOL = defaultEndOfLine();
+  textPageBreaks = true;
   ok = true;
 }
 
@@ -5737,7 +5739,7 @@ void TextOutputDev::endPage() {
   text->endPage();
   text->coalesce(physLayout, fixedPitch, doHTML);
   if (outputStream) {
-    text->dump(outputStream, outputFunc, physLayout);
+    text->dump(outputStream, outputFunc, physLayout, textEOL, textPageBreaks);
   }
 }
 
@@ -5934,7 +5936,7 @@ bool TextOutputDev::findText(const Unicode *s, int len,
 
 GooString *TextOutputDev::getText(double xMin, double yMin,
 				double xMax, double yMax) const {
-  return text->getText(xMin, yMin, xMax, yMax);
+  return text->getText(xMin, yMin, xMax, yMax, textEOL);
 }
 
 void TextOutputDev::drawSelection(OutputDev *out,
diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h
index 51e397f5..0d008b3d 100644
--- a/poppler/TextOutputDev.h
+++ b/poppler/TextOutputDev.h
@@ -70,6 +70,12 @@ enum SelectionStyle {
   selectionStyleLine
 };
 
+enum EndOfLineKind {
+  eolUnix,			// LF
+  eolDOS,			// CR+LF
+  eolMac			// CR
+};
+
 //------------------------------------------------------------------------
 // TextFontInfo
 //------------------------------------------------------------------------
@@ -626,7 +632,7 @@ public:
 
   // Get the text which is inside the specified rectangle.
   GooString *getText(double xMin, double yMin,
-		     double xMax, double yMax) const;
+		     double xMax, double yMax, EndOfLineKind textEOL) const;
 
   void visitSelection(TextSelectionVisitor *visitor,
 		      const PDFRectangle *selection,
@@ -659,7 +665,7 @@ public:
 
   // Dump contents of page to a file.
   void dump(void *outputStream, TextOutputFunc outputFunc,
-	    bool physLayout);
+	    bool physLayout, EndOfLineKind textEOL, bool pageBreaks);
 
   // Get the head of the linked list of TextFlows.
   const TextFlow *getFlows() const { return flows; }
@@ -912,6 +918,16 @@ public:
   // last rasterized page.
   const TextFlow *getFlows() const;
 
+  static constexpr EndOfLineKind defaultEndOfLine() {
+#if defined(_WIN32)
+    return eolDOS;
+#else
+    return eolUnix;
+#endif
+  }
+  void setTextEOL(EndOfLineKind textEOLA) { textEOL = textEOLA; }
+  void setTextPageBreaks(bool textPageBreaksA) { textPageBreaks = textPageBreaksA; }
+
 private:
 
   TextOutputFunc outputFunc;	// output function
@@ -930,6 +946,8 @@ private:
 				// to skip watermarks drawn on top of body text, etc.
   bool doHTML;			// extra processing for HTML conversion
   bool ok;			// set up ok?
+  bool textPageBreaks;		// insert end-of-page markers?
+  EndOfLineKind textEOL;       // type of EOL marker to use
 
   ActualText *actualText;
 };
diff --git a/utils/pdftotext.cc b/utils/pdftotext.cc
index 1d366234..53f2e131 100644
--- a/utils/pdftotext.cc
+++ b/utils/pdftotext.cc
@@ -88,7 +88,7 @@ static bool rawOrder = false;
 static bool discardDiag = false;
 static bool htmlMeta = false;
 static char textEncName[128] = "";
-static char textEOL[16] = "";
+static char textEOLStr[16] = "";
 static bool noPageBreaks = false;
 static char ownerPassword[33] = "\001";
 static char userPassword[33] = "\001";
@@ -126,7 +126,7 @@ static const ArgDesc argDesc[] = {
    "output text encoding name"},
   {"-listenc",argFlag,     &printEnc,      0,
    "list available encodings"},
-  {"-eol",     argString,   textEOL,        sizeof(textEOL),
+  {"-eol",     argString,   textEOLStr,        sizeof(textEOLStr),
    "output end-of-line convention (unix, dos, or mac)"},
   {"-nopgbrk", argFlag,     &noPageBreaks,  0,
    "don't insert page breaks between pages"},
@@ -188,6 +188,7 @@ int main(int argc, char *argv[]) {
   Object info;
   bool ok;
   int exitCode;
+  EndOfLineKind textEOL = TextOutputDev::defaultEndOfLine();
 
   Win32Console win32Console(&argc, &argv);
   exitCode = 99;
@@ -229,14 +230,17 @@ int main(int argc, char *argv[]) {
   if (textEncName[0]) {
     globalParams->setTextEncoding(textEncName);
   }
-  if (textEOL[0]) {
-    if (!globalParams->setTextEOL(textEOL)) {
+  if (textEOLStr[0]) {
+    if (!strcmp(textEOLStr, "unix")) {
+      textEOL = eolUnix;
+    } else if (!strcmp(textEOLStr, "dos")) {
+      textEOL = eolDOS;
+    } else if (!strcmp(textEOLStr, "mac")) {
+      textEOL = eolMac;
+    } else {
       fprintf(stderr, "Bad '-eol' value on command line\n");
     }
   }
-  if (noPageBreaks) {
-    globalParams->setTextPageBreaks(false);
-  }
   if (quiet) {
     globalParams->setErrQuiet(quiet);
   }
@@ -370,6 +374,10 @@ int main(int argc, char *argv[]) {
     textOut = new TextOutputDev(nullptr, physLayout, fixedPitch, rawOrder, htmlMeta, discardDiag);
 
     if (textOut->isOk()) {
+      textOut->setTextEOL(textEOL);
+      if (noPageBreaks) {
+	textOut->setTextPageBreaks(false);
+      }
       if (bboxLayout) {
         printDocBBox(f, doc, textOut, firstPage, lastPage);
       }
@@ -384,6 +392,10 @@ int main(int argc, char *argv[]) {
     textOut = new TextOutputDev(textFileName->c_str(),
 				physLayout, fixedPitch, rawOrder, htmlMeta, discardDiag);
     if (textOut->isOk()) {
+      textOut->setTextEOL(textEOL);
+      if (noPageBreaks) {
+	textOut->setTextPageBreaks(false);
+      }
       if ((w==0) && (h==0) && (x==0) && (y==0)) {
 	doc->displayPages(textOut, firstPage, lastPage, resolution, resolution, 0,
 			  true, false, false);


More information about the poppler mailing list