[poppler] utils/pdftotext.cc
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue Sep 13 20:43:32 UTC 2022
utils/pdftotext.cc | 112 ++++++++++++++++++++---------------------------------
1 file changed, 43 insertions(+), 69 deletions(-)
New commits:
commit be94b9c1d46db29592817d4afda63d9733b70b36
Author: Albert Astals Cid <aacid at kde.org>
Date: Tue Sep 13 22:32:10 2022 +0200
pdftotext: Simplify memory handling
diff --git a/utils/pdftotext.cc b/utils/pdftotext.cc
index 987a48b2..c1d4bc8f 100644
--- a/utils/pdftotext.cc
+++ b/utils/pdftotext.cc
@@ -165,19 +165,15 @@ static std::string myXmlTokenReplace(const char *inString)
int main(int argc, char *argv[])
{
std::unique_ptr<PDFDoc> doc;
- GooString *fileName;
- GooString *textFileName;
+ std::unique_ptr<GooString> textFileName;
std::optional<GooString> ownerPW, userPW;
- TextOutputDev *textOut;
FILE *f;
const UnicodeMap *uMap;
Object info;
bool ok;
- int exitCode;
EndOfLineKind textEOL = TextOutputDev::defaultEndOfLine();
Win32Console win32Console(&argc, &argv);
- exitCode = 99;
// parse args
ok = parseArgs(argDesc, &argc, argv);
@@ -189,7 +185,7 @@ int main(int argc, char *argv[])
}
if (colspacing <= 0 || colspacing > 10) {
error(errCommandLine, -1, "Bogus value provided for -colspacing");
- goto err1;
+ return 99;
}
if (!ok || (argc < 2 && !printEnc) || argc > 3 || printVersion || printHelp) {
fprintf(stderr, "pdftotext version %s\n", PACKAGE_VERSION);
@@ -199,9 +195,9 @@ int main(int argc, char *argv[])
printUsage("pdftotext", "<PDF-file> [<text-file>]", argDesc);
}
if (printVersion || printHelp) {
- exitCode = 0;
+ return 0;
}
- goto err0;
+ return 99;
}
// read config file
@@ -209,11 +205,10 @@ int main(int argc, char *argv[])
if (printEnc) {
printEncodings();
- exitCode = 0;
- goto err0;
+ return 0;
}
- fileName = new GooString(argv[1]);
+ GooString fileName(argv[1]);
if (fixedPitch) {
physLayout = true;
}
@@ -239,8 +234,7 @@ int main(int argc, char *argv[])
// get mapping to output encoding
if (!(uMap = globalParams->getTextEncoding())) {
error(errCommandLine, -1, "Couldn't get text encoding");
- delete fileName;
- goto err1;
+ return 99;
}
// open PDF file
@@ -251,39 +245,36 @@ int main(int argc, char *argv[])
userPW = GooString(userPassword);
}
- if (fileName->cmp("-") == 0) {
- delete fileName;
- fileName = new GooString("fd://0");
+ if (fileName.cmp("-") == 0) {
+ fileName = GooString("fd://0");
}
- doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
+ doc = PDFDocFactory().createPDFDoc(fileName, ownerPW, userPW);
if (!doc->isOk()) {
- exitCode = 1;
- goto err2;
+ return 1;
}
#ifdef ENFORCE_PERMISSIONS
// check for copy permission
if (!doc->okToCopy()) {
error(errNotAllowed, -1, "Copying of text from this document is not allowed.");
- exitCode = 3;
- goto err2;
+ return 3;
}
#endif
// construct text file name
if (argc == 3) {
- textFileName = new GooString(argv[2]);
- } else if (fileName->cmp("fd://0") == 0) {
+ textFileName = std::make_unique<GooString>(argv[2]);
+ } else if (fileName.cmp("fd://0") == 0) {
error(errCommandLine, -1, "You have to provide an output filename when reading from stdin.");
- goto err2;
+ return 99;
} else {
- const char *p = fileName->c_str() + fileName->getLength() - 4;
+ const char *p = fileName.c_str() + fileName.getLength() - 4;
if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")) {
- textFileName = new GooString(fileName->c_str(), fileName->getLength() - 4);
+ textFileName = std::make_unique<GooString>(fileName.c_str(), fileName.getLength() - 4);
} else {
- textFileName = fileName->copy();
+ textFileName.reset(fileName.copy());
}
textFileName->append(htmlMeta ? ".html" : ".txt");
}
@@ -297,7 +288,7 @@ int main(int argc, char *argv[])
}
if (lastPage < firstPage) {
error(errCommandLine, -1, "Wrong page range given: the first page ({0:d}) can not be after the last page ({1:d}).", firstPage, lastPage);
- goto err3;
+ return 99;
}
// write HTML header
@@ -306,9 +297,8 @@ int main(int argc, char *argv[])
f = stdout;
} else {
if (!(f = fopen(textFileName->c_str(), "wb"))) {
- error(errIO, -1, "Couldn't open text file '{0:t}'", textFileName);
- exitCode = 2;
- goto err3;
+ error(errIO, -1, "Couldn't open text file '{0:t}'", textFileName.get());
+ return 2;
}
}
fputs("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">", f);
@@ -342,18 +332,18 @@ int main(int argc, char *argv[])
// write text file
if (htmlMeta && bbox) { // htmlMeta && is superfluous but makes gcc happier
- textOut = new TextOutputDev(nullptr, physLayout, fixedPitch, rawOrder, htmlMeta, discardDiag);
+ TextOutputDev textOut(nullptr, physLayout, fixedPitch, rawOrder, htmlMeta, discardDiag);
- if (textOut->isOk()) {
- textOut->setTextEOL(textEOL);
- textOut->setMinColSpacing1(colspacing);
+ if (textOut.isOk()) {
+ textOut.setTextEOL(textEOL);
+ textOut.setMinColSpacing1(colspacing);
if (noPageBreaks) {
- textOut->setTextPageBreaks(false);
+ textOut.setTextPageBreaks(false);
}
if (bboxLayout) {
- printDocBBox(f, doc.get(), textOut, firstPage, lastPage);
+ printDocBBox(f, doc.get(), &textOut, firstPage, lastPage);
} else {
- printWordBBox(f, doc.get(), textOut, firstPage, lastPage);
+ printWordBBox(f, doc.get(), &textOut, firstPage, lastPage);
}
}
if (f != stdout) {
@@ -362,47 +352,42 @@ int main(int argc, char *argv[])
} else {
if (tsvMode) {
- textOut = new TextOutputDev(nullptr, physLayout, fixedPitch, rawOrder, htmlMeta, discardDiag);
+ TextOutputDev textOut(nullptr, physLayout, fixedPitch, rawOrder, htmlMeta, discardDiag);
if (!textFileName->cmp("-")) {
f = stdout;
} else {
if (!(f = fopen(textFileName->c_str(), "wb"))) {
- error(errIO, -1, "Couldn't open text file '{0:t}'", textFileName);
- delete textOut;
- exitCode = 2;
- goto err3;
+ error(errIO, -1, "Couldn't open text file '{0:t}'", textFileName.get());
+ return 2;
}
}
- printTSVBBox(f, doc.get(), textOut, firstPage, lastPage);
+ printTSVBBox(f, doc.get(), &textOut, firstPage, lastPage);
if (f != stdout) {
fclose(f);
}
} else {
- textOut = new TextOutputDev(textFileName->c_str(), physLayout, fixedPitch, rawOrder, htmlMeta, discardDiag);
- if (textOut->isOk()) {
- textOut->setTextEOL(textEOL);
- textOut->setMinColSpacing1(colspacing);
+ TextOutputDev textOut(textFileName->c_str(), physLayout, fixedPitch, rawOrder, htmlMeta, discardDiag);
+ if (textOut.isOk()) {
+ textOut.setTextEOL(textEOL);
+ textOut.setMinColSpacing1(colspacing);
if (noPageBreaks) {
- textOut->setTextPageBreaks(false);
+ textOut.setTextPageBreaks(false);
}
if ((w == 0) && (h == 0) && (x == 0) && (y == 0)) {
- doc->displayPages(textOut, firstPage, lastPage, resolution, resolution, 0, true, false, false);
+ doc->displayPages(&textOut, firstPage, lastPage, resolution, resolution, 0, true, false, false);
} else {
for (int page = firstPage; page <= lastPage; ++page) {
- doc->displayPageSlice(textOut, page, resolution, resolution, 0, true, false, false, x, y, w, h);
+ doc->displayPageSlice(&textOut, page, resolution, resolution, 0, true, false, false, x, y, w, h);
}
}
} else {
- delete textOut;
- exitCode = 2;
- goto err3;
+ return 2;
}
}
}
- delete textOut;
// write end of HTML file
if (htmlMeta) {
@@ -410,9 +395,8 @@ int main(int argc, char *argv[])
f = stdout;
} else {
if (!(f = fopen(textFileName->c_str(), "ab"))) {
- error(errIO, -1, "Couldn't open text file '{0:t}'", textFileName);
- exitCode = 2;
- goto err3;
+ error(errIO, -1, "Couldn't open text file '{0:t}'", textFileName.get());
+ return 2;
}
}
if (!bbox) {
@@ -425,17 +409,7 @@ int main(int argc, char *argv[])
}
}
- exitCode = 0;
-
- // clean up
-err3:
- delete textFileName;
-err2:
- delete fileName;
-err1:
-err0:
-
- return exitCode;
+ return 0;
}
static void printInfoString(FILE *f, Dict *infoDict, const char *key, const char *text1, const char *text2, const UnicodeMap *uMap)
More information about the poppler mailing list