[poppler] 2 commits - utils/pdfinfo.1 utils/pdfinfo.cc

Adrian Johnson ajohnson at kemper.freedesktop.org
Wed Jul 13 21:47:34 UTC 2016


 utils/pdfinfo.1  |    2 
 utils/pdfinfo.cc |  579 +++++++++++++++++++++++++++----------------------------
 2 files changed, 291 insertions(+), 290 deletions(-)

New commits:
commit 13f0333a46e2a498700ee6bff9845ae0eceafe80
Author: Adrian Johnson <ajohnson at redneon.com>
Date:   Tue Jul 5 06:54:54 2016 +0930

    pdfinfo: update man page for listenc, meta, js, and struct* options

diff --git a/utils/pdfinfo.1 b/utils/pdfinfo.1
index f3a60d7..741219f 100644
--- a/utils/pdfinfo.1
+++ b/utils/pdfinfo.1
@@ -74,6 +74,8 @@ PDF version
 .RS
 metadata (only if requested)
 .RE
+.PP
+The options \-listenc, \-meta, \-js, \-struct, and \-struct\-text only print the requested information. The 'Info' dictionary and related data listed above are not printed. At most one of these five options may be used.
 .SH OPTIONS
 .TP
 .BI \-f " number"
commit c91483aceb1b640771f572cb3df9ad707e5cad0d
Author: Adrian Johnson <ajohnson at redneon.com>
Date:   Mon Jul 4 21:55:53 2016 +0930

    pdfinfo: Don't print pdf info when printing metadata, javascript, or structure
    
    Bug 96801

diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index b3e6513..bffd67f 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -61,13 +61,6 @@
 #include "StructTreeRoot.h"
 #include "StructElement.h"
 
-static void printInfoString(Dict *infoDict, const char *key, const char *text,
-			    UnicodeMap *uMap);
-static void printInfoDate(Dict *infoDict, const char *key, const char *text);
-static void printISODate(Dict *infoDict, const char *key, const char *text);
-static void printBox(const char *text, PDFRectangle *box);
-static void printStruct(const StructElement *element, unsigned indent = 0);
-static void printIndent(unsigned level);
 
 static int firstPage = 1;
 static int lastPage = 0;
@@ -125,114 +118,184 @@ static const ArgDesc argDesc[] = {
   {NULL}
 };
 
-int main(int argc, char *argv[]) {
-  PDFDoc *doc;
-  GooString *fileName;
-  GooString *ownerPW, *userPW;
-  UnicodeMap *uMap;
-  Page *page;
-  Object info;
-  char buf[256];
-  double w, h, wISO, hISO;
-  FILE *f;
-  GooString *metadata;
-  GBool ok;
-  int exitCode;
-  int pg, i;
-  GBool multiPage;
-  int r;
-
-  exitCode = 99;
+static void printInfoString(Dict *infoDict, const char *key, const char *text,
+			    UnicodeMap *uMap) {
+  Object obj;
+  GooString *s1;
+  Unicode *u;
+  char buf[8];
+  int i, n, len;
 
-  // parse args
-  ok = parseArgs(argDesc, &argc, argv);
-  if (!ok || (argc != 2 && !printEnc) || printVersion || printHelp) {
-    fprintf(stderr, "pdfinfo version %s\n", PACKAGE_VERSION);
-    fprintf(stderr, "%s\n", popplerCopyright);
-    fprintf(stderr, "%s\n", xpdfCopyright);
-    if (!printVersion) {
-      printUsage("pdfinfo", "<PDF-file>", argDesc);
+  if (infoDict->lookup(key, &obj)->isString()) {
+    fputs(text, stdout);
+    s1 = obj.getString();
+    len = TextStringToUCS4(s1, &u);
+    for (i = 0; i < len; i++) {
+      n = uMap->mapUnicode(u[i], buf, sizeof(buf));
+      fwrite(buf, 1, n, stdout);
     }
-    if (printVersion || printHelp)
-      exitCode = 0;
-    goto err0;
+    gfree(u);
+    fputc('\n', stdout);
   }
+  obj.free();
+}
 
-  if (printStructureText)
-    printStructure = gTrue;
-
-  // read config file
-  globalParams = new GlobalParams();
+static void printInfoDate(Dict *infoDict, const char *key, const char *text) {
+  Object obj;
+  char *s;
+  int year, mon, day, hour, min, sec, tz_hour, tz_minute;
+  char tz;
+  struct tm tmStruct;
+  time_t time;
+  char buf[256];
 
-  if (printEnc) {
-    printEncodings();
-    delete globalParams;
-    exitCode = 0;
-    goto err0;
+  if (infoDict->lookup(key, &obj)->isString()) {
+    fputs(text, stdout);
+    s = obj.getString()->getCString();
+    // TODO do something with the timezone info
+    if ( parseDateString( s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute ) ) {
+      tmStruct.tm_year = year - 1900;
+      tmStruct.tm_mon = mon - 1;
+      tmStruct.tm_mday = day;
+      tmStruct.tm_hour = hour;
+      tmStruct.tm_min = min;
+      tmStruct.tm_sec = sec;
+      tmStruct.tm_wday = -1;
+      tmStruct.tm_yday = -1;
+      tmStruct.tm_isdst = -1;
+      // compute the tm_wday and tm_yday fields
+      time = timegm(&tmStruct);
+      if (time != (time_t)-1) {
+	int offset = (tz_hour*60 + tz_minute)*60;
+	if (tz == '-')
+	  offset *= -1;
+	time -= offset;
+	localtime_r(&time, &tmStruct);
+	strftime(buf, sizeof(buf), "%c %Z", &tmStruct);
+	fputs(buf, stdout);
+      } else {
+	fputs(s, stdout);
+      }
+    } else {
+      fputs(s, stdout);
+    }
+    fputc('\n', stdout);
   }
+  obj.free();
+}
 
-  fileName = new GooString(argv[1]);
-
-  if (textEncName[0]) {
-    globalParams->setTextEncoding(textEncName);
-  }
+void printISODate(Dict *infoDict, const char *key, const char *text)
+{
+  Object obj;
+  char *s;
+  int year, mon, day, hour, min, sec, tz_hour, tz_minute;
+  char tz;
 
-  // get mapping to output encoding
-  if (!(uMap = globalParams->getTextEncoding())) {
-    error(errCommandLine, -1, "Couldn't get text encoding");
-    delete fileName;
-    goto err1;
+  if (infoDict->lookup(key, &obj)->isString()) {
+    fputs(text, stdout);
+    s = obj.getString()->getCString();
+    if ( parseDateString( s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute ) ) {
+      fprintf(stdout, "%04d-%02d-%02dT%02d:%02d:%02d", year, mon, day, hour, min, sec);
+      if (tz_hour == 0 && tz_minute == 0) {
+	fprintf(stdout, "Z");
+      } else {
+	fprintf(stdout, "%c%02d", tz, tz_hour);
+	if (tz_minute)
+	  fprintf(stdout, ":%02d", tz_minute);
+      }
+    } else {
+      fputs(s, stdout);
+    }
+    fputc('\n', stdout);
   }
+  obj.free();
+}
 
-  // open PDF file
-  if (ownerPassword[0] != '\001') {
-    ownerPW = new GooString(ownerPassword);
-  } else {
-    ownerPW = NULL;
-  }
-  if (userPassword[0] != '\001') {
-    userPW = new GooString(userPassword);
-  } else {
-    userPW = NULL;
-  }
+static void printBox(const char *text, PDFRectangle *box) {
+  printf("%s%8.2f %8.2f %8.2f %8.2f\n",
+	 text, box->x1, box->y1, box->x2, box->y2);
+}
 
-  if (fileName->cmp("-") == 0) {
-      delete fileName;
-      fileName = new GooString("fd://0");
+static void printIndent(unsigned indent) {
+  while (indent--) {
+    putchar(' ');
+    putchar(' ');
   }
+}
 
-  doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
-
-  if (userPW) {
-    delete userPW;
+static void printAttribute(const Attribute *attribute, unsigned indent)
+{
+  printIndent(indent);
+  printf(" /%s ", attribute->getTypeName());
+  if (attribute->getType() == Attribute::UserProperty) {
+    GooString *name = attribute->getName();
+    printf("(%s) ", name->getCString());
+    delete name;
   }
-  if (ownerPW) {
-    delete ownerPW;
+  attribute->getValue()->print(stdout);
+  if (attribute->getFormattedValue()) {
+    printf(" \"%s\"", attribute->getFormattedValue());
   }
-  if (!doc->isOk()) {
-    exitCode = 1;
-    goto err2;
+  if (attribute->isHidden()) {
+    printf(" [hidden]");
   }
+}
 
-  // get page range
-  if (firstPage < 1) {
-    firstPage = 1;
-  }
-  if (lastPage == 0) {
-    multiPage = gFalse;
-    lastPage = 1;
-  } else {
-    multiPage = gTrue;
+static void printStruct(const StructElement *element, unsigned indent) {
+  if (element->isObjectRef()) {
+    printIndent(indent);
+    printf("Object %i %i\n", element->getObjectRef().num, element->getObjectRef().gen);
+    return;
   }
-  if (lastPage < 1 || lastPage > doc->getNumPages()) {
-    lastPage = doc->getNumPages();
+
+  if (printStructureText && element->isContent()) {
+    GooString *text = element->getText(gFalse);
+    printIndent(indent);
+    if (text) {
+      printf("\"%s\"\n", text->getCString());
+    } else {
+      printf("(No content?)\n");
+    }
+    delete text;
   }
-  if (lastPage < firstPage) {
-    error(errCommandLine, -1,
-          "Wrong page range given: the first page ({0:d}) can not be after the last page ({1:d}).",
-          firstPage, lastPage);
-    goto err2;
+
+  if (!element->isContent()) {
+      printIndent(indent);
+      printf("%s", element->getTypeName());
+      if (element->getID()) {
+          printf(" <%s>", element->getID()->getCString());
+      }
+      if (element->getTitle()) {
+          printf(" \"%s\"", element->getTitle()->getCString());
+      }
+      if (element->getRevision() > 0) {
+          printf(" r%u", element->getRevision());
+      }
+      if (element->isInline() || element->isBlock()) {
+          printf(" (%s)", element->isInline() ? "inline" : "block");
+      }
+      if (element->getNumAttributes()) {
+          putchar(':');
+          for (unsigned i = 0; i < element->getNumAttributes(); i++) {
+              putchar('\n');
+              printAttribute(element->getAttribute(i), indent + 1);
+          }
+      }
+
+      putchar('\n');
+      for (unsigned i = 0; i < element->getNumChildren(); i++) {
+          printStruct(element->getChild(i), indent + 1);
+      }
   }
+}
+
+void printInfo(PDFDoc *doc, UnicodeMap *uMap, long long filesize, GBool multiPage) {
+  Page *page;
+  Object info;
+  char buf[256];
+  double w, h, wISO, hISO;
+  int pg, i;
+  int r;
 
   // print doc info
   doc->getDocInfo(&info);
@@ -354,7 +417,7 @@ int main(int argc, char *argv[]) {
     } else {
       printf("Page rot:       %d\n", r);
     }
-  } 
+  }
 
   // print the boxes
   if (printBoxes) {
@@ -391,232 +454,168 @@ int main(int argc, char *argv[]) {
   }
 
   // print file size
-#ifdef VMS
-  f = fopen(fileName->getCString(), "rb", "ctx=stm");
-#else
-  f = fopen(fileName->getCString(), "rb");
-#endif
-  if (f) {
-    Gfseek(f, 0, SEEK_END);
-    printf("File size:      %lld bytes\n", (long long)Gftell(f));
-    fclose(f);
-  }
+  printf("File size:      %lld bytes\n", filesize);
 
   // print linearization info
   printf("Optimized:      %s\n", doc->isLinearized() ? "yes" : "no");
 
   // print PDF version
   printf("PDF version:    %d.%d\n", doc->getPDFMajorVersion(), doc->getPDFMinorVersion());
+}
 
-  // print the metadata
-  if (printMetadata && (metadata = doc->readMetadata())) {
-    fputs("Metadata:\n", stdout);
-    fputs(metadata->getCString(), stdout);
-    fputc('\n', stdout);
-    delete metadata;
-  }
-
-  // print javascript
-  if (printJS) {
-    JSInfo jsInfo(doc, firstPage - 1);
-    fputs("\n", stdout);
-    jsInfo.scanJS(lastPage - firstPage + 1, stdout, uMap);
-  }
+int main(int argc, char *argv[]) {
+  PDFDoc *doc;
+  GooString *fileName;
+  GooString *ownerPW, *userPW;
+  UnicodeMap *uMap;
+  Object info;
+  FILE *f;
+  GBool ok;
+  int exitCode;
+  GBool multiPage;
+
+  exitCode = 99;
 
-  // print the structure
-  const StructTreeRoot *structTree;
-  if (printStructure && (structTree = doc->getCatalog()->getStructTreeRoot())) {
-    fputs("Structure:\n", stdout);
-    for (unsigned i = 0; i < structTree->getNumChildren(); i++) {
-      printStruct(structTree->getChild(i), 1);
+  // parse args
+  ok = parseArgs(argDesc, &argc, argv);
+  if (!ok || (argc != 2 && !printEnc) || printVersion || printHelp) {
+    fprintf(stderr, "pdfinfo version %s\n", PACKAGE_VERSION);
+    fprintf(stderr, "%s\n", popplerCopyright);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdfinfo", "<PDF-file>", argDesc);
     }
+    if (printVersion || printHelp)
+      exitCode = 0;
+    goto err0;
   }
 
-  exitCode = 0;
-
-  // clean up
- err2:
-  uMap->decRefCnt();
-  delete doc;
-  delete fileName;
- err1:
-  delete globalParams;
- err0:
+  if (printStructureText)
+    printStructure = gTrue;
 
-  // check for memory leaks
-  Object::memCheck(stderr);
-  gMemReport(stderr);
+  // read config file
+  globalParams = new GlobalParams();
 
-  return exitCode;
-}
+  if (printEnc) {
+    printEncodings();
+    delete globalParams;
+    exitCode = 0;
+    goto err0;
+  }
 
-static void printInfoString(Dict *infoDict, const char *key, const char *text,
-			    UnicodeMap *uMap) {
-  Object obj;
-  GooString *s1;
-  Unicode *u;
-  char buf[8];
-  int i, n, len;
+  fileName = new GooString(argv[1]);
 
-  if (infoDict->lookup(key, &obj)->isString()) {
-    fputs(text, stdout);
-    s1 = obj.getString();
-    len = TextStringToUCS4(s1, &u);
-    for (i = 0; i < len; i++) {
-      n = uMap->mapUnicode(u[i], buf, sizeof(buf));
-      fwrite(buf, 1, n, stdout);
-    }
-    gfree(u);
-    fputc('\n', stdout);
+  if (textEncName[0]) {
+    globalParams->setTextEncoding(textEncName);
   }
-  obj.free();
-}
 
-static void printInfoDate(Dict *infoDict, const char *key, const char *text) {
-  Object obj;
-  char *s;
-  int year, mon, day, hour, min, sec, tz_hour, tz_minute;
-  char tz;
-  struct tm tmStruct;
-  time_t time;
-  char buf[256];
-
-  if (infoDict->lookup(key, &obj)->isString()) {
-    fputs(text, stdout);
-    s = obj.getString()->getCString();
-    // TODO do something with the timezone info
-    if ( parseDateString( s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute ) ) {
-      tmStruct.tm_year = year - 1900;
-      tmStruct.tm_mon = mon - 1;
-      tmStruct.tm_mday = day;
-      tmStruct.tm_hour = hour;
-      tmStruct.tm_min = min;
-      tmStruct.tm_sec = sec;
-      tmStruct.tm_wday = -1;
-      tmStruct.tm_yday = -1;
-      tmStruct.tm_isdst = -1;
-      // compute the tm_wday and tm_yday fields
-      time = timegm(&tmStruct);
-      if (time != (time_t)-1) {
-	int offset = (tz_hour*60 + tz_minute)*60;
-	if (tz == '-')
-	  offset *= -1;
-	time -= offset;
-	localtime_r(&time, &tmStruct);
-	strftime(buf, sizeof(buf), "%c %Z", &tmStruct);
-	fputs(buf, stdout);
-      } else {
-	fputs(s, stdout);
-      }
-    } else {
-      fputs(s, stdout);
-    }
-    fputc('\n', stdout);
+  // get mapping to output encoding
+  if (!(uMap = globalParams->getTextEncoding())) {
+    error(errCommandLine, -1, "Couldn't get text encoding");
+    delete fileName;
+    goto err1;
   }
-  obj.free();
-}
 
-void printISODate(Dict *infoDict, const char *key, const char *text)
-{
-  Object obj;
-  char *s;
-  int year, mon, day, hour, min, sec, tz_hour, tz_minute;
-  char tz;
+  // open PDF file
+  if (ownerPassword[0] != '\001') {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0] != '\001') {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
 
-  if (infoDict->lookup(key, &obj)->isString()) {
-    fputs(text, stdout);
-    s = obj.getString()->getCString();
-    if ( parseDateString( s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute ) ) {
-      fprintf(stdout, "%04d-%02d-%02dT%02d:%02d:%02d", year, mon, day, hour, min, sec);
-      if (tz_hour == 0 && tz_minute == 0) {
-	fprintf(stdout, "Z");
-      } else {
-	fprintf(stdout, "%c%02d", tz, tz_hour);
-	if (tz_minute)
-	  fprintf(stdout, ":%02d", tz_minute);
-      }
-    } else {
-      fputs(s, stdout);
-    }
-    fputc('\n', stdout);
+  if (fileName->cmp("-") == 0) {
+      delete fileName;
+      fileName = new GooString("fd://0");
   }
-  obj.free();
-}
 
-static void printBox(const char *text, PDFRectangle *box) {
-  printf("%s%8.2f %8.2f %8.2f %8.2f\n",
-	 text, box->x1, box->y1, box->x2, box->y2);
-}
+  doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
 
-static void printIndent(unsigned indent) {
-  while (indent--) {
-    putchar(' ');
-    putchar(' ');
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err2;
   }
-}
 
-static void printAttribute(const Attribute *attribute, unsigned indent)
-{
-  printIndent(indent);
-  printf(" /%s ", attribute->getTypeName());
-  if (attribute->getType() == Attribute::UserProperty) {
-    GooString *name = attribute->getName();
-    printf("(%s) ", name->getCString());
-    delete name;
+  // get page range
+  if (firstPage < 1) {
+    firstPage = 1;
   }
-  attribute->getValue()->print(stdout);
-  if (attribute->getFormattedValue()) {
-    printf(" \"%s\"", attribute->getFormattedValue());
+  if (lastPage == 0) {
+    multiPage = gFalse;
+    lastPage = 1;
+  } else {
+    multiPage = gTrue;
   }
-  if (attribute->isHidden()) {
-    printf(" [hidden]");
+  if (lastPage < 1 || lastPage > doc->getNumPages()) {
+    lastPage = doc->getNumPages();
   }
-}
-
-static void printStruct(const StructElement *element, unsigned indent) {
-  if (element->isObjectRef()) {
-    printIndent(indent);
-    printf("Object %i %i\n", element->getObjectRef().num, element->getObjectRef().gen);
-    return;
+  if (lastPage < firstPage) {
+    error(errCommandLine, -1,
+          "Wrong page range given: the first page ({0:d}) can not be after the last page ({1:d}).",
+          firstPage, lastPage);
+    goto err2;
   }
 
-  if (printStructureText && element->isContent()) {
-    GooString *text = element->getText(gFalse);
-    printIndent(indent);
-    if (text) {
-      printf("\"%s\"\n", text->getCString());
-    } else {
-      printf("(No content?)\n");
+  if (printMetadata) {
+    // print the metadata
+    GooString *metadata = doc->readMetadata();
+    if (metadata) {
+      fputs(metadata->getCString(), stdout);
+      fputc('\n', stdout);
+      delete metadata;
     }
-    delete text;
-  }
-
-  if (!element->isContent()) {
-      printIndent(indent);
-      printf("%s", element->getTypeName());
-      if (element->getID()) {
-          printf(" <%s>", element->getID()->getCString());
-      }
-      if (element->getTitle()) {
-          printf(" \"%s\"", element->getTitle()->getCString());
-      }
-      if (element->getRevision() > 0) {
-          printf(" r%u", element->getRevision());
-      }
-      if (element->isInline() || element->isBlock()) {
-          printf(" (%s)", element->isInline() ? "inline" : "block");
-      }
-      if (element->getNumAttributes()) {
-          putchar(':');
-          for (unsigned i = 0; i < element->getNumAttributes(); i++) {
-              putchar('\n');
-              printAttribute(element->getAttribute(i), indent + 1);
-          }
+  } else if (printJS) {
+    // print javascript
+    JSInfo jsInfo(doc, firstPage - 1);
+    jsInfo.scanJS(lastPage - firstPage + 1, stdout, uMap);
+  } else if (printStructure || printStructureText) {
+    // print structure
+    const StructTreeRoot *structTree = doc->getCatalog()->getStructTreeRoot();
+    if (structTree) {
+      for (unsigned i = 0; i < structTree->getNumChildren(); i++) {
+	printStruct(structTree->getChild(i), 0);
       }
+    }
+  } else {
+    // print info
+    long long filesize = 0;
 
-      putchar('\n');
-      for (unsigned i = 0; i < element->getNumChildren(); i++) {
-          printStruct(element->getChild(i), indent + 1);
-      }
+#ifdef VMS
+    f = fopen(fileName->getCString(), "rb", "ctx=stm");
+#else
+    f = fopen(fileName->getCString(), "rb");
+#endif
+    if (f) {
+      Gfseek(f, 0, SEEK_END);
+      filesize = Gftell(f);
+      fclose(f);
+    }
+    printInfo(doc, uMap, filesize, multiPage);
   }
+  exitCode = 0;
+
+  // clean up
+ err2:
+  uMap->decRefCnt();
+  delete doc;
+  delete fileName;
+ err1:
+  delete globalParams;
+ err0:
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
 }


More information about the poppler mailing list