[poppler] utils/pdfinfo.1 utils/pdfinfo.cc

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Sun Sep 26 16:48:24 UTC 2021


 utils/pdfinfo.1  |   12 +++++
 utils/pdfinfo.cc |  115 +++++++++++++++++++++++++++++++++----------------------
 2 files changed, 82 insertions(+), 45 deletions(-)

New commits:
commit 66890c1cbf9c2a37b3bb9ec36562e78d88fb5348
Author: Adrian Johnson <ajohnson at redneon.com>
Date:   Sun Sep 26 19:41:39 2021 +0930

    pdfinfo: add metadata flags

diff --git a/utils/pdfinfo.1 b/utils/pdfinfo.1
index f4225a9e..abd34a8c 100644
--- a/utils/pdfinfo.1
+++ b/utils/pdfinfo.1
@@ -46,9 +46,21 @@ modification date
 In addition, the following information is printed:
 .PP
 .RS
+custom metadata (yes/no)
+.RE
+.RS
+metadata stream (yes/no)
+.RE
+.RS
 tagged (yes/no)
 .RE
 .RS
+userproperties (yes/no)
+.RE
+.RS
+suspects (yes/no)
+.RE
+.RS
 form (AcroForm / XFA / none)
 .RE
 .RS
diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index b46d1aa3..c923a6b8 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -664,19 +664,19 @@ static void printCustomInfo(PDFDoc *doc, const UnicodeMap *uMap)
         for (const std::string &key : keys) {
             if (key == "CreationDate") {
                 if (isoDates) {
-                    printISODate(info.getDict(), "CreationDate", "CreationDate:   ", uMap);
+                    printISODate(info.getDict(), "CreationDate", "CreationDate:    ", uMap);
                 } else if (rawDates) {
-                    printInfoString(info.getDict(), "CreationDate", "CreationDate:   ", uMap);
+                    printInfoString(info.getDict(), "CreationDate", "CreationDate:    ", uMap);
                 } else {
-                    printInfoDate(info.getDict(), "CreationDate", "CreationDate:   ", uMap);
+                    printInfoDate(info.getDict(), "CreationDate", "CreationDate:    ", uMap);
                 }
             } else if (key == "ModDate") {
                 if (isoDates) {
-                    printISODate(info.getDict(), "ModDate", "ModDate:        ", uMap);
+                    printISODate(info.getDict(), "ModDate", "ModDate:         ", uMap);
                 } else if (rawDates) {
-                    printInfoString(info.getDict(), "ModDate", "ModDate:        ", uMap);
+                    printInfoString(info.getDict(), "ModDate", "ModDate:         ", uMap);
                 } else {
-                    printInfoDate(info.getDict(), "ModDate", "ModDate:        ", uMap);
+                    printInfoDate(info.getDict(), "ModDate", "ModDate:         ", uMap);
                 }
             } else {
                 Object obj = dict->lookup(key.c_str());
@@ -686,7 +686,7 @@ static void printCustomInfo(PDFDoc *doc, const UnicodeMap *uMap)
                     int len = utf8ToUCS4(key.c_str(), &u);
                     printUCS4String(u, len, uMap);
                     fputs(":", stdout);
-                    while (len < 15) {
+                    while (len < 16) {
                         fputs(" ", stdout);
                         len++;
                     }
@@ -713,39 +713,64 @@ static void printInfo(PDFDoc *doc, const UnicodeMap *uMap, long long filesize, b
     // print doc info
     Object info = doc->getDocInfo();
     if (info.isDict()) {
-        printInfoString(info.getDict(), "Title", "Title:          ", uMap);
-        printInfoString(info.getDict(), "Subject", "Subject:        ", uMap);
-        printInfoString(info.getDict(), "Keywords", "Keywords:       ", uMap);
-        printInfoString(info.getDict(), "Author", "Author:         ", uMap);
-        printInfoString(info.getDict(), "Creator", "Creator:        ", uMap);
-        printInfoString(info.getDict(), "Producer", "Producer:       ", uMap);
+        printInfoString(info.getDict(), "Title", "Title:           ", uMap);
+        printInfoString(info.getDict(), "Subject", "Subject:         ", uMap);
+        printInfoString(info.getDict(), "Keywords", "Keywords:        ", uMap);
+        printInfoString(info.getDict(), "Author", "Author:          ", uMap);
+        printInfoString(info.getDict(), "Creator", "Creator:         ", uMap);
+        printInfoString(info.getDict(), "Producer", "Producer:        ", uMap);
         if (isoDates) {
-            printISODate(info.getDict(), "CreationDate", "CreationDate:   ", uMap);
-            printISODate(info.getDict(), "ModDate", "ModDate:        ", uMap);
+            printISODate(info.getDict(), "CreationDate", "CreationDate:    ", uMap);
+            printISODate(info.getDict(), "ModDate", "ModDate:         ", uMap);
         } else if (rawDates) {
-            printInfoString(info.getDict(), "CreationDate", "CreationDate:   ", uMap);
-            printInfoString(info.getDict(), "ModDate", "ModDate:        ", uMap);
+            printInfoString(info.getDict(), "CreationDate", "CreationDate:    ", uMap);
+            printInfoString(info.getDict(), "ModDate", "ModDate:         ", uMap);
         } else {
-            printInfoDate(info.getDict(), "CreationDate", "CreationDate:   ", uMap);
-            printInfoDate(info.getDict(), "ModDate", "ModDate:        ", uMap);
+            printInfoDate(info.getDict(), "CreationDate", "CreationDate:    ", uMap);
+            printInfoDate(info.getDict(), "ModDate", "ModDate:         ", uMap);
         }
     }
 
+    bool hasMetadata = false;
+    const GooString *metadata = doc->readMetadata();
+    if (metadata) {
+        hasMetadata = true;
+        delete metadata;
+    }
+
+    const std::set<std::string> docInfoStandardKeys { "Title", "Author", "Subject", "Keywords", "Creator", "Producer", "CreationDate", "ModDate", "Trapped" };
+
+    bool hasCustom = false;
+    if (info.isDict()) {
+        Dict *dict = info.getDict();
+        for (i = 0; i < dict->getLength(); i++) {
+            std::string key(dict->getKey(i));
+            if (docInfoStandardKeys.find(key) == docInfoStandardKeys.end()) {
+                hasCustom = true;
+                break;
+            }
+        }
+    }
+
+    // print metadata info
+    printf("Custom Metadata: %s\n", hasCustom ? "yes" : "no");
+    printf("Metadata Stream: %s\n", hasMetadata ? "yes" : "no");
+
     // print tagging info
-    printf("Tagged:         %s\n", (doc->getCatalog()->getMarkInfo() & Catalog::markInfoMarked) ? "yes" : "no");
-    printf("UserProperties: %s\n", (doc->getCatalog()->getMarkInfo() & Catalog::markInfoUserProperties) ? "yes" : "no");
-    printf("Suspects:       %s\n", (doc->getCatalog()->getMarkInfo() & Catalog::markInfoSuspects) ? "yes" : "no");
+    printf("Tagged:          %s\n", (doc->getCatalog()->getMarkInfo() & Catalog::markInfoMarked) ? "yes" : "no");
+    printf("UserProperties:  %s\n", (doc->getCatalog()->getMarkInfo() & Catalog::markInfoUserProperties) ? "yes" : "no");
+    printf("Suspects:        %s\n", (doc->getCatalog()->getMarkInfo() & Catalog::markInfoSuspects) ? "yes" : "no");
 
     // print form info
     switch (doc->getCatalog()->getFormType()) {
     case Catalog::NoForm:
-        printf("Form:           none\n");
+        printf("Form:            none\n");
         break;
     case Catalog::AcroForm:
-        printf("Form:           AcroForm\n");
+        printf("Form:            AcroForm\n");
         break;
     case Catalog::XfaForm:
-        printf("Form:           XFA\n");
+        printf("Form:            XFA\n");
         break;
     }
 
@@ -753,14 +778,14 @@ static void printInfo(PDFDoc *doc, const UnicodeMap *uMap, long long filesize, b
     {
         JSInfo jsInfo(doc, firstPage - 1);
         jsInfo.scanJS(lastPage - firstPage + 1);
-        printf("JavaScript:     %s\n", jsInfo.containsJS() ? "yes" : "no");
+        printf("JavaScript:      %s\n", jsInfo.containsJS() ? "yes" : "no");
     }
 
     // print page count
-    printf("Pages:          %d\n", doc->getNumPages());
+    printf("Pages:           %d\n", doc->getNumPages());
 
     // print encryption info
-    printf("Encrypted:      ");
+    printf("Encrypted:       ");
     if (doc->isEncrypted()) {
         unsigned char *fileKey;
         CryptAlgorithm encAlgorithm;
@@ -793,9 +818,9 @@ static void printInfo(PDFDoc *doc, const UnicodeMap *uMap, long long filesize, b
         w = doc->getPageCropWidth(pg);
         h = doc->getPageCropHeight(pg);
         if (multiPage) {
-            printf("Page %4d size: %g x %g pts", pg, w, h);
+            printf("Page %4d size:  %g x %g pts", pg, w, h);
         } else {
-            printf("Page size:      %g x %g pts", w, h);
+            printf("Page size:       %g x %g pts", w, h);
         }
         if ((fabs(w - 612) < 1 && fabs(h - 792) < 1) || (fabs(w - 792) < 1 && fabs(h - 612) < 1)) {
             printf(" (letter)");
@@ -816,9 +841,9 @@ static void printInfo(PDFDoc *doc, const UnicodeMap *uMap, long long filesize, b
         printf("\n");
         r = doc->getPageRotate(pg);
         if (multiPage) {
-            printf("Page %4d rot:  %d\n", pg, r);
+            printf("Page %4d rot:   %d\n", pg, r);
         } else {
-            printf("Page rot:       %d\n", r);
+            printf("Page rot:        %d\n", r);
         }
     }
 
@@ -831,15 +856,15 @@ static void printInfo(PDFDoc *doc, const UnicodeMap *uMap, long long filesize, b
                     error(errSyntaxError, -1, "Failed to print boxes for page {0:d}", pg);
                     continue;
                 }
-                sprintf(buf, "Page %4d MediaBox: ", pg);
+                sprintf(buf, "Page %4d MediaBox:  ", pg);
                 printBox(buf, page->getMediaBox());
-                sprintf(buf, "Page %4d CropBox:  ", pg);
+                sprintf(buf, "Page %4d CropBox:   ", pg);
                 printBox(buf, page->getCropBox());
-                sprintf(buf, "Page %4d BleedBox: ", pg);
+                sprintf(buf, "Page %4d BleedBox:  ", pg);
                 printBox(buf, page->getBleedBox());
-                sprintf(buf, "Page %4d TrimBox:  ", pg);
+                sprintf(buf, "Page %4d TrimBox:   ", pg);
                 printBox(buf, page->getTrimBox());
-                sprintf(buf, "Page %4d ArtBox:   ", pg);
+                sprintf(buf, "Page %4d ArtBox:    ", pg);
                 printBox(buf, page->getArtBox());
             }
         } else {
@@ -847,23 +872,23 @@ static void printInfo(PDFDoc *doc, const UnicodeMap *uMap, long long filesize, b
             if (!page) {
                 error(errSyntaxError, -1, "Failed to print boxes for page {0:d}", firstPage);
             } else {
-                printBox("MediaBox:       ", page->getMediaBox());
-                printBox("CropBox:        ", page->getCropBox());
-                printBox("BleedBox:       ", page->getBleedBox());
-                printBox("TrimBox:        ", page->getTrimBox());
-                printBox("ArtBox:         ", page->getArtBox());
+                printBox("MediaBox:        ", page->getMediaBox());
+                printBox("CropBox:         ", page->getCropBox());
+                printBox("BleedBox:        ", page->getBleedBox());
+                printBox("TrimBox:         ", page->getTrimBox());
+                printBox("ArtBox:          ", page->getArtBox());
             }
         }
     }
 
     // print file size
-    printf("File size:      %lld bytes\n", filesize);
+    printf("File size:       %lld bytes\n", filesize);
 
     // print linearization info
-    printf("Optimized:      %s\n", doc->isLinearized() ? "yes" : "no");
+    printf("Optimized:       %s\n", doc->isLinearized() ? "yes" : "no");
 
     // print PDF version
-    printf("PDF version:    %d.%d\n", doc->getPDFMajorVersion(), doc->getPDFMinorVersion());
+    printf("PDF version:     %d.%d\n", doc->getPDFMajorVersion(), doc->getPDFMinorVersion());
 
     printPdfSubtype(doc, uMap);
 }


More information about the poppler mailing list