[poppler] 2 commits - poppler/PDFDoc.cc poppler/PDFDoc.h utils/pdfinfo.cc

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Sat Sep 1 21:50:55 UTC 2018


 poppler/PDFDoc.cc |  132 +++++++++++++++++++++++++++++
 poppler/PDFDoc.h  |   46 ++++++++++
 utils/pdfinfo.cc  |  238 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 416 insertions(+)

New commits:
commit 45f0f6d21d51c0408fe1d876f18ef05489e69bc0
Author: Evangelos Rigas <erigas at rnd2.org>
Date:   Mon Aug 6 10:57:47 2018 +0100

    [utils] Add PDF subtype to pdfinfo
    
    If the document declares conformance to a PDF/A, PDF/E, PDF/VT, PDF/UA
    or PDF/X standard, print the PDF subtype version, title and subtitle,
    and explain the part and conformance levels.

diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index 50042393..91423ebd 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -24,6 +24,7 @@
 // Copyright (C) 2013 Suzuki Toshiya <mpsuzuki at hiroshima-u.ac.jp>
 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info at kdab.com>. Work sponsored by the LiMux project of the city of Munich
 // Copyright (C) 2018 Adam Reichold <adam.reichold at t-online.de>
+// Copyright (C) 2018 Evangelos Rigas <erigas at rnd2.org>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -424,6 +425,241 @@ static void printDestinations(PDFDoc *doc, UnicodeMap *uMap) {
   }
 }
 
+// Print the PDF standard subtype (PDF/A, PDF/E, PDF/UA, PDF/VT or PDF/X)
+// declared in the document information dictionary: the raw GTS_* version
+// entry, then the standard's title, abbreviation, part subtitle, ISO number
+// and conformance level. Prints nothing when there is no info dictionary or
+// no subtype; part-dependent output is omitted when no part was recognised.
+static void printPdfSubtype(PDFDoc *doc, UnicodeMap *uMap) {
+  const Object info = doc->getDocInfo();
+  if (info.isDict()) {
+    const PDFSubtype pdftype = doc->getPDFSubtype();
+
+    if ((pdftype == subtypeNull) || (pdftype == subtypeNone)) {
+      return;
+    }
+
+    const char *part = nullptr;
+    std::unique_ptr<GooString> abbr;
+    std::unique_ptr<GooString> standard;
+    const char *typeExp = nullptr;
+    const char *confExp = nullptr;
+
+    // Form title from PDFSubtype
+    switch (pdftype) {
+      case subtypePDFA:
+        printInfoString(info.getDict(), "GTS_PDFA1Version", "PDF subtype:    ", uMap);
+        typeExp = "ISO 19005 - Electronic document file format for long-term preservation (PDF/A)";
+        standard.reset( new GooString("ISO 19005") );
+        abbr.reset( new GooString("PDF/A") );
+        break;
+      case subtypePDFE:
+        printInfoString(info.getDict(), "GTS_PDFEVersion", "PDF subtype:    ", uMap);
+        typeExp = "ISO 24517 - Engineering document format using PDF (PDF/E)";
+        standard.reset( new GooString("ISO 24517") );
+        abbr.reset( new GooString("PDF/E") );
+        break;
+      case subtypePDFUA:
+        printInfoString(info.getDict(), "GTS_PDFUAVersion", "PDF subtype:    ", uMap);
+        typeExp = "ISO 14289 - Electronic document file format enhancement for accessibility (PDF/UA)";
+        standard.reset( new GooString("ISO 14289") );
+        abbr.reset( new GooString("PDF/UA") );
+        break;
+      case subtypePDFVT:
+        printInfoString(info.getDict(), "GTS_PDFVTVersion", "PDF subtype:    ", uMap);
+        typeExp = "ISO 16612 - Electronic document file format for variable data exchange (PDF/VT)";
+        standard.reset( new GooString("ISO 16612") );
+        abbr.reset( new GooString("PDF/VT") );
+        break;
+      case subtypePDFX:
+        printInfoString(info.getDict(), "GTS_PDFXVersion", "PDF subtype:    ", uMap);
+        typeExp = "ISO 15930 - Electronic document file format for prepress digital data exchange (PDF/X)";
+        standard.reset( new GooString("ISO 15930") );
+        abbr.reset( new GooString("PDF/X") );
+        break;
+      default:
+        return;
+    }
+
+    // Form the abbreviation from PDFSubtypePart and PDFSubtype
+    const PDFSubtypePart subpart = doc->getPDFSubtypePart();
+    // The Null/None sentinels are not part numbers and must never be printed as one
+    const bool hasPart = (subpart != subtypePartNull) && (subpart != subtypePartNone);
+    switch (pdftype) {
+      case subtypePDFX:
+        // PDF/X editions are abbreviated by revision and year, not by the part value
+        switch (subpart) {
+          case subtypePart1:
+            abbr->append("-1:2001");
+            break;
+          case subtypePart2:
+            abbr->append("-2");
+            break;
+          case subtypePart3:
+            abbr->append("-3:2002");
+            break;
+          case subtypePart4:
+            abbr->append("-1:2003");
+            break;
+          case subtypePart5:
+            abbr->append("-2");
+            break;
+          case subtypePart6:
+            abbr->append("-3:2003");
+            break;
+          case subtypePart7:
+            abbr->append("-4");
+            break;
+          case subtypePart8:
+            abbr->append("-5");
+            break;
+          default:
+            break;
+        }
+        break;
+      default:
+        if (hasPart) {
+          abbr->appendf("-{0:d}", static_cast<int>(subpart));
+        }
+        break;
+    }
+
+    // Form standard from PDFSubtypePart
+    if (hasPart) {
+      standard->appendf("-{0:d}", static_cast<int>(subpart));
+    }
+
+    // Form the subtitle from PDFSubtypePart and PDFSubtype
+    switch (pdftype) {
+      case subtypePDFA:
+        switch (subpart) {
+          case subtypePart1:
+            part = "Use of PDF 1.4";
+            break;
+          case subtypePart2:
+            part = "Use of ISO 32000-1";
+            break;
+          case subtypePart3:
+            part = "Use of ISO 32000-1 with support for embedded files";
+            break;
+          default:
+            break;
+        }
+        break;
+      case subtypePDFE:
+        switch (subpart) {
+          case subtypePart1:
+            part = "Use of PDF 1.6";
+            break;
+          default:
+            break;
+        }
+        break;
+      case subtypePDFUA:
+        switch (subpart) {
+          case subtypePart1:
+            part = "Use of ISO 32000-1";
+            break;
+          case subtypePart2:
+            part = "Use of ISO 32000-2";
+            break;
+          case subtypePart3:
+            part = "Use of ISO 32000-1 with support for embedded files";
+            break;
+          default:
+            break;
+        }
+        break;
+      case subtypePDFVT:
+        switch (subpart) {
+          case subtypePart1:
+            part = "Using PPML 2.1 and PDF 1.4";
+            break;
+          case subtypePart2:
+            part = "Using PDF/X-4 and PDF/X-5 (PDF/VT-1 and PDF/VT-2)";
+            break;
+          case subtypePart3:
+            part = "Using PDF/X-6 (PDF/VT-3)";
+            break;
+          default:
+            break;
+        }
+        break;
+      case subtypePDFX:
+        switch (subpart) {
+          case subtypePart1:
+            part = "Complete exchange using CMYK data (PDF/X-1 and PDF/X-1a)";
+            break;
+          case subtypePart3:
+            part = "Complete exchange suitable for colour-managed workflows (PDF/X-3)";
+            break;
+          case subtypePart4:
+            part = "Complete exchange of CMYK and spot colour printing data using PDF 1.4 (PDF/X-1a)";
+            break;
+          case subtypePart5:
+            part = "Partial exchange of printing data using PDF 1.4 (PDF/X-2) [Withdrawn]";
+            break;
+          case subtypePart6:
+            part = "Complete exchange of printing data suitable for colour-managed workflows using PDF 1.4 (PDF/X-3)";
+            break;
+          case subtypePart7:
+            part = "Complete exchange of printing data (PDF/X-4) and partial exchange of printing data with external profile reference (PDF/X-4p) using PDF 1.6";
+            break;
+          case subtypePart8:
+            part = "Partial exchange of printing data using PDF 1.6 (PDF/X-5)";
+            break;
+          default:
+            break;
+        }
+        break;
+      default:
+        break;
+    }
+
+    // Form Conformance explanation from PDFSubtypeConformance
+    switch (doc->getPDFSubtypeConformance()) {
+      case subtypeConfA:
+        confExp = "Level A, Accessible";
+        break;
+      case subtypeConfB:
+        confExp = "Level B, Basic";
+        break;
+      case subtypeConfG:
+        confExp = "Level G, External graphical content";
+        break;
+      case subtypeConfN:
+        confExp = "Level N, External ICC profile";
+        break;
+      case subtypeConfP:
+        confExp = "Level P, Embedded ICC profile";
+        break;
+      case subtypeConfPG:
+        confExp = "Level PG, Embedded ICC profile and external graphical content";
+        break;
+      case subtypeConfU:
+        confExp = "Level U, Unicode support";
+        break;
+      default:
+        break;
+    }
+
+    printf("    Title:         %s\n", typeExp);
+    printf("    Abbreviation:  %s\n", abbr->getCString());
+    if (hasPart) {
+      if (part) {
+        printf("    Subtitle:      Part %d: %s\n", static_cast<int>(subpart), part);
+      } else {
+        printf("    Subtitle:      Part %d\n", static_cast<int>(subpart));
+      }
+    }
+    // "standard" already holds the ISO number plus "-<part>" when a part is known
+    printf("    Standard:      %s\n", standard->getCString());
+    if (confExp) {
+      printf("    Conformance:   %s\n", confExp);
+    }
+  }
+}
+
 static void printInfo(PDFDoc *doc, UnicodeMap *uMap, long long filesize, GBool multiPage) {
   Page *page;
   char buf[256];
@@ -596,6 +832,8 @@ static void printInfo(PDFDoc *doc, UnicodeMap *uMap, long long filesize, GBool m
 
   // print PDF version
   printf("PDF version:    %d.%d\n", doc->getPDFMajorVersion(), doc->getPDFMinorVersion());
+
+  printPdfSubtype(doc, uMap);
 }
 
 int main(int argc, char *argv[]) {
commit 98d1b3dcc2c0530c12fb4422067c529ab375c680
Author: Evangelos Rigas <erigas at rnd2.org>
Date:   Wed Aug 22 10:51:12 2018 +0300

    [core] Add support for PDF subtype property
    
    Parse /GTS_PDF(A,E,UA,VT,X)Version from the PDF Information
    Dictionary into three enums: PDFSubtype, PDFSubtypePart, and
    PDFSubtypeConformance.

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 0ee0b50e..cb8fd0d7 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -40,6 +40,7 @@
 // Copyright (C) 2018 Ben Timby <btimby at gmail.com>
 // Copyright (C) 2018 Evangelos Foutras <evangelos at foutrelis.com>
 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info at kdab.com>. Work sponsored by the LiMux project of the city of Munich
+// Copyright (C) 2018 Evangelos Rigas <erigas at rnd2.org>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -62,6 +63,7 @@
 #include <stddef.h>
 #include <string.h>
 #include <time.h>
+#include <regex>
 #include <sys/stat.h>
 #include "goo/glibc.h"
 #include "goo/gstrtod.h"
@@ -318,6 +320,9 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
     }
   }
 
+  // Extract PDF Subtype information
+  extractPDFSubtype();
+
   // done
   return gTrue;
 }
@@ -482,6 +487,133 @@ GBool PDFDoc::checkEncryption(GooString *ownerPassword, GooString *userPassword)
   return ret;
 }
 
+// Map the value of a GTS_PDF*Version info entry (for example "PDF/A-1b" or
+// "PDF/X-1a:2003") to the part of the standard it names.
+//
+// subtype            the PDF subtype whose key supplied the string
+// pdfSubtypeVersion  the version string; only read, never modified or freed
+//
+// For PDF/X the part digit and the optional four-digit year together select
+// the edition; for every other subtype the digit is the part itself. Returns
+// subtypePartNone when the string does not match or names no known part.
+static PDFSubtypePart pdfPartFromString(PDFSubtype subtype, GooString *pdfSubtypeVersion) {
+  // Compiled once instead of on every call; group 1 is the part digit and
+  // group 2 the optional year.
+  static const std::regex regex("PDF/(?:A|X|VT|E|UA)-([[:digit:]])(?:[[:alpha:]]{1,2})?:?([[:digit:]]{4})?");
+  std::smatch match;
+  const std::string pdfsubver = pdfSubtypeVersion->toStr();
+  PDFSubtypePart subtypePart = subtypePartNone;
+
+  if (std::regex_search(pdfsubver, match, regex)) {
+    // Both groups hold only a few digits, so std::stoi cannot fail here
+    const int part = std::stoi(match.str(1));
+    const int date = match[2].matched ? std::stoi(match.str(2)) : 0;
+
+    switch (subtype) {
+      case subtypePDFX:
+        switch (part) {
+          case 1:
+            // 2001 edition unless the 2003 revision is named explicitly
+            subtypePart = (date == 2003) ? subtypePart4 : subtypePart1;
+            break;
+          case 2:
+            subtypePart = subtypePart5;
+            break;
+          case 3:
+            // 2002 edition unless the 2003 revision is named explicitly
+            subtypePart = (date == 2003) ? subtypePart6 : subtypePart3;
+            break;
+          case 4:
+            subtypePart = subtypePart7;
+            break;
+          case 5:
+            subtypePart = subtypePart8;
+            break;
+          default:
+            // Unknown PDF/X part: keep subtypePartNone
+            break;
+        }
+        break;
+      default:
+        // Accept only real part numbers: digits 0 and 9 would otherwise be
+        // cast to the subtypePartNull / subtypePartNone sentinels.
+        if ((part >= subtypePart1) && (part <= subtypePart8)) {
+          subtypePart = static_cast<PDFSubtypePart>(part);
+        }
+        break;
+    }
+  }
+
+  return subtypePart;
+}
+
+// Extract the conformance level letters that follow the part digit in a
+// GTS_PDF*Version string (e.g. the "b" of "PDF/A-1b"), ignoring case.
+// Returns subtypeConfNone when the string has no recognised level.
+static PDFSubtypeConformance pdfConformanceFromString(GooString *pdfSubtypeVersion) {
+  static const std::regex regex("PDF/(?:A|X|VT|E|UA)-[[:digit:]]([[:alpha:]]+)");
+  std::smatch match;
+  const std::string pdfsubver = pdfSubtypeVersion->toStr();
+  PDFSubtypeConformance pdfConf = subtypeConfNone;
+
+  // match contains the PDF conformance (A, B, G, N, P, PG or U)
+  if (std::regex_search(pdfsubver, match, regex)) {
+    GooString conf(match.str(1));
+    // Convert to lowercase as the conformance may appear in both cases
+    conf.lowerCase();
+    if (conf.cmp("a")==0) {
+      pdfConf = subtypeConfA;
+    } else if (conf.cmp("b")==0) {
+      pdfConf = subtypeConfB;
+    } else if (conf.cmp("g")==0) {
+      pdfConf = subtypeConfG;
+    } else if (conf.cmp("n")==0) {
+      pdfConf = subtypeConfN;
+    } else if (conf.cmp("p")==0) {
+      pdfConf = subtypeConfP;
+    } else if (conf.cmp("pg")==0) {
+      pdfConf = subtypeConfPG;
+    } else if (conf.cmp("u")==0) {
+      pdfConf = subtypeConfU;
+    }
+  }
+
+  return pdfConf;
+}
+
+// Work out which PDF standard subtype the info dictionary declares and store
+// it, with its part and conformance level, in the pdf* subtype members.
+void PDFDoc::extractPDFSubtype() {
+  // Info dictionary keys in the order they are probed; the first one present wins
+  static const struct { const char *key; PDFSubtype subtype; } knownKeys[] = {
+    { "GTS_PDFA1Version", subtypePDFA },
+    { "GTS_PDFEVersion", subtypePDFE },
+    { "GTS_PDFUAVersion", subtypePDFUA },
+    { "GTS_PDFVTVersion", subtypePDFVT },
+    { "GTS_PDFXVersion", subtypePDFX },
+  };
+  pdfSubtype = subtypeNull;
+  pdfPart = subtypePartNull;
+  pdfConformance = subtypeConfNull;
+
+  for (const auto &entry : knownKeys) {
+    GooString *versionString = getDocInfoStringEntry(entry.key);
+    if (versionString == nullptr) {
+      continue;
+    }
+    // The version string carries both the part and the conformance level
+    pdfSubtype = entry.subtype;
+    pdfPart = pdfPartFromString(pdfSubtype, versionString);
+    pdfConformance = pdfConformanceFromString(versionString);
+    delete versionString;
+    return;
+  }
+
+  pdfSubtype = subtypeNone;
+  pdfPart = subtypePartNone;
+  pdfConformance = subtypeConfNone;
+}
+
 std::vector<FormWidgetSignature*> PDFDoc::getSignatureWidgets()
 {
   int num_pages = getNumPages();
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 1678d167..3353db74 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -31,6 +31,7 @@
 // Copyright (C) 2015 André Esser <bepandre at hotmail.com>
 // Copyright (C) 2016 Jakub Alba <jakubalba at gmail.com>
 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info at kdab.com>. Work sponsored by the LiMux project of the city of Munich
+// Copyright (C) 2018 Evangelos Rigas <erigas at rnd2.org>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -74,6 +75,41 @@ enum PDFWriteMode {
   writeForceIncremental
 };
 
+enum PDFSubtype {   // PDF standard family declared through a GTS_PDF*Version info dictionary entry
+  subtypeNull,      // not determined yet (value set before the info dictionary is probed)
+  subtypePDFA,      // GTS_PDFA1Version entry present
+  subtypePDFE,      // GTS_PDFEVersion entry present
+  subtypePDFUA,     // GTS_PDFUAVersion entry present
+  subtypePDFVT,     // GTS_PDFVTVersion entry present
+  subtypePDFX,      // GTS_PDFXVersion entry present
+  subtypeNone       // none of the GTS_PDF*Version entries was found
+};
+
+enum PDFSubtypePart {   // Part of the standard; for PDF/X the value also encodes the edition
+  subtypePartNull,      // not determined yet
+  subtypePart1,         // part 1 (PDF/X: "PDF/X-1..." unless the year is 2003)
+  subtypePart2,         // part 2
+  subtypePart3,         // part 3 (PDF/X: "PDF/X-3..." unless the year is 2003)
+  subtypePart4,         // part 4 (PDF/X: "PDF/X-1..." with year 2003)
+  subtypePart5,         // part 5 (PDF/X: "PDF/X-2")
+  subtypePart6,         // part 6 (PDF/X: "PDF/X-3..." with year 2003)
+  subtypePart7,         // part 7 (PDF/X: "PDF/X-4")
+  subtypePart8,         // part 8 (PDF/X: "PDF/X-5")
+  subtypePartNone       // no subtype entry, or no part could be parsed from it
+};
+
+enum PDFSubtypeConformance {   // Conformance level letters following the part digit (matched case-insensitively)
+  subtypeConfNull,             // not determined yet
+  subtypeConfA,                // suffix "a"
+  subtypeConfB,                // suffix "b"
+  subtypeConfG,                // suffix "g"
+  subtypeConfN,                // suffix "n"
+  subtypeConfP,                // suffix "p"
+  subtypeConfPG,               // suffix "pg"
+  subtypeConfU,                // suffix "u"
+  subtypeConfNone              // no subtype entry, or no recognised level suffix
+};
+
 //------------------------------------------------------------------------
 // PDFDoc
 //------------------------------------------------------------------------
@@ -273,6 +309,11 @@ public:
   GooString *getDocInfoCreatDate() { return getDocInfoStringEntry("CreationDate"); }
   GooString *getDocInfoModDate() { return getDocInfoStringEntry("ModDate"); }
 
+  // Return the PDF subtype, part, and conformance that extractPDFSubtype() stored during setup()
+  PDFSubtype getPDFSubtype() const { return pdfSubtype; }  // subtypeNone when no GTS_PDF*Version entry exists
+  PDFSubtypePart getPDFSubtypePart() const { return pdfPart; }  // see PDFSubtypePart for the PDF/X mapping
+  PDFSubtypeConformance getPDFSubtypeConformance() const { return pdfConformance; }  // level suffix, if any
+
   // Return the PDF version specified by the file.
   int getPDFMajorVersion() { return pdfMajorVersion; }
   int getPDFMinorVersion() { return pdfMinorVersion; }
@@ -346,6 +387,8 @@ private:
   GBool checkFooter();
   void checkHeader();
   GBool checkEncryption(GooString *ownerPassword, GooString *userPassword);
+  void extractPDFSubtype();  // fills pdfSubtype, pdfPart and pdfConformance from the info dictionary
+
   // Get the offset of the start xref table.
   Goffset getStartXRef(GBool tryingToReconstruct = gFalse);
   // Get the offset of the entries in the main XRef table of a
@@ -365,6 +408,9 @@ private:
   void *guiData;
   int pdfMajorVersion;
   int pdfMinorVersion;
+  PDFSubtype pdfSubtype;  // standard family, set by extractPDFSubtype()
+  PDFSubtypePart pdfPart;  // part of that standard, set by extractPDFSubtype()
+  PDFSubtypeConformance pdfConformance;  // conformance level, set by extractPDFSubtype()
   Linearization *linearization;
   // linearizationState = 0: unchecked
   // linearizationState = 1: checked and valid


More information about the poppler mailing list