[poppler] 2 commits - poppler/PDFDoc.cc poppler/PDFDoc.h utils/pdfinfo.cc
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Sat Sep 1 21:50:55 UTC 2018
poppler/PDFDoc.cc | 132 +++++++++++++++++++++++++++++
poppler/PDFDoc.h | 46 ++++++++++
utils/pdfinfo.cc | 238 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 416 insertions(+)
New commits:
commit 45f0f6d21d51c0408fe1d876f18ef05489e69bc0
Author: Evangelos Rigas <erigas at rnd2.org>
Date: Mon Aug 6 10:57:47 2018 +0100
[utils] Add PDF subtype to pdfinfo
If the document declares compliance with a PDF/A, PDF/E, PDF/VT, PDF/UA
or PDF/X standard, print the PDF subtype version, title and subtitle,
and explain the part and conformance levels.
diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index 50042393..91423ebd 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -24,6 +24,7 @@
// Copyright (C) 2013 Suzuki Toshiya <mpsuzuki at hiroshima-u.ac.jp>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info at kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Adam Reichold <adam.reichold at t-online.de>
+// Copyright (C) 2018 Evangelos Rigas <erigas at rnd2.org>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -424,6 +425,241 @@ static void printDestinations(PDFDoc *doc, UnicodeMap *uMap) {
}
}
+// Print the PDF subtype (PDF/A, PDF/E, PDF/UA, PDF/VT or PDF/X) declared in
+// the document information dictionary, followed by a short explanation of
+// the standard, its part and its conformance level.
+//
+// Nothing is printed when the document has no info dictionary or when no
+// subtype was detected.  The "Subtitle" line and the "-<part>" suffixes of
+// the abbreviation and of the standard are emitted only when a part number
+// was actually recognised, so the subtypePartNull / subtypePartNone
+// sentinels are never shown as if they were real part numbers.
+static void printPdfSubtype(PDFDoc *doc, UnicodeMap *uMap) {
+  const Object info = doc->getDocInfo();
+  if (!info.isDict()) {
+    return;
+  }
+
+  const PDFSubtype pdftype = doc->getPDFSubtype();
+  if (pdftype == subtypeNull || pdftype == subtypeNone) {
+    return;
+  }
+
+  // The explanatory texts are plain string literals; only the abbreviation
+  // and the standard identifier are assembled at run time.
+  const char *typeExp = nullptr;
+  const char *part = nullptr;
+  const char *confExp = nullptr;
+  std::unique_ptr<GooString> abbr;
+  std::unique_ptr<GooString> standard;
+
+  // Form title from PDFSubtype
+  switch (pdftype) {
+    case subtypePDFA:
+      printInfoString(info.getDict(), "GTS_PDFA1Version", "PDF subtype: ", uMap);
+      typeExp = "ISO 19005 - Electronic document file format for long-term preservation (PDF/A)";
+      standard.reset( new GooString("ISO 19005") );
+      abbr.reset( new GooString("PDF/A") );
+      break;
+    case subtypePDFE:
+      printInfoString(info.getDict(), "GTS_PDFEVersion", "PDF subtype: ", uMap);
+      typeExp = "ISO 24517 - Engineering document format using PDF (PDF/E)";
+      standard.reset( new GooString("ISO 24517") );
+      abbr.reset( new GooString("PDF/E") );
+      break;
+    case subtypePDFUA:
+      printInfoString(info.getDict(), "GTS_PDFUAVersion", "PDF subtype: ", uMap);
+      typeExp = "ISO 14289 - Electronic document file format enhancement for accessibility (PDF/UA)";
+      standard.reset( new GooString("ISO 14289") );
+      abbr.reset( new GooString("PDF/UA") );
+      break;
+    case subtypePDFVT:
+      printInfoString(info.getDict(), "GTS_PDFVTVersion", "PDF subtype: ", uMap);
+      typeExp = "ISO 16612 - Electronic document file format for variable data exchange (PDF/VT)";
+      standard.reset( new GooString("ISO 16612") );
+      abbr.reset( new GooString("PDF/VT") );
+      break;
+    case subtypePDFX:
+      printInfoString(info.getDict(), "GTS_PDFXVersion", "PDF subtype: ", uMap);
+      typeExp = "ISO 15930 - Electronic document file format for prepress digital data exchange (PDF/X)";
+      standard.reset( new GooString("ISO 15930") );
+      abbr.reset( new GooString("PDF/X") );
+      break;
+    case subtypeNone:
+    case subtypeNull:
+    default:
+      return;
+  }
+
+  const PDFSubtypePart subpart = doc->getPDFSubtypePart();
+  // True only for subtypePart1..subtypePart8, i.e. a real part number.
+  const bool hasPart = subpart != subtypePartNull && subpart != subtypePartNone;
+
+  // Form the abbreviation from PDFSubtypePart and PDFSubtype.  For PDF/X the
+  // enum value is the ISO 15930 part number, which differs from the suffix
+  // used in the PDF/X name, hence the explicit table.
+  if (pdftype == subtypePDFX) {
+    switch (subpart) {
+      case subtypePart1:
+        abbr->append("-1:2001");
+        break;
+      case subtypePart2:
+        abbr->append("-2");
+        break;
+      case subtypePart3:
+        abbr->append("-3:2002");
+        break;
+      case subtypePart4:
+        abbr->append("-1:2003");
+        break;
+      case subtypePart5:
+        abbr->append("-2");
+        break;
+      case subtypePart6:
+        abbr->append("-3:2003");
+        break;
+      case subtypePart7:
+        abbr->append("-4");
+        break;
+      case subtypePart8:
+        abbr->append("-5");
+        break;
+      default:
+        break;
+    }
+  } else if (hasPart) {
+    abbr->appendf("-{0:d}", static_cast<int>(subpart));
+  }
+
+  // Form standard from PDFSubtypePart
+  if (hasPart) {
+    standard->appendf("-{0:d}", static_cast<int>(subpart));
+  }
+
+  // Form the subtitle from PDFSubtypePart and PDFSubtype
+  switch (pdftype) {
+    case subtypePDFA:
+      switch (subpart) {
+        case subtypePart1:
+          part = "Use of PDF 1.4";
+          break;
+        case subtypePart2:
+          part = "Use of ISO 32000-1";
+          break;
+        case subtypePart3:
+          part = "Use of ISO 32000-1 with support for embedded files";
+          break;
+        default:
+          break;
+      }
+      break;
+    case subtypePDFE:
+      switch (subpart) {
+        case subtypePart1:
+          part = "Use of PDF 1.6";
+          break;
+        default:
+          break;
+      }
+      break;
+    case subtypePDFUA:
+      switch (subpart) {
+        case subtypePart1:
+          part = "Use of ISO 32000-1";
+          break;
+        case subtypePart2:
+          part = "Use of ISO 32000-2";
+          break;
+        case subtypePart3:
+          part = "Use of ISO 32000-1 with support for embedded files";
+          break;
+        default:
+          break;
+      }
+      break;
+    case subtypePDFVT:
+      switch (subpart) {
+        case subtypePart1:
+          part = "Using PPML 2.1 and PDF 1.4";
+          break;
+        case subtypePart2:
+          part = "Using PDF/X-4 and PDF/X-5 (PDF/VT-1 and PDF/VT-2)";
+          break;
+        case subtypePart3:
+          part = "Using PDF/X-6 (PDF/VT-3)";
+          break;
+        default:
+          break;
+      }
+      break;
+    case subtypePDFX:
+      switch (subpart) {
+        case subtypePart1:
+          part = "Complete exchange using CMYK data (PDF/X-1 and PDF/X-1a)";
+          break;
+        case subtypePart3:
+          part = "Complete exchange suitable for colour-managed workflows (PDF/X-3)";
+          break;
+        case subtypePart4:
+          part = "Complete exchange of CMYK and spot colour printing data using PDF 1.4 (PDF/X-1a)";
+          break;
+        case subtypePart5:
+          part = "Partial exchange of printing data using PDF 1.4 (PDF/X-2) [Withdrawn]";
+          break;
+        case subtypePart6:
+          part = "Complete exchange of printing data suitable for colour-managed workflows using PDF 1.4 (PDF/X-3)";
+          break;
+        case subtypePart7:
+          part = "Complete exchange of printing data (PDF/X-4) and partial exchange of printing data with external profile reference (PDF/X-4p) using PDF 1.6";
+          break;
+        case subtypePart8:
+          part = "Partial exchange of printing data using PDF 1.6 (PDF/X-5)";
+          break;
+        default:
+          break;
+      }
+      break;
+    default:
+      break;
+  }
+
+  // Form Conformance explanation from PDFSubtypeConformance
+  switch (doc->getPDFSubtypeConformance()) {
+    case subtypeConfA:
+      confExp = "Level A, Accessible";
+      break;
+    case subtypeConfB:
+      confExp = "Level B, Basic";
+      break;
+    case subtypeConfG:
+      confExp = "Level G, External graphical content";
+      break;
+    case subtypeConfN:
+      confExp = "Level N, External ICC profile";
+      break;
+    case subtypeConfP:
+      confExp = "Level P, Embedded ICC profile";
+      break;
+    case subtypeConfPG:
+      confExp = "Level PG, Embedded ICC profile and external graphical content";
+      break;
+    case subtypeConfU:
+      confExp = "Level U, Unicode support";
+      break;
+    case subtypeConfNone:
+    case subtypeConfNull:
+    default:
+      // No conformance level recognised: the line is simply omitted.
+      break;
+  }
+
+  printf(" Title: %s\n", typeExp);
+  printf(" Abbreviation: %s\n", abbr->getCString());
+  if (hasPart) {
+    if (part) {
+      printf(" Subtitle: Part %d: %s\n", static_cast<int>(subpart), part);
+    } else {
+      printf(" Subtitle: Part %d\n", static_cast<int>(subpart));
+    }
+  }
+  // 'standard' already carries the "-<part>" suffix when a part is known.
+  printf(" Standard: %s\n", standard->getCString());
+  if (confExp) {
+    printf(" Conformance: %s\n", confExp);
+  }
+}
+
+
static void printInfo(PDFDoc *doc, UnicodeMap *uMap, long long filesize, GBool multiPage) {
Page *page;
char buf[256];
@@ -596,6 +832,8 @@ static void printInfo(PDFDoc *doc, UnicodeMap *uMap, long long filesize, GBool m
// print PDF version
printf("PDF version: %d.%d\n", doc->getPDFMajorVersion(), doc->getPDFMinorVersion());
+
+ printPdfSubtype(doc, uMap);
}
int main(int argc, char *argv[]) {
commit 98d1b3dcc2c0530c12fb4422067c529ab375c680
Author: Evangelos Rigas <erigas at rnd2.org>
Date: Wed Aug 22 10:51:12 2018 +0300
[core] Add support for PDF subtype property
Parse the /GTS_PDFA1Version, /GTS_PDFEVersion, /GTS_PDFUAVersion,
/GTS_PDFVTVersion or /GTS_PDFXVersion entry of the PDF Information
Dictionary into three enums: PDFSubtype, PDFSubtypePart, and
PDFSubtypeConformance.
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 0ee0b50e..cb8fd0d7 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -40,6 +40,7 @@
// Copyright (C) 2018 Ben Timby <btimby at gmail.com>
// Copyright (C) 2018 Evangelos Foutras <evangelos at foutrelis.com>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info at kdab.com>. Work sponsored by the LiMux project of the city of Munich
+// Copyright (C) 2018 Evangelos Rigas <erigas at rnd2.org>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -62,6 +63,7 @@
#include <stddef.h>
#include <string.h>
#include <time.h>
+#include <regex>
#include <sys/stat.h>
#include "goo/glibc.h"
#include "goo/gstrtod.h"
@@ -318,6 +320,9 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
}
}
+ // Extract PDF Subtype information
+ extractPDFSubtype();
+
// done
return gTrue;
}
@@ -482,6 +487,133 @@ GBool PDFDoc::checkEncryption(GooString *ownerPassword, GooString *userPassword)
return ret;
}
+// Derive the PDFSubtypePart from a subtype version string such as
+// "PDF/A-1b" or "PDF/X-1a:2003".
+//
+// The first digit after the dash is the part digit; an optional four-digit
+// year may follow a ':'.  For PDF/X the (digit, year) pair is mapped onto
+// subtypePart1..subtypePart8; for every other subtype the digit is converted
+// to the enum directly.  Returns subtypePartNone when the string does not
+// match, or when a PDF/X digit is outside 1..5.
+static PDFSubtypePart pdfPartFromString(PDFSubtype subtype, GooString *pdfSubtypeVersion) {
+  const std::regex versionPattern("PDF/(?:A|X|VT|E|UA)-([[:digit:]])(?:[[:alpha:]]{1,2})?:?([[:digit:]]{4})?");
+  const std::string versionText = pdfSubtypeVersion->toStr();
+  std::smatch found;
+
+  if (!std::regex_search(versionText, found, versionPattern)) {
+    return subtypePartNone;
+  }
+
+  const int partDigit = std::stoi(found.str(1));
+  // The year group is optional; 0 stands for "no year given".
+  const int year = found[2].matched ? std::stoi(found.str(2)) : 0;
+
+  if (subtype != subtypePDFX) {
+    return static_cast<PDFSubtypePart>(partDigit);
+  }
+
+  // PDF/X: only the 2003 revisions of -1 and -3 have their own part; any
+  // other (or missing) year falls back to the original revision.
+  if (partDigit == 1) {
+    return (year == 2003) ? subtypePart4 : subtypePart1;
+  }
+  if (partDigit == 2) {
+    return subtypePart5;
+  }
+  if (partDigit == 3) {
+    return (year == 2003) ? subtypePart6 : subtypePart3;
+  }
+  if (partDigit == 4) {
+    return subtypePart7;
+  }
+  if (partDigit == 5) {
+    return subtypePart8;
+  }
+  return subtypePartNone;
+}
+
+// Derive the PDFSubtypeConformance from a subtype version string such as
+// "PDF/A-1b": the letters that directly follow the part digit are compared,
+// case-insensitively, against the known levels (A, B, G, N, P, PG, U).
+// Returns subtypeConfNone when the string does not match the expected
+// pattern or the letters are not a known level.
+static PDFSubtypeConformance pdfConformanceFromString(GooString *pdfSubtypeVersion) {
+  const std::regex regex("PDF/(?:A|X|VT|E|UA)-[[:digit:]]([[:alpha:]]+)");
+  std::smatch match;
+  const std::string pdfsubver = pdfSubtypeVersion->toStr();
+
+  if (!std::regex_search(pdfsubver, match, regex)) {
+    return subtypeConfNone;
+  }
+
+  // match.str(1) holds the conformance letters.  A stack object is enough
+  // here, so no new/delete pair is needed.
+  GooString conf(match.str(1));
+  // Convert to lowercase as the conformance may appear in both cases
+  conf.lowerCase();
+
+  if (conf.cmp("a") == 0) {
+    return subtypeConfA;
+  }
+  if (conf.cmp("b") == 0) {
+    return subtypeConfB;
+  }
+  if (conf.cmp("g") == 0) {
+    return subtypeConfG;
+  }
+  if (conf.cmp("n") == 0) {
+    return subtypeConfN;
+  }
+  if (conf.cmp("p") == 0) {
+    return subtypeConfP;
+  }
+  if (conf.cmp("pg") == 0) {
+    return subtypeConfPG;
+  }
+  if (conf.cmp("u") == 0) {
+    return subtypeConfU;
+  }
+  return subtypeConfNone;
+}
+
+// Fill pdfSubtype, pdfPart and pdfConformance from the first GTS_PDF*Version
+// entry found in the document information dictionary.  The keys are probed
+// in a fixed order (A, E, UA, VT, X); when none is present all three members
+// are set to their *None values.
+void PDFDoc::extractPDFSubtype() {
+  // Info dictionary keys and the subtype each one announces, in lookup order.
+  static const struct {
+    const char *infoKey;
+    PDFSubtype kind;
+  } knownKeys[] = {
+    { "GTS_PDFA1Version", subtypePDFA },
+    { "GTS_PDFEVersion", subtypePDFE },
+    { "GTS_PDFUAVersion", subtypePDFUA },
+    { "GTS_PDFVTVersion", subtypePDFVT },
+    { "GTS_PDFXVersion", subtypePDFX },
+  };
+
+  pdfSubtype = subtypeNull;
+  pdfPart = subtypePartNull;
+  pdfConformance = subtypeConfNull;
+
+  // Find PDF InfoDict subtype key if any
+  GooString *versionText = nullptr;
+  for (const auto &candidate : knownKeys) {
+    versionText = getDocInfoStringEntry(candidate.infoKey);
+    if (versionText) {
+      pdfSubtype = candidate.kind;
+      break;
+    }
+  }
+
+  if (!versionText) {
+    pdfSubtype = subtypeNone;
+    pdfPart = subtypePartNone;
+    pdfConformance = subtypeConfNone;
+    return;
+  }
+
+  // Extract part and conformance from the version string
+  pdfPart = pdfPartFromString(pdfSubtype, versionText);
+  pdfConformance = pdfConformanceFromString(versionText);
+
+  // The string obtained above is released here once it has been parsed.
+  delete versionText;
+}
+
std::vector<FormWidgetSignature*> PDFDoc::getSignatureWidgets()
{
int num_pages = getNumPages();
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 1678d167..3353db74 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -31,6 +31,7 @@
// Copyright (C) 2015 André Esser <bepandre at hotmail.com>
// Copyright (C) 2016 Jakub Alba <jakubalba at gmail.com>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info at kdab.com>. Work sponsored by the LiMux project of the city of Munich
+// Copyright (C) 2018 Evangelos Rigas <erigas at rnd2.org>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -74,6 +75,41 @@ enum PDFWriteMode {
writeForceIncremental
};
+// PDF subtype (ISO standard family) announced by the document.
+// subtypeNull is the value held before extraction has produced a result;
+// subtypeNone means no subtype entry was found.
+enum PDFSubtype {
+  subtypeNull  = 0,
+  subtypePDFA  = 1,
+  subtypePDFE  = 2,
+  subtypePDFUA = 3,
+  subtypePDFVT = 4,
+  subtypePDFX  = 5,
+  subtypeNone  = 6
+};
+
+// Part of the standard the document refers to.  The numeric value of
+// subtypePartN is N: callers convert a parsed digit to this enum and print
+// the enum as a number, so these values must not change.
+// subtypePartNull is the value held before extraction has produced a
+// result; subtypePartNone means no part could be determined.
+enum PDFSubtypePart {
+  subtypePartNull = 0,
+  subtypePart1    = 1,
+  subtypePart2    = 2,
+  subtypePart3    = 3,
+  subtypePart4    = 4,
+  subtypePart5    = 5,
+  subtypePart6    = 6,
+  subtypePart7    = 7,
+  subtypePart8    = 8,
+  subtypePartNone = 9
+};
+
+// Conformance level letters that may follow the part digit in the subtype
+// version string (A, B, G, N, P, PG, U).
+// subtypeConfNull is the value held before extraction has produced a
+// result; subtypeConfNone means no known level was recognised.
+enum PDFSubtypeConformance {
+  subtypeConfNull = 0,
+  subtypeConfA    = 1,
+  subtypeConfB    = 2,
+  subtypeConfG    = 3,
+  subtypeConfN    = 4,
+  subtypeConfP    = 5,
+  subtypeConfPG   = 6,
+  subtypeConfU    = 7,
+  subtypeConfNone = 8
+};
+
//------------------------------------------------------------------------
// PDFDoc
//------------------------------------------------------------------------
@@ -273,6 +309,11 @@ public:
GooString *getDocInfoCreatDate() { return getDocInfoStringEntry("CreationDate"); }
GooString *getDocInfoModDate() { return getDocInfoStringEntry("ModDate"); }
+ // Return the PDF subtype, part, and conformance
+ // Subtype stored by extractPDFSubtype(); subtypeNone when the info
+ // dictionary has no GTS_PDF*Version entry.
+ PDFSubtype getPDFSubtype() const { return pdfSubtype; }
+ // Part stored by extractPDFSubtype(); subtypePartNone when no part could
+ // be parsed from the version string.
+ PDFSubtypePart getPDFSubtypePart() const { return pdfPart; }
+ // Conformance level stored by extractPDFSubtype(); subtypeConfNone when
+ // no known level could be parsed from the version string.
+ PDFSubtypeConformance getPDFSubtypeConformance() const { return pdfConformance; }
+
// Return the PDF version specified by the file.
int getPDFMajorVersion() { return pdfMajorVersion; }
int getPDFMinorVersion() { return pdfMinorVersion; }
@@ -346,6 +387,8 @@ private:
GBool checkFooter();
void checkHeader();
GBool checkEncryption(GooString *ownerPassword, GooString *userPassword);
+ // Set pdfSubtype, pdfPart and pdfConformance from the GTS_PDF*Version
+ // entry of the document information dictionary; called from setup().
+ void extractPDFSubtype();
+
// Get the offset of the start xref table.
Goffset getStartXRef(GBool tryingToReconstruct = gFalse);
// Get the offset of the entries in the main XRef table of a
@@ -365,6 +408,9 @@ private:
void *guiData;
int pdfMajorVersion;
int pdfMinorVersion;
+ // PDF subtype information, assigned by extractPDFSubtype().
+ PDFSubtype pdfSubtype;
+ PDFSubtypePart pdfPart;
+ PDFSubtypeConformance pdfConformance;
Linearization *linearization;
// linearizationState = 0: unchecked
// linearizationState = 1: checked and valid
More information about the poppler
mailing list