[poppler] utils/pdfinfo.cc
Adrián Pérez de Castro
aperez at igalia.com
Tue Mar 1 20:53:08 UTC 2016
Quoting Albert Astals Cid (2016-03-01 20:29:55)
> This commit forgot to update the man page.
Good point. I'll post a follow-up patch with the man page changes
tomorrow -- it is already a bit late for me today, sorry.
>
> El Tuesday 01 March 2016, a les 04:58:43, Carlos Garcia Campos va escriure:
> > utils/pdfinfo.cc | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 96 insertions(+)
> >
> > New commits:
> > commit a8853b1df0a15570dff6ecc333769257bbf874c3
> > Author: Adrian Perez de Castro <aperez at igalia.com>
> > Date: Thu May 9 19:11:26 2013 +0300
> >
> > Tagged-PDF: Modify pdfinfo to show the document structure
> >
> > https://bugs.freedesktop.org/show_bug.cgi?id=64816
> >
> > diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
> > index 5a9745f..18221c2 100644
> > --- a/utils/pdfinfo.cc
> > +++ b/utils/pdfinfo.cc
> > @@ -3,6 +3,7 @@
> > // pdfinfo.cc
> > //
> > // Copyright 1998-2003 Glyph & Cog, LLC
> > +// Copyright 2013 Igalia S.L.
> > //
> > //========================================================================
> >
> > @@ -56,11 +57,15 @@
> > #include "Error.h"
> > #include "DateInfo.h"
> > #include "JSInfo.h"
> > +#include "StructTreeRoot.h"
> > +#include "StructElement.h"
> >
> > static void printInfoString(Dict *infoDict, const char *key, const char *text, UnicodeMap *uMap);
> > static void printInfoDate(Dict *infoDict, const char *key, const char *text);
> > static void printBox(const char *text, PDFRectangle *box);
> > +static void printStruct(const StructElement *element, unsigned indent = 0);
> > +static void printIndent(unsigned level);
> >
> > static int firstPage = 1;
> > static int lastPage = 0;
> > @@ -74,6 +79,8 @@ static char userPassword[33] = "\001";
> > static GBool printVersion = gFalse;
> > static GBool printHelp = gFalse;
> > static GBool printEnc = gFalse;
> > +static GBool printStructure = gFalse;
> > +static GBool printStructureText = gFalse;
> >
> > static const ArgDesc argDesc[] = {
> > {"-f", argInt, &firstPage, 0,
> > @@ -86,6 +93,10 @@ static const ArgDesc argDesc[] = {
> > "print the document metadata (XML)"},
> > {"-js", argFlag, &printJS, 0,
> > "print all JavaScript in the PDF"},
> > + {"-struct", argFlag, &printStructure, 0,
> > + "print the logical document structure (for tagged files)"},
> > + {"-struct-text", argFlag, &printStructureText, 0,
> > + "print text contents along with document structure (for tagged files)"},
> > {"-rawdates", argFlag, &rawDates, 0,
> > "print the undecoded date strings directly from the PDF file"},
> > {"-enc", argString, textEncName, sizeof(textEncName),
> > @@ -142,6 +153,9 @@ int main(int argc, char *argv[]) {
> > goto err0;
> > }
> >
> > + if (printStructureText)
> > + printStructure = gTrue;
> > +
> > // read config file
> > globalParams = new GlobalParams();
> >
> > @@ -401,6 +415,15 @@ int main(int argc, char *argv[]) {
> > jsInfo.scanJS(lastPage - firstPage + 1, stdout, uMap);
> > }
> >
> > + // print the structure
> > + const StructTreeRoot *structTree;
> > + if (printStructure && (structTree = doc->getCatalog()->getStructTreeRoot())) {
> > + fputs("Structure:\n", stdout);
> > + for (unsigned i = 0; i < structTree->getNumChildren(); i++) {
> > + printStruct(structTree->getChild(i), 1);
> > + }
> > + }
> > +
> > exitCode = 0;
> >
> > // clean up
> > @@ -481,3 +504,76 @@ static void printBox(const char *text, PDFRectangle *box) {
> > printf("%s%8.2f %8.2f %8.2f %8.2f\n",
> > text, box->x1, box->y1, box->x2, box->y2);
> > }
> > +
> > +static void printIndent(unsigned indent) {
> > + while (indent--) {
> > + putchar(' ');
> > + putchar(' ');
> > + }
> > +}
> > +
> > +static void printAttribute(const Attribute *attribute, unsigned indent)
> > +{
> > + printIndent(indent);
> > + printf(" /%s ", attribute->getTypeName());
> > + if (attribute->getType() == Attribute::UserProperty) {
> > + GooString *name = attribute->getName();
> > + printf("(%s) ", name->getCString());
> > + delete name;
> > + }
> > + attribute->getValue()->print(stdout);
> > + if (attribute->getFormattedValue()) {
> > + printf(" \"%s\"", attribute->getFormattedValue());
> > + }
> > + if (attribute->isHidden()) {
> > + printf(" [hidden]");
> > + }
> > +}
> > +
> > +static void printStruct(const StructElement *element, unsigned indent) {
> > + if (element->isObjectRef()) {
> > + printIndent(indent);
> > + printf("Object %i %i\n", element->getObjectRef().num, element->getObjectRef().gen);
> > + return;
> > + }
> > +
> > + if (printStructureText && element->isContent()) {
> > + GooString *text = element->getText(gFalse);
> > + printIndent(indent);
> > + if (text) {
> > + printf("\"%s\"\n", text->getCString());
> > + } else {
> > + printf("(No content?)\n");
> > + }
> > + delete text;
> > + }
> > +
> > + if (!element->isContent()) {
> > + printIndent(indent);
> > + printf("%s", element->getTypeName());
> > + if (element->getID()) {
> > + printf(" <%s>", element->getID()->getCString());
> > + }
> > + if (element->getTitle()) {
> > + printf(" \"%s\"", element->getTitle()->getCString());
> > + }
> > + if (element->getRevision() > 0) {
> > + printf(" r%u", element->getRevision());
> > + }
> > + if (element->isInline() || element->isBlock()) {
> > + printf(" (%s)", element->isInline() ? "inline" : "block");
> > + }
> > + if (element->getNumAttributes()) {
> > + putchar(':');
> > + for (unsigned i = 0; i < element->getNumAttributes(); i++) {
> > + putchar('\n');
> > + printAttribute(element->getAttribute(i), indent + 1);
> > + }
> > + }
> > +
> > + putchar('\n');
> > + for (unsigned i = 0; i < element->getNumChildren(); i++) {
> > + printStruct(element->getChild(i), indent + 1);
> > + }
> > + }
> > +}
> > _______________________________________________
> > poppler mailing list
> > poppler at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/poppler
>
> _______________________________________________
> poppler mailing list
> poppler at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/poppler
--
⌨ Adrian
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: signature
URL: <https://lists.freedesktop.org/archives/poppler/attachments/20160301/c9555b9f/attachment-0001.sig>
More information about the poppler mailing list