[poppler] utils/pdfinfo.cc

Adrián Pérez de Castro aperez at igalia.com
Tue Mar 1 20:53:08 UTC 2016


Quoting Albert Astals Cid (2016-03-01 20:29:55)
> This commit forgot to update the man page.

Good point. I'll post a follow-up patch with the man page changes
tomorrow -- today is already a bit late for me, sorry.

> 
> El Tuesday 01 March 2016, a les 04:58:43, Carlos Garcia Campos va escriure:
> >  utils/pdfinfo.cc |   96 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 96 insertions(+)
> > 
> > New commits:
> > commit a8853b1df0a15570dff6ecc333769257bbf874c3
> > Author: Adrian Perez de Castro <aperez at igalia.com>
> > Date:   Thu May 9 19:11:26 2013 +0300
> > 
> >     Tagged-PDF: Modify pdfinfo to show the document structure
> > 
> >     https://bugs.freedesktop.org/show_bug.cgi?id=64816
> > 
> > diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
> > index 5a9745f..18221c2 100644
> > --- a/utils/pdfinfo.cc
> > +++ b/utils/pdfinfo.cc
> > @@ -3,6 +3,7 @@
> >  // pdfinfo.cc
> >  //
> >  // Copyright 1998-2003 Glyph & Cog, LLC
> > +// Copyright 2013 Igalia S.L.
> >  //
> >  //========================================================================
> > 
> > @@ -56,11 +57,15 @@
> >  #include "Error.h"
> >  #include "DateInfo.h"
> >  #include "JSInfo.h"
> > +#include "StructTreeRoot.h"
> > +#include "StructElement.h"
> > 
> >  static void printInfoString(Dict *infoDict, const char *key, const char *text, UnicodeMap *uMap);
> >  static void printInfoDate(Dict *infoDict, const char *key, const char *text);
> >  static void printBox(const char *text, PDFRectangle *box);
> > +static void printStruct(const StructElement *element, unsigned indent = 0);
> > +static void printIndent(unsigned level);
> > 
> >  static int firstPage = 1;
> >  static int lastPage = 0;
> > @@ -74,6 +79,8 @@ static char userPassword[33] = "\001";
> >  static GBool printVersion = gFalse;
> >  static GBool printHelp = gFalse;
> >  static GBool printEnc = gFalse;
> > +static GBool printStructure = gFalse;
> > +static GBool printStructureText = gFalse;
> > 
> >  static const ArgDesc argDesc[] = {
> >    {"-f",      argInt,      &firstPage,        0,
> > @@ -86,6 +93,10 @@ static const ArgDesc argDesc[] = {
> >     "print the document metadata (XML)"},
> >    {"-js",     argFlag,     &printJS,          0,
> >     "print all JavaScript in the PDF"},
> > +  {"-struct", argFlag,     &printStructure,   0,
> > +   "print the logical document structure (for tagged files)"},
> > +  {"-struct-text", argFlag, &printStructureText, 0,
> > +   "print text contents along with document structure (for tagged files)"},
> >    {"-rawdates", argFlag,   &rawDates,         0,
> >     "print the undecoded date strings directly from the PDF file"},
> >    {"-enc",    argString,   textEncName,    sizeof(textEncName),
> > @@ -142,6 +153,9 @@ int main(int argc, char *argv[]) {
> >      goto err0;
> >    }
> > 
> > +  if (printStructureText)
> > +    printStructure = gTrue;
> > +
> >    // read config file
> >    globalParams = new GlobalParams();
> > 
> > @@ -401,6 +415,15 @@ int main(int argc, char *argv[]) {
> >      jsInfo.scanJS(lastPage - firstPage + 1, stdout, uMap);
> >    }
> > 
> > +  // print the structure
> > +  const StructTreeRoot *structTree;
> > +  if (printStructure && (structTree = doc->getCatalog()->getStructTreeRoot())) {
> > +    fputs("Structure:\n", stdout);
> > +    for (unsigned i = 0; i < structTree->getNumChildren(); i++) {
> > +      printStruct(structTree->getChild(i), 1);
> > +    }
> > +  }
> > +
> >    exitCode = 0;
> > 
> >    // clean up
> > @@ -481,3 +504,76 @@ static void printBox(const char *text, PDFRectangle *box) {
> >    printf("%s%8.2f %8.2f %8.2f %8.2f\n",
> >        text, box->x1, box->y1, box->x2, box->y2);
> >  }
> > +
> > +static void printIndent(unsigned indent) {
> > +  while (indent--) {
> > +    putchar(' ');
> > +    putchar(' ');
> > +  }
> > +}
> > +
> > +static void printAttribute(const Attribute *attribute, unsigned indent)
> > +{
> > +  printIndent(indent);
> > +  printf(" /%s ", attribute->getTypeName());
> > +  if (attribute->getType() == Attribute::UserProperty) {
> > +    GooString *name = attribute->getName();
> > +    printf("(%s) ", name->getCString());
> > +    delete name;
> > +  }
> > +  attribute->getValue()->print(stdout);
> > +  if (attribute->getFormattedValue()) {
> > +    printf(" \"%s\"", attribute->getFormattedValue());
> > +  }
> > +  if (attribute->isHidden()) {
> > +    printf(" [hidden]");
> > +  }
> > +}
> > +
> > +static void printStruct(const StructElement *element, unsigned indent) {
> > +  if (element->isObjectRef()) {
> > +    printIndent(indent);
> > +    printf("Object %i %i\n", element->getObjectRef().num, element->getObjectRef().gen);
> > +    return;
> > +  }
> > +
> > +  if (printStructureText && element->isContent()) {
> > +    GooString *text = element->getText(gFalse);
> > +    printIndent(indent);
> > +    if (text) {
> > +      printf("\"%s\"\n", text->getCString());
> > +    } else {
> > +      printf("(No content?)\n");
> > +    }
> > +    delete text;
> > +  }
> > +
> > +  if (!element->isContent()) {
> > +      printIndent(indent);
> > +      printf("%s", element->getTypeName());
> > +      if (element->getID()) {
> > +          printf(" <%s>", element->getID()->getCString());
> > +      }
> > +      if (element->getTitle()) {
> > +          printf(" \"%s\"", element->getTitle()->getCString());
> > +      }
> > +      if (element->getRevision() > 0) {
> > +          printf(" r%u", element->getRevision());
> > +      }
> > +      if (element->isInline() || element->isBlock()) {
> > +          printf(" (%s)", element->isInline() ? "inline" : "block");
> > +      }
> > +      if (element->getNumAttributes()) {
> > +          putchar(':');
> > +          for (unsigned i = 0; i < element->getNumAttributes(); i++) {
> > +              putchar('\n');
> > +              printAttribute(element->getAttribute(i), indent + 1);
> > +          }
> > +      }
> > +
> > +      putchar('\n');
> > +      for (unsigned i = 0; i < element->getNumChildren(); i++) {
> > +          printStruct(element->getChild(i), indent + 1);
> > +      }
> > +  }
> > +}
> > _______________________________________________
> > poppler mailing list
> > poppler at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/poppler
> 
> _______________________________________________
> poppler mailing list
> poppler at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/poppler

--
 ⌨ Adrian

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: signature
URL: <https://lists.freedesktop.org/archives/poppler/attachments/20160301/c9555b9f/attachment-0001.sig>


More information about the poppler mailing list