[poppler] pdftoabw patch

Jeff Muizelaar jeff at infidigm.net
Thu Apr 5 15:46:39 PDT 2007


On Thu, Apr 05, 2007 at 03:40:19PM -0400, Dominic Lachowicz wrote:
> Attached is a patch to pdftoabw. It should probably be split into
> multiple patches, but it's pretty small and easy to follow. It:
> 
> *) Allows you to save to a file other than stdout
> *) Checks for error conditions when reading the PDF doc and writing the ABW 
> doc
> *) Removes dead code in pdftoabw.cc
> *) Fixes a SEGV I encountered when converting my new home's floor plan
> *) Returns proper error conditions should the conversion fail for any reason
> 
> If someone (Jeff?) could review this, I'd appreciate it. I'm in the
> process of writing an AbiWord plugin that uses pdftoabw, at least
> until I bind this to poppler's glib API.

I'm looking forward to that as it will make it easy to add to the
regression tester.

Patch review follows inline,

-Jeff

> diff -u -p -u -p -r1.1 ABWOutputDev.cc
> --- poppler/ABWOutputDev.cc	4 Apr 2007 02:42:29 -0000	1.1
> +++ poppler/ABWOutputDev.cc	5 Apr 2007 19:36:10 -0000
> @@ -652,6 +652,9 @@ void ABWOutputDev::interpretXYTree(){
>  void ABWOutputDev::ATP_recursive(xmlNodePtr N_parent){
>    xmlNodePtr N_first, N_second, N_line, N_tempCol, N_tempColset;
>    N_first  = N_parent->children;
> +  if (!N_first)
> +    return;
> +
>    N_second = N_first->next;
>    char buf[20];
>  /*

I assume this is the crash fix? It looks sort of like a bandaid. It'd be
nice to know what problem this is actually solving. i.e what about your
home's floor plan triggers this. However, If this isn't easy to
determine I'd still rather have a bandaid then nothing. Though, it would
also probably be good to add a pdf that causes the problem to the
testsuite.

> Index: utils/pdftoabw.cc
> ===================================================================
> RCS file: /cvs/poppler/poppler/utils/pdftoabw.cc,v
> retrieving revision 1.1
> diff -u -p -u -p -r1.1 pdftoabw.cc
> --- utils/pdftoabw.cc	4 Apr 2007 02:42:29 -0000	1.1
> +++ utils/pdftoabw.cc	5 Apr 2007 19:36:10 -0000
> @@ -34,22 +34,13 @@
>  #include <libxml/parser.h>
>  #include <libxml/tree.h>
>  
> -
>  static int firstPage = 1;
>  static int lastPage = 0;
> -GBool printCommands = gTrue;
> -GBool prettyPrint = gFalse;
>  static GBool printHelp = gFalse;
> -GBool stout=gFalse;
> -
> +GBool stout = gFalse;
>  static char ownerPassword[33] = "";
>  static char userPassword[33] = "";
>  
> -static GooString* getInfoString(Dict *infoDict, char *key);
> -static GooString* getInfoDate(Dict *infoDict, char *key);
> -
> -xmlDocPtr XMLdoc;
> -
>  static char textEncName[128] = "";
>  
>  static ArgDesc argDesc[] = {
> @@ -61,8 +52,6 @@ static ArgDesc argDesc[] = {
>     "print usage information"},
>    {"--help",   argFlag,     &printHelp,     0,
>     "print usage information"},
> -  {"--format",   argFlag,     &prettyPrint,     0,
> -   "print usage information"},
>    {"--stdout"  ,argFlag,    &stout,         0,
>     "use standard output"},
>    {"--opw",    argString,   ownerPassword,  sizeof(ownerPassword),
> @@ -75,64 +64,64 @@ static ArgDesc argDesc[] = {
>  int main(int argc, char *argv[]) {
>    PDFDoc *doc = NULL;
>    GooString *fileName = NULL;
> -  GooString *docTitle = NULL;
> -  GooString *author = NULL, *keywords = NULL, *subject = NULL, *date = NULL;
> -  GooString *htmlFileName = NULL;
> -  GooString *psFileName = NULL;
> -  ABWOutputDev *htmlOut = NULL;
> -  PSOutputDev *psOut = NULL;
> +  GooString *abwFileName = NULL;
> +  ABWOutputDev *abwOut = NULL;
>    GBool ok;
> -  char *p;
> -  char extension[16] = "png";
>    GooString *ownerPW, *userPW;
>    Object info;
> +
> +  int result = 0;
>    
>    char * outpName;
> +  xmlDocPtr XMLdoc;
>  
>    // parse args
>    parseArgs(argDesc, &argc, argv);
>    globalParams = new GlobalParams();
>  
>    fileName = new GooString(argv[1]);
> -  /*
> -  if (stout){*/
> +  if (stout || (argc < 2)){
>      outpName = "-";
> -/*  }
> +  }
>    else {
> -    //FIXME: add outputfilename stuff
> +    outpName = argv[2];
>    }
> -  */
> +
>    doc = new PDFDoc(fileName);
> -  XMLdoc = xmlNewDoc(BAD_CAST "1.0");
> -  htmlOut = new ABWOutputDev(XMLdoc);
> -  htmlOut->setPDFDoc(doc);
> -  /* check for copy permission
> -  if (!doc->okToCopy()) {
> -    error(-1, "Copying of text from this document is not allowed.");
> -    goto error;
> -  }*/

This check for copy permission should probably be uncommented instead of
removed. Poppler's stance on the permissions is that user programs
should follow them but API should not. So, if abiword to use the glib
bindings it would be up to abiword to enforce this.

> +  if (!doc)
> +    {
> +      fprintf (stderr, "Error opening PDF %s", fileName);
> +      return 1;
> +    }
>  
> -  // write text file
> +  XMLdoc = xmlNewDoc(BAD_CAST "1.0");
> +  abwOut = new ABWOutputDev(XMLdoc);
> +  abwOut->setPDFDoc(doc);
>  
> -  if (lastPage == 0) lastPage = doc->getNumPages();
> +  if (lastPage == 0 || lastPage > doc->getNumPages ()) lastPage = doc->getNumPages();
> +  if (firstPage < 1) firstPage = 1;
>  
> -  if (htmlOut->isOk())
> +  if (abwOut->isOk())
>    {
> -    doc->displayPages(htmlOut, 1, lastPage, 72, 72, 0, gTrue, gFalse, gFalse);
> -		htmlOut->createABW();
> +    doc->displayPages(abwOut, firstPage, lastPage, 72, 72, 0, gTrue, gFalse, gFalse);
> +    abwOut->createABW();
>    }
> -  xmlSaveFormatFileEnc(outpName, XMLdoc, "UTF-8", 1);
> +
> +  if (-1 == xmlSaveFormatFileEnc(outpName, XMLdoc, "UTF-8", 1))
> +    {
> +      fprintf (stderr, "Error saving to %s\n", outpName);
> +      result = 1;
> +    }

if (xmlSaveFormatFileEnc(outpName, XMLdoc, "UTF-8", 1) == -1)

is the prefered style for poppler.

> +
>    // clean up
> - error:
>    if(globalParams) delete globalParams;
> -  //if(fileName) delete fileName;
>    if(doc) delete doc;
>    if(XMLdoc) xmlFreeDoc(XMLdoc);
> -  if(htmlOut) delete htmlOut;
> +  if(abwOut) delete abwOut;
>    
>    // check for memory leaks
>    Object::memCheck(stderr);
>    gMemReport(stderr);
>  
> -  return 0;
> +  return result;
>  }
> 
> > _______________________________________________
> > poppler mailing list
> > poppler at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/poppler
> 


More information about the poppler mailing list