[poppler] Linearization support

Hib Eris hib at hiberis.nl
Tue Sep 21 12:30:29 PDT 2010


Hi Albert,

On Thu, Sep 9, 2010 at 7:02 PM, Albert Astals Cid <aacid at kde.org> wrote:
> A Dijous, 9 de setembre de 2010, Hib Eris va escriure:
>> Hi Albert et al,
>>
>> On Sun, Sep 5, 2010 at 5:02 PM, Albert Astals Cid <aacid at kde.org> wrote:
>> > A Dimecres, 11 d'agost de 2010, Hib Eris va escriure:
>> >> Hi again,
>> >>
>> >> On Wed, Aug 4, 2010 at 11:47 PM, Hib Eris <hib at hiberis.nl> wrote:
>> >> > Hi all,
>> >> >
>> >> > On Sat, Jun 12, 2010 at 12:37 PM, Hib Eris <hib at hiberis.nl> wrote:
>> >> >> Hi all,
>> >> >>
>> >> >> Now that 0.14.0 is out and feature freeze is over, I have updated my
>> >> >> linearization patches
>> >> >> (see
>> >> >> http://lists.freedesktop.org/archives/poppler/2010-April/005760.html)
>> >> >> to current master.
>> >> >>
>> >> >> Any comments on it are very welcome.
>> >> >
>> >> > I have updated my patches again as they no longer applied to current
>> >> > git master. I have also fixed some errors I found with test documents.
>> >> > I would appreciate it if anyone could test these patches against other
>> >> > PDF documents.
>> >>
>> >> I found out that I was leaking some memory with these patches, so here
>> >> is another update. Sorry for all the noise.
>> >
>> > Hi Hib, the patches do not apply cleanly anymore (my fault for taking so
>> > long to review), could you please rebase against the current master?
>> >
>> > Thanks and sorry,
>> >  Albert
>>
>> I have updated the patches. I am very curious to know if they pass
>> your regression tests.
>
> Found a regression already, will mail you the file in private as it's 3.1 MB.

Thank you for running the regression tests. I have updated my patches
again so that they pass with the document you sent me. Please test them
again when you can find some time.

Hib Eris
-------------- next part --------------
From c92b03b890e02f85499692fcb1d2469391bf8d88 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 6 Apr 2010 19:24:42 +0200
Subject: [PATCH 01/12] Cleanup XRef constructors

---
 poppler/XRef.cc |   14 ++++++--------
 poppler/XRef.h  |    1 +
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index 0cd4be0..ceb8efe 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -258,7 +258,7 @@ Object *ObjectStream::getObject(int objIdx, int objNum, Object *obj) {
 // XRef
 //------------------------------------------------------------------------
 
-XRef::XRef() {
+void XRef::init() {
   ok = gTrue;
   errCode = errNone;
   entries = NULL;
@@ -268,17 +268,15 @@ XRef::XRef() {
   objStrs = new PopplerCache(5);
 }
 
+XRef::XRef() {
+  init();
+}
+
 XRef::XRef(BaseStream *strA, GBool *wasReconstructed, GBool reconstruct) {
   Guint pos;
   Object obj;
 
-  ok = gTrue;
-  errCode = errNone;
-  size = 0;
-  entries = NULL;
-  streamEnds = NULL;
-  streamEndsLen = 0;
-  objStrs = new PopplerCache(5);
+  init();
 
   encrypted = gFalse;
   permFlags = defPermFlags;
diff --git a/poppler/XRef.h b/poppler/XRef.h
index 1f4ec6a..f18fa0e 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -157,6 +157,7 @@ private:
   Guchar fileKey[16];		// file decryption key
   GBool ownerPasswordOk;	// true if owner password is correct
 
+  void init();
   Guint getStartXref();
   GBool readXRef(Guint *pos, GooVector<Guint> *followedXRefStm);
   GBool readXRefTable(Parser *parser, Guint *pos, GooVector<Guint> *followedXRefStm);
-- 
1.7.0.4


From fa63cc0bfdfaa72075f68a61321df27b6657ced3 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 6 Apr 2010 19:16:45 +0200
Subject: [PATCH 02/12] Create no more XRef entries than specified

---
 poppler/XRef.cc |  136 ++++++++++++++++++++++++++++---------------------------
 poppler/XRef.h  |    5 ++-
 2 files changed, 73 insertions(+), 68 deletions(-)

diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index ceb8efe..59b0640 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -262,6 +262,7 @@ void XRef::init() {
   ok = gTrue;
   errCode = errNone;
   entries = NULL;
+  capacity = 0;
   size = 0;
   streamEnds = NULL;
   streamEndsLen = 0;
@@ -351,6 +352,56 @@ XRef::~XRef() {
   }
 }
 
+int XRef::reserve(int newSize)
+{
+  if (newSize > capacity) {
+
+    int realNewSize;
+    for (realNewSize = capacity ? 2 * capacity : 1024;
+          newSize > realNewSize && realNewSize > 0;
+          realNewSize <<= 1) ;
+    if ((realNewSize < 0) ||
+        (realNewSize >= INT_MAX / (int)sizeof(XRefEntry))) {
+      return 0;
+    }
+
+    void *p = greallocn_checkoverflow(entries, realNewSize, sizeof(XRefEntry));
+    if (p == NULL) {
+      return 0;
+    }
+
+    entries = (XRefEntry *) p;
+    capacity = realNewSize;
+
+  }
+
+  return capacity;
+}
+
+int XRef::resize(int newSize)
+{
+  if (newSize > size) {
+
+    if (reserve(newSize) < newSize) return size;
+
+    for (int i = size; i < newSize; ++i) {
+      entries[i].offset = 0xffffffff;
+      entries[i].type = xrefEntryFree;
+      entries[i].obj.initNull ();
+      entries[i].updated = false;
+      entries[i].gen = 0;
+    }
+  } else {
+    for (int i = newSize; i < size; i++) {
+      entries[i].obj.free ();
+    }
+  }
+
+  size = newSize;
+
+  return size;
+}
+
 // Read the 'startxref' position.
 Guint XRef::getStartXref() {
   char buf[xrefSearchSize+1];
@@ -438,7 +489,7 @@ GBool XRef::readXRefTable(Parser *parser, Guint *pos, GooVector<Guint> *followed
   GBool more;
   Object obj, obj2;
   Guint pos2;
-  int first, n, newSize, i;
+  int first, n, i;
 
   while (1) {
     parser->getObj(&obj);
@@ -457,29 +508,13 @@ GBool XRef::readXRefTable(Parser *parser, Guint *pos, GooVector<Guint> *followed
     n = obj.getInt();
     obj.free();
     if (first < 0 || n < 0 || first + n < 0) {
-      goto err1;
+      goto err0;
     }
     if (first + n > size) {
-      for (newSize = size ? 2 * size : 1024;
-	   first + n > newSize && newSize > 0;
-	   newSize <<= 1) ;
-      if (newSize < 0) {
-	goto err1;
-      }
-      if (newSize >= INT_MAX / (int)sizeof(XRefEntry)) {
+      if (resize(first + n) != first + n) {
         error(-1, "Invalid 'obj' parameters'");
-        goto err1;
+        goto err0;
       }
- 
-      entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
-      for (i = size; i < newSize; ++i) {
-	entries[i].offset = 0xffffffff;
-	entries[i].type = xrefEntryFree;
-	entries[i].obj.initNull ();
-	entries[i].updated = false;
-	entries[i].gen = 0;
-      }
-      size = newSize;
     }
     for (i = first; i < first + n; ++i) {
       if (!parser->getObj(&obj)->isInt()) {
@@ -568,6 +603,7 @@ GBool XRef::readXRefTable(Parser *parser, Guint *pos, GooVector<Guint> *followed
 
  err1:
   obj.free();
+ err0:
   ok = gFalse;
   return gFalse;
 }
@@ -590,19 +626,10 @@ GBool XRef::readXRefStream(Stream *xrefStr, Guint *pos) {
     goto err1;
   }
   if (newSize > size) {
-    if (newSize >= INT_MAX / (int)sizeof(XRefEntry)) {
-      error(-1, "Invalid 'size' parameter.");
-      return gFalse;
-    }
-    entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
-    for (i = size; i < newSize; ++i) {
-      entries[i].offset = 0xffffffff;
-      entries[i].type = xrefEntryFree;
-      entries[i].obj.initNull ();
-      entries[i].updated = false;
-      entries[i].gen = 0;
+    if (resize(newSize) != newSize) {
+      error(-1, "Invalid 'size' parameter");
+      goto err0;
     }
-    size = newSize;
   }
 
   if (!dict->lookupNF("W", &obj)->isArray() ||
@@ -675,31 +702,16 @@ GBool XRef::readXRefStream(Stream *xrefStr, Guint *pos) {
 
 GBool XRef::readXRefStreamSection(Stream *xrefStr, int *w, int first, int n) {
   Guint offset;
-  int type, gen, c, newSize, i, j;
+  int type, gen, c, i, j;
 
   if (first + n < 0) {
     return gFalse;
   }
   if (first + n > size) {
-    for (newSize = size ? 2 * size : 1024;
-	 first + n > newSize && newSize > 0;
-	 newSize <<= 1) ;
-    if (newSize < 0) {
-      return gFalse;
-    }
-    if (newSize >= INT_MAX / (int)sizeof(XRefEntry)) {
-      error(-1, "Invalid 'size' inside xref table.");
+    if (resize(first + n) != first + n) {  // resize() returns the old size on failure
+      error(-1, "Invalid 'size' inside xref table");
       return gFalse;
     }
-    entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
-    for (i = size; i < newSize; ++i) {
-      entries[i].offset = 0xffffffff;
-      entries[i].type = xrefEntryFree;
-      entries[i].obj.initNull ();
-      entries[i].updated = false;
-      entries[i].gen = 0;
-    }
-    size = newSize;
   }
   for (i = first; i < first + n; ++i) {
     if (w[0] == 0) {
@@ -760,13 +772,13 @@ GBool XRef::constructXRef(GBool *wasReconstructed) {
   int newSize;
   int streamEndsSize;
   char *p;
-  int i;
   GBool gotRoot;
   char* token = NULL;
   bool oneCycle = true;
   int offset = 0;
 
   gfree(entries);
+  capacity = 0;
   size = 0;
   entries = NULL;
 
@@ -853,23 +865,10 @@ GBool XRef::constructXRef(GBool *wasReconstructed) {
 		      error(-1, "Bad object number");
 		      return gFalse;
 		    }
-		    if (newSize >= INT_MAX / (int)sizeof(XRefEntry)) {
-		      error(-1, "Invalid 'obj' parameters.");
+		    if (resize(newSize) != newSize) {
+		      error(-1, "Invalid 'obj' parameters");
 		      return gFalse;
 		    }
-		    entries = (XRefEntry *)
-		        greallocn_checkoverflow(entries, newSize, sizeof(XRefEntry));
-		    if (entries == NULL) {
-		      size = 0;
-		      return gFalse;
-		    }
-		    for (i = size; i < newSize; ++i) {
-		      entries[i].offset = 0xffffffff;
-		      entries[i].type = xrefEntryFree;
-		      entries[i].obj.initNull ();
-		      entries[i].updated = false;
-		    }
-		    size = newSize;
 		  }
 		  if (entries[num].type == xrefEntryFree ||
 		      gen >= entries[num].gen) {
@@ -1158,7 +1157,10 @@ Guint XRef::strToUnsigned(char *s) {
 
 void XRef::add(int num, int gen, Guint offs, GBool used) {
   if (num >= size) {
-    entries = (XRefEntry *)greallocn(entries, num + 1, sizeof(XRefEntry));
+    if (num >= capacity) {
+      entries = (XRefEntry *)greallocn(entries, num + 1, sizeof(XRefEntry));
+      capacity = num + 1;
+    }
     for (int i = size; i < num + 1; ++i) {
       entries[i].offset = 0xffffffff;
       entries[i].type = xrefEntryFree;
diff --git a/poppler/XRef.h b/poppler/XRef.h
index f18fa0e..8808485 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -138,7 +138,8 @@ private:
   Guint start;			// offset in file (to allow for garbage
 				//   at beginning of file)
   XRefEntry *entries;		// xref entries
-  int size;			// size of <entries> array
+  int capacity;			// size of <entries> array
+  int size;			// number of entries
   int rootNum, rootGen;		// catalog dict
   GBool ok;			// true if xref table is valid
   int errCode;			// error code (if <ok> is false)
@@ -158,6 +159,8 @@ private:
   GBool ownerPasswordOk;	// true if owner password is correct
 
   void init();
+  int reserve(int newSize);
+  int resize(int newSize);
   Guint getStartXref();
   GBool readXRef(Guint *pos, GooVector<Guint> *followedXRefStm);
   GBool readXRefTable(Parser *parser, Guint *pos, GooVector<Guint> *followedXRefStm);
-- 
1.7.0.4


From 59e060cb12df2537669193cd650f154469a50fa0 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 28 Apr 2010 12:45:42 +0200
Subject: [PATCH 03/12] Use XRef::add() in XRef::addIndirectObject()

---
 poppler/XRef.cc |    4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index 59b0640..919e0fa 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -1202,10 +1202,8 @@ Ref XRef::addIndirectObject (Object* o) {
   XRefEntry *e;
   if (entryIndexToUse == -1) {
     entryIndexToUse = size;
-    size++;
-    entries = (XRefEntry *)greallocn(entries, size, sizeof(XRefEntry));
+    add(entryIndexToUse, 0, 0, gFalse);
     e = &entries[entryIndexToUse];
-    e->gen = 0;
   } else {
     //reuse a free entry
     e = &entries[entryIndexToUse];
-- 
1.7.0.4


From 715008a1aaa9aa6339e8280d8a39c2b9acaa0ab3 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 14 Apr 2010 12:20:49 +0200
Subject: [PATCH 04/12] Use XRef::getEntry() to access entries

---
 poppler/XRef.cc |   49 +++++++++++++++++++++++++------------------------
 poppler/XRef.h  |    2 +-
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index 919e0fa..05f6190 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -990,7 +990,7 @@ Object *XRef::fetch(int num, int gen, Object *obj) {
     goto err;
   }
 
-  e = &entries[num];
+  e = getEntry(num);
   if(!e->obj.isNull ()) { //check for updated object
     obj = e->obj.copy(obj);
     return obj;
@@ -1122,20 +1122,20 @@ GBool XRef::getStreamEnd(Guint streamStart, Guint *streamEnd) {
   return gTrue;
 }
 
-int XRef::getNumEntry(Guint offset) const
+int XRef::getNumEntry(Guint offset)
 {
   if (size > 0)
   {
     int res = 0;
-    Guint resOffset = entries[0].offset;
-    XRefEntry e;
+    Guint resOffset = getEntry(0)->offset;
+    XRefEntry *e;
     for (int i = 1; i < size; ++i)
     {
-      e = entries[i];
-      if (e.offset < offset && e.offset >= resOffset)
+      e = getEntry(i);
+      if (e->offset < offset && e->offset >= resOffset)
       {
         res = i;
-        resOffset = e.offset;
+        resOffset = e->offset;
       }
     }
     return res;
@@ -1170,7 +1170,7 @@ void XRef::add(int num, int gen, Guint offs, GBool used) {
     }
     size = num + 1;
   }
-  XRefEntry *e = &entries[num];
+  XRefEntry *e = getEntry(num);
   e->gen = gen;
   e->obj.initNull ();
   e->updated = false;
@@ -1188,25 +1188,26 @@ void XRef::setModifiedObject (Object* o, Ref r) {
     error(-1,"XRef::setModifiedObject on unknown ref: %i, %i\n", r.num, r.gen);
     return;
   }
-  entries[r.num].obj.free();
-  o->copy(&entries[r.num].obj);
-  entries[r.num].updated = true;
+  XRefEntry *e = getEntry(r.num);
+  e->obj.free();
+  o->copy(&(e->obj));
+  e->updated = true;
 }
 
 Ref XRef::addIndirectObject (Object* o) {
   int entryIndexToUse = -1;
   for (int i = 1; entryIndexToUse == -1 && i < size; ++i) {
-    if (entries[i].type == xrefEntryFree) entryIndexToUse = i;
+    if (getEntry(i)->type == xrefEntryFree) entryIndexToUse = i;
   }
 
   XRefEntry *e;
   if (entryIndexToUse == -1) {
     entryIndexToUse = size;
     add(entryIndexToUse, 0, 0, gFalse);
-    e = &entries[entryIndexToUse];
+    e = getEntry(entryIndexToUse);
   } else {
     //reuse a free entry
-    e = &entries[entryIndexToUse];
+    e = getEntry(entryIndexToUse);
     //we don't touch gen number, because it should have been 
     //incremented when the object was deleted
   }
@@ -1222,13 +1223,13 @@ Ref XRef::addIndirectObject (Object* o) {
 
 void XRef::writeToFile(OutStream* outStr, GBool writeAllEntries) {
   //create free entries linked-list
-  if (entries[0].gen != 65535) {
+  if (getEntry(0)->gen != 65535) {
     error(-1, "XRef::writeToFile, entry 0 of the XRef is invalid (gen != 65535)\n");
   }
   int lastFreeEntry = 0;
   for (int i=0; i<size; i++) {
-    if (entries[i].type == xrefEntryFree) {
-      entries[lastFreeEntry].offset = i;
+    if (getEntry(i)->type == xrefEntryFree) {
+      getEntry(lastFreeEntry)->offset = i;
       lastFreeEntry = i;
     }
   }
@@ -1238,10 +1239,10 @@ void XRef::writeToFile(OutStream* outStr, GBool writeAllEntries) {
     outStr->printf("xref\r\n");
     outStr->printf("%i %i\r\n", 0, size);
     for (int i=0; i<size; i++) {
-      XRefEntry &e = entries[i];
+      XRefEntry *e = getEntry(i);
 
-      if(e.gen > 65535) e.gen = 65535; //cap generation number to 65535 (required by PDFReference)
-      outStr->printf("%010i %05i %c\r\n", e.offset, e.gen, (e.type==xrefEntryFree)?'f':'n');
+      if(e->gen > 65535) e->gen = 65535; //cap generation number to 65535 (required by PDFReference)
+      outStr->printf("%010i %05i %c\r\n", e->offset, e->gen, (e->type==xrefEntryFree)?'f':'n');
     }
   } else {
     //write the new xref
@@ -1250,16 +1251,16 @@ void XRef::writeToFile(OutStream* outStr, GBool writeAllEntries) {
     while (i < size) {
       int j;
       for(j=i; j<size; j++) { //look for consecutive entries
-        if ((entries[j].type == xrefEntryFree) && (entries[j].gen == 0))
+        if ((getEntry(j)->type == xrefEntryFree) && (getEntry(j)->gen == 0))
           break;
       }
       if (j-i != 0)
       {
         outStr->printf("%i %i\r\n", i, j-i);
         for (int k=i; k<j; k++) {
-          XRefEntry &e = entries[k];
-          if(e.gen > 65535) e.gen = 65535; //cap generation number to 65535 (required by PDFReference)
-          outStr->printf("%010i %05i %c\r\n", e.offset, e.gen, (e.type==xrefEntryFree)?'f':'n');
+          XRefEntry *e = getEntry(k);
+          if(e->gen > 65535) e->gen = 65535; //cap generation number to 65535 (required by PDFReference)
+          outStr->printf("%010i %05i %c\r\n", e->offset, e->gen, (e->type==xrefEntryFree)?'f':'n');
         }
         i = j;
       }
diff --git a/poppler/XRef.h b/poppler/XRef.h
index 8808485..260f039 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -119,7 +119,7 @@ public:
   GBool getStreamEnd(Guint streamStart, Guint *streamEnd);
 
   // Retuns the entry that belongs to the offset
-  int getNumEntry(Guint offset) const;
+  int getNumEntry(Guint offset);
 
   // Direct access.
   int getSize() { return size; }
-- 
1.7.0.4


From aa0e27ca94a11313f737bd34a585e9ccb1b2c352 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 15 Apr 2010 17:34:13 +0200
Subject: [PATCH 05/12] Read XRef table sections on demand

---
 poppler/XRef.cc |   58 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 poppler/XRef.h  |    6 +++-
 2 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index 05f6190..ec7afa3 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -287,6 +287,7 @@ XRef::XRef(BaseStream *strA, GBool *wasReconstructed, GBool reconstruct) {
   str = strA;
   start = str->getStart();
   pos = getStartXref();
+  prevXRefOffset = pos;
 
   if (reconstruct && !(ok = constructXRef(wasReconstructed)))
   {
@@ -297,7 +298,7 @@ XRef::XRef(BaseStream *strA, GBool *wasReconstructed, GBool reconstruct) {
   {
     // if there was a problem with the 'startxref' position, try to
     // reconstruct the xref table
-    if (pos == 0) {
+    if (prevXRefOffset == 0) {
       if (!(ok = constructXRef(wasReconstructed))) {
         errCode = errDamaged;
         return;
@@ -306,7 +307,7 @@ XRef::XRef(BaseStream *strA, GBool *wasReconstructed, GBool reconstruct) {
     // read the xref table
     } else {
       GooVector<Guint> followedXRefStm;
-      while (readXRef(&pos, &followedXRefStm)) ;
+      readXRef(&prevXRefOffset, &followedXRefStm);
 
       // if there was a problem with the xref table,
       // try to reconstruct it
@@ -318,6 +319,18 @@ XRef::XRef(BaseStream *strA, GBool *wasReconstructed, GBool reconstruct) {
       }
     }
 
+    // set size according to trailer dict
+    trailerDict.dictLookupNF("Size", &obj);
+    if (obj.isInt() && (resize(obj.getInt()) == obj.getInt())) {
+      obj.free();
+    } else {
+      obj.free();
+      if (!(ok = constructXRef(wasReconstructed))) {
+        errCode = errDamaged;
+        return;
+      }
+    }
+
     // get the root dictionary (catalog) object
     trailerDict.dictLookupNF("Root", &obj);
     if (obj.isRef()) {
@@ -386,7 +399,7 @@ int XRef::resize(int newSize)
 
     for (int i = size; i < newSize; ++i) {
       entries[i].offset = 0xffffffff;
-      entries[i].type = xrefEntryFree;
+      entries[i].type = xrefEntryNone;
       entries[i].obj.initNull ();
       entries[i].updated = false;
       entries[i].gen = 0;
@@ -1269,3 +1282,42 @@ void XRef::writeToFile(OutStream* outStr, GBool writeAllEntries) {
   }
 }
 
+XRefEntry *XRef::getEntry(int i)
+{
+  if (entries[i].type == xrefEntryNone) {
+
+      GooVector<Guint> followedPrev;
+      while (prevXRefOffset && entries[i].type == xrefEntryNone) {
+        bool ok = true;
+        for (size_t j = 0; j < followedPrev.size(); j++) {
+          if (followedPrev.at(j) == prevXRefOffset) {
+            ok = false;
+            break;
+          }
+        }
+        if (!ok) {
+          error(-1, "Circular XRef");
+          if (!(ok = constructXRef(NULL))) {
+            errCode = errDamaged;
+          }
+          break;
+        }
+
+        followedPrev.push_back (prevXRefOffset);
+
+        GooVector<Guint> followedXRefStm;
+        if (!readXRef(&prevXRefOffset, &followedXRefStm)) {
+            prevXRefOffset = 0;
+        }
+      }
+
+      if (entries[i].type == xrefEntryNone) {
+         error(-1, "Invalid XRef entry");
+         entries[i].type = xrefEntryFree;
+      }
+  }
+
+  return &entries[i];
+}
+
+
diff --git a/poppler/XRef.h b/poppler/XRef.h
index 260f039..d37e31d 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -47,7 +47,8 @@ class PopplerCache;
 enum XRefEntryType {
   xrefEntryFree,
   xrefEntryUncompressed,
-  xrefEntryCompressed
+  xrefEntryCompressed,
+  xrefEntryNone
 };
 
 struct XRefEntry {
@@ -123,7 +124,7 @@ public:
 
   // Direct access.
   int getSize() { return size; }
-  XRefEntry *getEntry(int i) { return &entries[i]; }
+  XRefEntry *getEntry(int i);
   Object *getTrailerDict() { return &trailerDict; }
 
   // Write access
@@ -157,6 +158,7 @@ private:
   int permFlags;		// permission bits
   Guchar fileKey[16];		// file decryption key
   GBool ownerPasswordOk;	// true if owner password is correct
+  Guint prevXRefOffset;		// position of prev XRef section (= next to read)
 
   void init();
   int reserve(int newSize);
-- 
1.7.0.4


From ca677432392d67d82a034a887fd802db90b42ed2 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 24 Mar 2010 18:26:17 +0100
Subject: [PATCH 06/12] Add Linearization dictionary support

---
 CMakeLists.txt           |    2 +
 poppler/Linearization.cc |  225 ++++++++++++++++++++++++++++++++++++++++++++++
 poppler/Linearization.h  |   45 +++++++++
 poppler/Makefile.am      |    2 +
 poppler/PDFDoc.cc        |   13 +++
 poppler/PDFDoc.h         |    5 +
 6 files changed, 292 insertions(+), 0 deletions(-)
 create mode 100644 poppler/Linearization.cc
 create mode 100644 poppler/Linearization.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 406ba88..7c25c45 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -251,6 +251,7 @@ set(poppler_SRCS
   poppler/JBIG2Stream.cc
   poppler/Lexer.cc
   poppler/Link.cc
+  poppler/Linearization.cc
   poppler/LocalPDFDocBuilder.cc
   poppler/NameToCharCode.cc
   poppler/Object.cc
@@ -397,6 +398,7 @@ if(ENABLE_XPDF_HEADERS)
     poppler/JBIG2Stream.h
     poppler/Lexer.h
     poppler/Link.h
+    poppler/Linearization.h
     poppler/LocalPDFDocBuilder.h
     poppler/Movie.h
     poppler/NameToCharCode.h
diff --git a/poppler/Linearization.cc b/poppler/Linearization.cc
new file mode 100644
index 0000000..23c77f2
--- /dev/null
+++ b/poppler/Linearization.cc
@@ -0,0 +1,225 @@
+//========================================================================
+//
+// Linearization.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2010 Hib Eris <hib at hiberis.nl>
+//
+//========================================================================
+
+#include "Linearization.h"
+#include "Parser.h"
+#include "Lexer.h"
+
+//------------------------------------------------------------------------
+// Linearization
+//------------------------------------------------------------------------
+
+Linearization::Linearization (BaseStream *str)
+{
+  Parser *parser;
+  Object obj1, obj2, obj3, obj4, obj5;
+
+  linDict.initNull();
+
+  str->reset();
+  obj1.initNull();
+  parser = new Parser(NULL,
+      new Lexer(NULL, str->makeSubStream(str->getStart(), gFalse, 0, &obj1)),
+      gFalse);
+  parser->getObj(&obj1);
+  parser->getObj(&obj2);
+  parser->getObj(&obj3);
+  parser->getObj(&linDict);
+  parser->getObj(&obj4);
+  if (obj1.isInt() && obj2.isInt() && obj3.isCmd("obj") && linDict.isDict()) {
+    linDict.dictLookup("Linearized", &obj5);
+    if (!(obj5.isNum() && obj5.getNum() > 0)) {
+       linDict.free();
+       linDict.initNull();
+    }
+    obj5.free();
+  }
+  obj4.free();
+  obj4.free();
+  obj3.free();
+  obj2.free();
+  obj1.free();
+  delete parser;
+}
+
+Linearization:: ~Linearization()
+{
+  linDict.free();
+}
+
+Guint Linearization::getLength()
+{
+  if (!linDict.isDict()) return 0;
+
+  int length;
+  if (linDict.getDict()->lookupInt("L", NULL, &length) &&
+      length > 0) {
+    return length;
+  } else {
+    error(-1, "Length in linearization table is invalid");
+    return 0;
+  }
+}
+
+Guint Linearization::getHintsOffset()
+{
+  int hintsOffset;
+
+  Object obj1, obj2;
+  if (linDict.isDict() &&
+      linDict.dictLookup("H", &obj1)->isArray() &&
+      obj1.arrayGetLength()>=2 &&
+      obj1.arrayGet(0, &obj2)->isInt() &&
+      obj2.getInt() > 0) {
+    hintsOffset = obj2.getInt();
+  } else {
+    error(-1, "Hints table offset in linearization table is invalid");
+    hintsOffset = 0;
+  }
+  obj2.free();
+  obj1.free();
+
+  return hintsOffset;
+}
+
+Guint Linearization::getHintsLength()
+{
+  int hintsLength;
+
+  Object obj1, obj2;
+  if (linDict.isDict() &&
+      linDict.dictLookup("H", &obj1)->isArray() &&
+      obj1.arrayGetLength()>=2 &&
+      obj1.arrayGet(1, &obj2)->isInt() &&
+      obj2.getInt() > 0) {
+    hintsLength = obj2.getInt();
+  } else {
+    error(-1, "Hints table length in linearization table is invalid");
+    hintsLength = 0;
+  }
+  obj2.free();
+  obj1.free();
+
+  return hintsLength;
+}
+
+Guint Linearization::getHintsOffset2()
+{
+  int hintsOffset2 = 0; // default to 0
+
+  Object obj1, obj2;
+  if (linDict.isDict() &&
+      linDict.dictLookup("H", &obj1)->isArray() &&
+      obj1.arrayGetLength()>=4) {
+    if (obj1.arrayGet(2, &obj2)->isInt() &&
+        obj2.getInt() > 0) {
+      hintsOffset2 = obj2.getInt();
+    } else {
+      error(-1, "Second hints table offset in linearization table is invalid");
+      hintsOffset2 = 0;
+    }
+  }
+  obj2.free();
+  obj1.free();
+
+  return hintsOffset2;
+}
+
+Guint Linearization::getHintsLength2()
+{
+  int hintsLength2 = 0; // default to 0
+
+  Object obj1, obj2;
+  if (linDict.isDict() &&
+      linDict.dictLookup("H", &obj1)->isArray() &&
+      obj1.arrayGetLength()>=4) {
+    if (obj1.arrayGet(3, &obj2)->isInt() &&
+        obj2.getInt() > 0) {
+      hintsLength2 = obj2.getInt();
+    } else {
+      error(-1, "Second hints table length in linearization table is invalid");
+      hintsLength2 = 0;
+    }
+  }
+  obj2.free();
+  obj1.free();
+
+  return hintsLength2;
+}
+
+int Linearization::getObjectNumberFirst()
+{
+  int objectNumberFirst = 0;
+  if (linDict.isDict() &&
+      linDict.getDict()->lookupInt("O", NULL, &objectNumberFirst) &&
+      objectNumberFirst > 0) {
+    return objectNumberFirst;
+  } else {
+    error(-1, "Object number of first page in linearization table is invalid");
+    return 0;
+  }
+}
+
+Guint Linearization::getEndFirst()
+{
+  int pageEndFirst = 0;
+  if (linDict.isDict() &&
+      linDict.getDict()->lookupInt("E", NULL, &pageEndFirst) &&
+      pageEndFirst > 0) {
+    return pageEndFirst;
+  } else {
+    error(-1, "First page end offset in linearization table is invalid");
+    return 0;
+  }
+}
+
+int Linearization::getNumPages()
+{
+  int numPages = 0;
+  if (linDict.isDict() &&
+      linDict.getDict()->lookupInt("N", NULL, &numPages) &&
+      numPages > 0) {
+    return numPages;
+  } else {
+    error(-1, "Page count in linearization table is invalid");
+    return 0;
+  }
+}
+
+Guint Linearization::getMainXRefEntriesOffset()
+{
+  int mainXRefEntriesOffset = 0;
+  if (linDict.isDict() &&
+      linDict.getDict()->lookupInt("T", NULL, &mainXRefEntriesOffset) &&
+      mainXRefEntriesOffset > 0) {
+    return mainXRefEntriesOffset;
+  } else {
+    error(-1, "Main Xref offset in linearization table is invalid");
+    return 0;
+  }
+}
+
+int Linearization::getPageFirst()
+{
+  int pageFirst = 0; // Optional, defaults to 0.
+
+  if (linDict.isDict()) {
+    linDict.getDict()->lookupInt("P", NULL, &pageFirst);
+  }
+
+  if (pageFirst < 0) {
+    error(-1, "First page in linearization table is invalid");
+    return 0;
+  }
+
+  return pageFirst;
+}
+
+
diff --git a/poppler/Linearization.h b/poppler/Linearization.h
new file mode 100644
index 0000000..6728a75
--- /dev/null
+++ b/poppler/Linearization.h
@@ -0,0 +1,45 @@
+//========================================================================
+//
+// Linearization.h
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2010 Hib Eris <hib at hiberis.nl>
+//
+//========================================================================
+
+#ifndef LINEARIZATION_H
+#define LINEARIZATION_H
+
+#include "goo/gtypes.h"
+#include "Object.h"
+class BaseStream;
+
+//------------------------------------------------------------------------
+// Linearization
+//------------------------------------------------------------------------
+
+class Linearization {
+public:
+
+  Linearization(BaseStream *str);
+  ~Linearization();
+
+  Guint getLength();
+  Guint getHintsOffset();
+  Guint getHintsLength();
+  Guint getHintsOffset2();
+  Guint getHintsLength2();
+  int getObjectNumberFirst();
+  Guint getEndFirst();
+  int getNumPages();
+  Guint getMainXRefEntriesOffset();
+  int getPageFirst();
+
+private:
+
+  Object linDict;
+
+};
+
+#endif
diff --git a/poppler/Makefile.am b/poppler/Makefile.am
index ccc388f..bb6daa6 100644
--- a/poppler/Makefile.am
+++ b/poppler/Makefile.am
@@ -209,6 +209,7 @@ poppler_include_HEADERS =	\
 	JArithmeticDecoder.h	\
 	JBIG2Stream.h		\
 	Lexer.h			\
+	Linearization.h 	\
 	Link.h			\
 	LocalPDFDocBuilder.h	\
 	Movie.h                 \
@@ -287,6 +288,7 @@ libpoppler_la_SOURCES =		\
 	JArithmeticDecoder.cc	\
 	JBIG2Stream.cc		\
 	Lexer.cc 		\
+	Linearization.cc 	\
 	Link.cc 		\
 	LocalPDFDocBuilder.cc	\
 	Movie.cc                \
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 87334e4..0f1eb42 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -55,6 +55,7 @@
 #include "Catalog.h"
 #include "Stream.h"
 #include "XRef.h"
+#include "Linearization.h"
 #include "Link.h"
 #include "OutputDev.h"
 #include "Error.h"
@@ -86,6 +87,7 @@ void PDFDoc::init()
   file = NULL;
   str = NULL;
   xref = NULL;
+  linearization = NULL;
   catalog = NULL;
 #ifndef DISABLE_OUTLINE
   outline = NULL;
@@ -259,6 +261,9 @@ PDFDoc::~PDFDoc() {
   if (xref) {
     delete xref;
   }
+  if (linearization) {
+    delete linearization;
+  }
   if (str) {
     delete str;
   }
@@ -433,6 +438,14 @@ void PDFDoc::processLinks(OutputDev *out, int page) {
     catalog->getPage(page)->processLinks(out, catalog);
 }
 
+Linearization *PDFDoc::getLinearization()
+{
+  if (!linearization) {
+    linearization = new Linearization(str);
+  }
+  return linearization;
+}
+
 GBool PDFDoc::isLinearized() {
   Parser *parser;
   Object obj1, obj2, obj3, obj4, obj5;
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 8fa2dcf..5359ddb 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -49,6 +49,7 @@ class Links;
 class LinkAction;
 class LinkDest;
 class Outline;
+class Linearization;
 
 enum PDFWriteMode {
   writeStandard,
@@ -90,6 +91,9 @@ public:
   // Get file name.
   GooString *getFileName() { return fileName; }
 
+  // Get the linearization table.
+  Linearization *getLinearization();
+
   // Get the xref table.
   XRef *getXRef() { return xref; }
 
@@ -246,6 +250,7 @@ private:
   void *guiData;
   int pdfMajorVersion;
   int pdfMinorVersion;
+  Linearization *linearization;
   XRef *xref;
   Catalog *catalog;
 #ifndef DISABLE_OUTLINE
-- 
1.7.0.4


From 4a8cea714fdd5b17a8a0fe380f5228fa2f4d8387 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 13 Apr 2010 18:51:40 +0200
Subject: [PATCH 07/12] Add getLength() to BaseStream

---
 poppler/Stream.cc |   11 ++++++-----
 poppler/Stream.h  |   11 ++++++-----
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/poppler/Stream.cc b/poppler/Stream.cc
index 0fb3884..fbf2b33 100644
--- a/poppler/Stream.cc
+++ b/poppler/Stream.cc
@@ -372,8 +372,9 @@ void FileOutStream::printf(const char *format, ...)
 // BaseStream
 //------------------------------------------------------------------------
 
-BaseStream::BaseStream(Object *dictA) {
+BaseStream::BaseStream(Object *dictA, Guint lengthA) {
   dict = *dictA;
+  length = lengthA;
 }
 
 BaseStream::~BaseStream() {
@@ -694,7 +695,7 @@ GBool StreamPredictor::getNextLine() {
 
 FileStream::FileStream(FILE *fA, Guint startA, GBool limitedA,
 		       Guint lengthA, Object *dictA):
-    BaseStream(dictA) {
+    BaseStream(dictA, lengthA) {
   f = fA;
   start = startA;
   limited = limitedA;
@@ -819,7 +820,7 @@ void FileStream::moveStart(int delta) {
 
 CachedFileStream::CachedFileStream(CachedFile *ccA, Guint startA,
         GBool limitedA, Guint lengthA, Object *dictA)
-  : BaseStream(dictA)
+  : BaseStream(dictA, lengthA)
 {
   cc = ccA;
   start = startA;
@@ -917,7 +918,7 @@ void CachedFileStream::moveStart(int delta)
 //------------------------------------------------------------------------
 
 MemStream::MemStream(char *bufA, Guint startA, Guint lengthA, Object *dictA):
-    BaseStream(dictA) {
+    BaseStream(dictA, lengthA) {
   buf = bufA;
   start = startA;
   length = lengthA;
@@ -981,7 +982,7 @@ void MemStream::moveStart(int delta) {
 
 EmbedStream::EmbedStream(Stream *strA, Object *dictA,
 			 GBool limitedA, Guint lengthA):
-    BaseStream(dictA) {
+    BaseStream(dictA, lengthA) {
   str = strA;
   limited = limitedA;
   length = lengthA;
diff --git a/poppler/Stream.h b/poppler/Stream.h
index 583278f..e99f03b 100644
--- a/poppler/Stream.h
+++ b/poppler/Stream.h
@@ -293,7 +293,7 @@ private:
 class BaseStream: public Stream {
 public:
 
-  BaseStream(Object *dictA);
+  BaseStream(Object *dictA, Guint lengthA);
   virtual ~BaseStream();
   virtual Stream *makeSubStream(Guint start, GBool limited,
 				Guint length, Object *dict) = 0;
@@ -303,11 +303,16 @@ public:
   virtual Stream *getUndecodedStream() { return this; }
   virtual Dict *getDict() { return dict.getDict(); }
   virtual GooString *getFileName() { return NULL; }
+  virtual Guint getLength() { return length; }
 
   // Get/set position of first byte of stream within the file.
   virtual Guint getStart() = 0;
   virtual void moveStart(int delta) = 0;
 
+protected:
+
+  Guint length;
+
 private:
 
   Object dict;
@@ -478,7 +483,6 @@ private:
   FILE *f;
   Guint start;
   GBool limited;
-  Guint length;
   char buf[fileStreamBufSize];
   char *bufPtr;
   char *bufEnd;
@@ -523,7 +527,6 @@ private:
   CachedFile *cc;
   Guint start;
   GBool limited;
-  Guint length;
   char buf[cachedStreamBufSize];
   char *bufPtr;
   char *bufEnd;
@@ -567,7 +570,6 @@ private:
 
   char *buf;
   Guint start;
-  Guint length;
   char *bufEnd;
   char *bufPtr;
   GBool needFree;
@@ -607,7 +609,6 @@ private:
 
   Stream *str;
   GBool limited;
-  Guint length;
 };
 
 //------------------------------------------------------------------------
-- 
1.7.0.4


From 0d1f61617fe1edb3947667c7743e5a1acf3f41d3 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 24 Mar 2010 19:16:14 +0100
Subject: [PATCH 08/12] Pass size of file when creating FileStream

---
 poppler/PDFDoc.cc |   19 +++++++++++++++++--
 1 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 0f1eb42..31f9e39 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -47,6 +47,7 @@
 #ifdef _WIN32
 #  include <windows.h>
 #endif
+#include <sys/stat.h>
 #include "goo/gstrtod.h"
 #include "goo/GooString.h"
 #include "poppler-config.h"
@@ -102,12 +103,18 @@ PDFDoc::PDFDoc()
 PDFDoc::PDFDoc(GooString *fileNameA, GooString *ownerPassword,
 	       GooString *userPassword, void *guiDataA) {
   Object obj;
+  int size = 0;
 
   init();
 
   fileName = fileNameA;
   guiData = guiDataA;
 
+  struct stat buf;
+  if (stat(fileName->getCString(), &buf) == 0) {
+     size = buf.st_size;
+  }
+
   // try to open file
 #ifdef VMS
   file = fopen(fileName->getCString(), "rb", "ctx=stm");
@@ -127,7 +134,7 @@ PDFDoc::PDFDoc(GooString *fileNameA, GooString *ownerPassword,
 
   // create stream
   obj.initNull();
-  str = new FileStream(file, 0, gFalse, 0, &obj);
+  str = new FileStream(file, 0, gFalse, size, &obj);
 
   ok = setup(ownerPassword, userPassword);
 }
@@ -158,11 +165,19 @@ PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GooString *ownerPassword,
 
   // try to open file
   // NB: _wfopen is only available in NT
+  struct _stat buf;	// buffer type taken by both _wstat() and _stat()
+  int size = 0;		// stays 0 when the file cannot be stat'ed
   version.dwOSVersionInfoSize = sizeof(version);
   GetVersionEx(&version);
   if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
+    if (_wstat(fileName2, &buf) == 0) {
+      size = buf.st_size;
+    }
     file = _wfopen(fileName2, L"rb");
   } else {
+    if (_stat(fileName->getCString(), &buf) == 0) { // narrow path: not _wstat
+      size = buf.st_size;
+    }
     file = fopen(fileName->getCString(), "rb");
   }
   if (!file) {
@@ -173,7 +188,7 @@ PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GooString *ownerPassword,
 
   // create stream
   obj.initNull();
-  str = new FileStream(file, 0, gFalse, 0, &obj);
+  str = new FileStream(file, 0, gFalse, size, &obj);
 
   ok = setup(ownerPassword, userPassword);
 }
-- 
1.7.0.4


From eb5b1914908f63afcc448558571110dec4f78b14 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 24 Mar 2010 19:32:59 +0100
Subject: [PATCH 09/12] Improve linearization check

---
 poppler/PDFDoc.cc |   33 +++++----------------------------
 1 files changed, 5 insertions(+), 28 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 31f9e39..160c361 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -462,34 +462,11 @@ Linearization *PDFDoc::getLinearization()
 }
 
 GBool PDFDoc::isLinearized() {
-  Parser *parser;
-  Object obj1, obj2, obj3, obj4, obj5;
-  GBool lin;
-
-  lin = gFalse;
-  obj1.initNull();
-  parser = new Parser(xref,
-	     new Lexer(xref,
-	       str->makeSubStream(str->getStart(), gFalse, 0, &obj1)),
-	     gTrue);
-  parser->getObj(&obj1);
-  parser->getObj(&obj2);
-  parser->getObj(&obj3);
-  parser->getObj(&obj4);
-  if (obj1.isInt() && obj2.isInt() && obj3.isCmd("obj") &&
-      obj4.isDict()) {
-    obj4.dictLookup("Linearized", &obj5);
-    if (obj5.isNum() && obj5.getNum() > 0) {
-      lin = gTrue;
-    }
-    obj5.free();
-  }
-  obj4.free();
-  obj3.free();
-  obj2.free();
-  obj1.free();
-  delete parser;
-  return lin;
+  if ((str->getLength()) &&
+      (getLinearization()->getLength() == str->getLength()))
+    return gTrue;
+  else
+    return gFalse;
 }
 
 static GBool
-- 
1.7.0.4


From e1fff916ad4d1e34b272091794bcfc06f2b58a9b Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 7 Apr 2010 12:05:56 +0200
Subject: [PATCH 10/12] Move getStartXref from XRef to PDFDoc

---
 poppler/PDFDoc.cc |   63 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 poppler/PDFDoc.h  |    5 ++++
 poppler/XRef.cc   |   52 +------------------------------------------
 poppler/XRef.h    |    6 +----
 4 files changed, 67 insertions(+), 59 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 160c361..59caa4b 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -37,6 +37,7 @@
 #pragma implementation
 #endif
 
+#include <ctype.h>
 #include <locale.h>
 #include <stdio.h>
 #include <errno.h>
@@ -76,6 +77,9 @@
 				//   file to look for '%PDF'
 #define pdfIdLength 32   // PDF Document IDs (PermanentId, UpdateId) length
 
+#define xrefSearchSize 1024	// read this many bytes at end of file
+				//   to look for 'startxref'
+
 //------------------------------------------------------------------------
 // PDFDoc
 //------------------------------------------------------------------------
@@ -93,6 +97,7 @@ void PDFDoc::init()
 #ifndef DISABLE_OUTLINE
   outline = NULL;
 #endif
+  startXRefPos = ~(Guint)0;
 }
 
 PDFDoc::PDFDoc()
@@ -228,7 +233,7 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
   GBool wasReconstructed = false;
 
   // read xref table
-  xref = new XRef(str, &wasReconstructed);
+  xref = new XRef(str, getStartXRef(), &wasReconstructed);
   if (!xref->isOk()) {
     error(-1, "Couldn't read xref table");
     errCode = xref->getErrorCode();
@@ -249,7 +254,7 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
       // try one more time to contruct the Catalog, maybe the problem is damaged XRef 
       delete catalog;
       delete xref;
-      xref = new XRef(str, NULL, true);
+      xref = new XRef(str, 0, NULL, true);
       catalog = new Catalog(xref);
     }
 
@@ -974,7 +979,7 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr,
   trailerDict->set("Root", &obj1);
 
   if (incrUpdate) { 
-    obj1.initInt(xref->getLastXRefPos());
+    obj1.initInt(getStartXRef());
     trailerDict->set("Prev", &obj1);
   }
   
@@ -1012,3 +1017,55 @@ PDFDoc *PDFDoc::ErrorPDFDoc(int errorCode, GooString *fileNameA)
 
   return doc;
 }
+
+Guint PDFDoc::strToUnsigned(char *s) {
+  Guint x;
+  char *p;
+  int i;
+
+  x = 0;
+  for (p = s, i = 0; *p && isdigit(*p) && i < 10; ++p, ++i) {
+    x = 10 * x + (*p - '0');
+  }
+  return x;
+}
+
+// Read the 'startxref' position (cached in startXRefPos after the first call).
+Guint PDFDoc::getStartXRef()
+{
+  if (startXRefPos == ~(Guint)0) {
+
+    {
+      char buf[xrefSearchSize+1];
+      char *p;
+      int c, n, i;
+
+      // read last xrefSearchSize bytes
+      str->setPos(xrefSearchSize, -1);
+      for (n = 0; n < xrefSearchSize; ++n) {
+        if ((c = str->getChar()) == EOF) {
+          break;
+        }
+        buf[n] = c;
+      }
+      buf[n] = '\0';
+
+      // find startxref
+      for (i = n - 9; i >= 0; --i) {
+        if (!strncmp(&buf[i], "startxref", 9)) {
+          break;
+        }
+      }
+      startXRefPos = 0;	// result when no 'startxref' keyword was found
+      if (i >= 0) {	// only parse the number if the keyword is present
+        for (p = &buf[i+9]; isspace(*p); ++p) ;
+        startXRefPos =  strToUnsigned(p);
+      }
+    }
+
+  }
+
+  return startXRefPos;
+}
+
+
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 5359ddb..457d41b 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -243,6 +243,9 @@ private:
   GBool checkFooter();
   void checkHeader();
   GBool checkEncryption(GooString *ownerPassword, GooString *userPassword);
+  // Get the offset of the start xref table.
+  Guint getStartXRef();
+  Guint strToUnsigned(char *s);
 
   GooString *fileName;
   FILE *file;
@@ -262,6 +265,8 @@ private:
   //If there is an error opening the PDF file with fopen() in the constructor, 
   //then the POSIX errno will be here.
   int fopenErrno;
+
+  Guint startXRefPos;		// offset of last xref table
 };
 
 #endif
diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index ec7afa3..df23539 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -48,11 +48,6 @@
 #include "PopplerCache.h"
 
 //------------------------------------------------------------------------
-
-#define xrefSearchSize 1024	// read this many bytes at end of file
-				//   to look for 'startxref'
-
-//------------------------------------------------------------------------
 // Permission bits
 // Note that the PDF spec uses 1 base (eg bit 3 is 1<<2)
 //------------------------------------------------------------------------
@@ -273,8 +268,7 @@ XRef::XRef() {
   init();
 }
 
-XRef::XRef(BaseStream *strA, GBool *wasReconstructed, GBool reconstruct) {
-  Guint pos;
+XRef::XRef(BaseStream *strA, Guint pos, GBool *wasReconstructed, GBool reconstruct) {
   Object obj;
 
   init();
@@ -286,7 +280,6 @@ XRef::XRef(BaseStream *strA, GBool *wasReconstructed, GBool reconstruct) {
   // read the trailer
   str = strA;
   start = str->getStart();
-  pos = getStartXref();
   prevXRefOffset = pos;
 
   if (reconstruct && !(ok = constructXRef(wasReconstructed)))
@@ -415,37 +408,6 @@ int XRef::resize(int newSize)
   return size;
 }
 
-// Read the 'startxref' position.
-Guint XRef::getStartXref() {
-  char buf[xrefSearchSize+1];
-  char *p;
-  int c, n, i;
-
-  // read last xrefSearchSize bytes
-  str->setPos(xrefSearchSize, -1);
-  for (n = 0; n < xrefSearchSize; ++n) {
-    if ((c = str->getChar()) == EOF) {
-      break;
-    }
-    buf[n] = c;
-  }
-  buf[n] = '\0';
-
-  // find startxref
-  for (i = n - 9; i >= 0; --i) {
-    if (!strncmp(&buf[i], "startxref", 9)) {
-      break;
-    }
-  }
-  if (i < 0) {
-    return 0;
-  }
-  for (p = &buf[i+9]; isspace(*p); ++p) ;
-  lastXRefPos = strToUnsigned(p);
-
-  return lastXRefPos;
-}
-
 // Read one xref table section.  Also reads the associated trailer
 // dictionary, and returns the prev pointer (if any).
 GBool XRef::readXRef(Guint *pos, GooVector<Guint> *followedXRefStm) {
@@ -1156,18 +1118,6 @@ int XRef::getNumEntry(Guint offset)
   else return -1;
 }
 
-Guint XRef::strToUnsigned(char *s) {
-  Guint x;
-  char *p;
-  int i;
-
-  x = 0;
-  for (p = s, i = 0; *p && isdigit(*p) && i < 10; ++p, ++i) {
-    x = 10 * x + (*p - '0');
-  }
-  return x;
-}
-
 void XRef::add(int num, int gen, Guint offs, GBool used) {
   if (num >= size) {
     if (num >= capacity) {
diff --git a/poppler/XRef.h b/poppler/XRef.h
index d37e31d..75b065a 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -65,7 +65,7 @@ public:
   // Constructor, create an empty XRef, used for PDF writing
   XRef();
   // Constructor.  Read xref table from stream.
-  XRef(BaseStream *strA, GBool *wasReconstructed = NULL, GBool reconstruct = false);
+  XRef(BaseStream *strA, Guint pos, GBool *wasReconstructed = NULL, GBool reconstruct = false);
 
   // Destructor.
   ~XRef();
@@ -108,9 +108,6 @@ public:
   // Return the number of objects in the xref table.
   int getNumObjects() { return size; }
 
-  // Return the offset of the last xref table.
-  Guint getLastXRefPos() { return lastXRefPos; }
-
   // Return the catalog object reference.
   int getRootNum() { return rootNum; }
   int getRootGen() { return rootGen; }
@@ -145,7 +142,6 @@ private:
   GBool ok;			// true if xref table is valid
   int errCode;			// error code (if <ok> is false)
   Object trailerDict;		// trailer dictionary
-  Guint lastXRefPos;		// offset of last xref table
   Guint *streamEnds;		// 'endstream' positions - only used in
 				//   damaged files
   int streamEndsLen;		// number of valid entries in streamEnds
-- 
1.7.0.4


From 4a36767ac3041c04ac108729fc9fc643bfa16c68 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 7 Apr 2010 12:35:05 +0200
Subject: [PATCH 11/12] Use XRef table at start of linearized document

---
 poppler/PDFDoc.cc |   27 ++++++++++++++++++++++++++-
 1 files changed, 26 insertions(+), 1 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 59caa4b..027e159 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -77,6 +77,10 @@
 				//   file to look for '%PDF'
 #define pdfIdLength 32   // PDF Document IDs (PermanentId, UpdateId) length
 
+#define linearizationSearchSize 1024	// read this many bytes at beginning of
+					// file to look for linearization
+					// dictionary
+
 #define xrefSearchSize 1024	// read this many bytes at end of file
 				//   to look for 'startxref'
 
@@ -1035,7 +1039,28 @@ Guint PDFDoc::getStartXRef()
 {
   if (startXRefPos == ~(Guint)0) {
 
-    {
+    if (isLinearized()) {
+      char buf[linearizationSearchSize+1];
+      int c, n, i;
+
+      str->setPos(0);
+      for (n = 0; n < linearizationSearchSize; ++n) {
+        if ((c = str->getChar()) == EOF) {
+          break;
+        }
+        buf[n] = c;
+      }
+      buf[n] = '\0';
+
+      // find end of first obj
+      startXRefPos = 0;
+      for (i = 0; i < n; i++) {
+        if (!strncmp("endobj", &buf[i], 6)) {
+           startXRefPos = i+6;
+           break;
+        }
+      }
+    } else {
       char buf[xrefSearchSize+1];
       char *p;
       int c, n, i;
-- 
1.7.0.4


From d7e72c666f0f9430bf272a724853d03f4bec90ee Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Sun, 25 Apr 2010 17:34:49 +0200
Subject: [PATCH 12/12] Use linearization data to parse XRef entries

---
 poppler/PDFDoc.cc |   14 ++++++++++++--
 poppler/PDFDoc.h  |    3 +++
 poppler/XRef.cc   |   43 ++++++++++++++++++++++++++++++++++++++++++-
 poppler/XRef.h    |    6 +++++-
 4 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 027e159..138ae7a 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -237,7 +237,7 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
   GBool wasReconstructed = false;
 
   // read xref table
-  xref = new XRef(str, getStartXRef(), &wasReconstructed);
+  xref = new XRef(str, getStartXRef(), getMainXRefEntriesOffset(), &wasReconstructed);
   if (!xref->isOk()) {
     error(-1, "Couldn't read xref table");
     errCode = xref->getErrorCode();
@@ -258,7 +258,7 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
       // try one more time to contruct the Catalog, maybe the problem is damaged XRef 
       delete catalog;
       delete xref;
-      xref = new XRef(str, 0, NULL, true);
+      xref = new XRef(str, 0, 0, NULL, true);
       catalog = new Catalog(xref);
     }
 
@@ -1093,4 +1093,14 @@ Guint PDFDoc::getStartXRef()
   return startXRefPos;
 }
 
+Guint PDFDoc::getMainXRefEntriesOffset()
+{
+  Guint mainXRefEntriesOffset = 0;
+
+  if (isLinearized()) {
+    mainXRefEntriesOffset = getLinearization()->getMainXRefEntriesOffset();
+  }
+
+  return mainXRefEntriesOffset;
+}
 
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 457d41b..21f1864 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -245,6 +245,9 @@ private:
   GBool checkEncryption(GooString *ownerPassword, GooString *userPassword);
   // Get the offset of the start xref table.
   Guint getStartXRef();
+  // Get the offset of the entries in the main XRef table of a
+  // linearized document (0 for non linearized documents).
+  Guint getMainXRefEntriesOffset();
   Guint strToUnsigned(char *s);
 
   GooString *fileName;
diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index df23539..d9591e7 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -262,16 +262,19 @@ void XRef::init() {
   streamEnds = NULL;
   streamEndsLen = 0;
   objStrs = new PopplerCache(5);
+  mainXRefEntriesOffset = 0;
+  xRefStream = gFalse;
 }
 
 XRef::XRef() {
   init();
 }
 
-XRef::XRef(BaseStream *strA, Guint pos, GBool *wasReconstructed, GBool reconstruct) {
+XRef::XRef(BaseStream *strA, Guint pos, Guint mainXRefEntriesOffsetA, GBool *wasReconstructed, GBool reconstruct) {
   Object obj;
 
   init();
+  mainXRefEntriesOffset = mainXRefEntriesOffsetA;
 
   encrypted = gFalse;
   permFlags = defPermFlags;
@@ -442,6 +445,9 @@ GBool XRef::readXRef(Guint *pos, GooVector<Guint> *followedXRefStm) {
     if (!parser->getObj(&obj)->isStream()) {
       goto err1;
     }
+    if (trailerDict.isNone()) {
+      xRefStream = gTrue;
+    }
     more = readXRefStream(obj.getStream(), pos);
     obj.free();
 
@@ -1232,10 +1238,44 @@ void XRef::writeToFile(OutStream* outStr, GBool writeAllEntries) {
   }
 }
 
+GBool XRef::parseEntry(Guint offset, XRefEntry *entry)
+{
+  GBool r;
+
+  Object obj;
+  obj.initNull();
+  Parser parser = Parser(NULL, new Lexer(NULL,
+     str->makeSubStream(offset, gFalse, 20, &obj)), gTrue);
+
+  Object obj1, obj2, obj3;
+  if ((parser.getObj(&obj1)->isInt()) &&
+      (parser.getObj(&obj2)->isInt()) &&
+      (parser.getObj(&obj3)->isCmd("n") || obj3.isCmd("f"))) {
+    entry->offset = (Guint) obj1.getInt();
+    entry->gen = obj2.getInt();
+    entry->type = obj3.isCmd("n") ? xrefEntryUncompressed : xrefEntryFree;
+    entry->obj.initNull ();
+    entry->updated = false;
+    r = gTrue;
+  } else {
+    r = gFalse;
+  }
+  obj1.free();
+  obj2.free();
+  obj3.free();
+
+  return r;
+}
+
 XRefEntry *XRef::getEntry(int i)
 {
   if (entries[i].type == xrefEntryNone) {
 
+    if ((!xRefStream) && mainXRefEntriesOffset) {
+      if (!parseEntry(mainXRefEntriesOffset + 20*i, &entries[i])) {
+        error(-1, "Failed to parse XRef entry [%d].", i);
+      }
+    } else {
       GooVector<Guint> followedPrev;
       while (prevXRefOffset && entries[i].type == xrefEntryNone) {
         bool ok = true;
@@ -1265,6 +1305,7 @@ XRefEntry *XRef::getEntry(int i)
          error(-1, "Invalid XRef entry");
          entries[i].type = xrefEntryFree;
       }
+    }
   }
 
   return &entries[i];
diff --git a/poppler/XRef.h b/poppler/XRef.h
index 75b065a..2537757 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -65,7 +65,7 @@ public:
   // Constructor, create an empty XRef, used for PDF writing
   XRef();
   // Constructor.  Read xref table from stream.
-  XRef(BaseStream *strA, Guint pos, GBool *wasReconstructed = NULL, GBool reconstruct = false);
+  XRef(BaseStream *strA, Guint pos, Guint mainXRefEntriesOffsetA = 0, GBool *wasReconstructed = NULL, GBool reconstruct = false);
 
   // Destructor.
   ~XRef();
@@ -155,6 +155,8 @@ private:
   Guchar fileKey[16];		// file decryption key
   GBool ownerPasswordOk;	// true if owner password is correct
   Guint prevXRefOffset;		// position of prev XRef section (= next to read)
+  Guint mainXRefEntriesOffset;	// offset of entries in main XRef table
+  GBool xRefStream;		// true if last XRef section is a stream
 
   void init();
   int reserve(int newSize);
@@ -166,6 +168,8 @@ private:
   GBool readXRefStream(Stream *xrefStr, Guint *pos);
   GBool constructXRef(GBool *wasReconstructed);
   Guint strToUnsigned(char *s);
+  GBool parseEntry(Guint offset, XRefEntry *entry);
+
 };
 
 #endif
-- 
1.7.0.4
-------------- next part --------------
From 6368e51e92ed0ce75184e7c4ac43488d149dcc72 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 20 Apr 2010 19:03:54 +0200
Subject: [PATCH 01/17] add PDFDoc::getPage()

---
 poppler/PDFDoc.cc |    8 ++++++++
 poppler/PDFDoc.h  |    3 +++
 2 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 138ae7a..f2dfbe6 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -1104,3 +1104,11 @@ Guint PDFDoc::getMainXRefEntriesOffset()
   return mainXRefEntriesOffset;
 }
 
+Page *PDFDoc::getPage(int page)
+{
+  if ((page < 1) || page > getNumPages()) return NULL;
+
+  {
+    return catalog->getPage(page);
+  }
+}
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 21f1864..13b3c2f 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -128,6 +128,9 @@ public:
   // Return the structure tree root object.
   Object *getStructTreeRoot() { return catalog->getStructTreeRoot(); }
 
+  // Get page.
+  Page *getPage(int page);
+
   // Display a page.
   void displayPage(OutputDev *out, int page,
 		   double hDPI, double vDPI, int rotate,
-- 
1.7.0.4


From fb3aa271435e25e663b4666217abf5556d92fb89 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 20 Apr 2010 19:36:08 +0200
Subject: [PATCH 02/17] Use PDFDoc::getPage() in PDFDoc

---
 poppler/PDFDoc.cc |   24 ++++++++++++++++--------
 poppler/PDFDoc.h  |   10 +++++-----
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index f2dfbe6..45b47d2 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -413,11 +413,13 @@ void PDFDoc::displayPage(OutputDev *out, int page,
   if (globalParams->getPrintCommands()) {
     printf("***** page %d *****\n", page);
   }
-  if (catalog->getPage(page))
-    catalog->getPage(page)->display(out, hDPI, vDPI,
+
+  if (getPage(page))
+    getPage(page)->display(out, hDPI, vDPI,
 				    rotate, useMediaBox, crop, printing, catalog,
 				    abortCheckCbk, abortCheckCbkData,
 				    annotDisplayDecideCbk, annotDisplayDecideCbkData);
+
 }
 
 void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage,
@@ -444,8 +446,8 @@ void PDFDoc::displayPageSlice(OutputDev *out, int page,
 			      void *abortCheckCbkData,
                               GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
                               void *annotDisplayDecideCbkData) {
-  if (catalog->getPage(page))
-    catalog->getPage(page)->displaySlice(out, hDPI, vDPI,
+  if (getPage(page))
+    getPage(page)->displaySlice(out, hDPI, vDPI,
 					 rotate, useMediaBox, crop,
 					 sliceX, sliceY, sliceW, sliceH,
 					 printing, catalog,
@@ -454,12 +456,18 @@ void PDFDoc::displayPageSlice(OutputDev *out, int page,
 }
 
 Links *PDFDoc::getLinks(int page) {
-  return catalog->getPage(page) ? catalog->getPage(page)->getLinks(catalog) : NULL;
+  Page *p = getPage(page);
+  if (!p) {
+    Object obj;
+    obj.initNull();
+    return new Links (&obj, NULL);
+  }
+  return p->getLinks(catalog);
 }
-  
+
 void PDFDoc::processLinks(OutputDev *out, int page) {
-  if (catalog->getPage(page))
-    catalog->getPage(page)->processLinks(out, catalog);
+  if (getPage(page))
+    getPage(page)->processLinks(out, catalog);
 }
 
 Linearization *PDFDoc::getLinearization()
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 13b3c2f..ed0828c 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -108,15 +108,15 @@ public:
 
   // Get page parameters.
   double getPageMediaWidth(int page)
-    { return catalog->getPage(page)->getMediaWidth(); }
+    { return getPage(page) ? getPage(page)->getMediaWidth() : 0.0 ; }
   double getPageMediaHeight(int page)
-    { return catalog->getPage(page)->getMediaHeight(); }
+    { return getPage(page) ? getPage(page)->getMediaHeight() : 0.0 ; }
   double getPageCropWidth(int page)
-    { return catalog->getPage(page)->getCropWidth(); }
+    { return getPage(page) ? getPage(page)->getCropWidth() : 0.0 ; }
   double getPageCropHeight(int page)
-    { return catalog->getPage(page)->getCropHeight(); }
+    { return getPage(page) ? getPage(page)->getCropHeight() : 0.0 ; }
   int getPageRotate(int page)
-    { return catalog->getPage(page)->getRotate(); }
+    { return getPage(page) ? getPage(page)->getRotate() : 0 ; }
 
   // Get number of pages.
   int getNumPages() { return catalog->getNumPages(); }
-- 
1.7.0.4


From aee2ffb814935bfeb5e834bc56d69ed13e74ce97 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 20 Apr 2010 20:48:30 +0200
Subject: [PATCH 03/17] Use PDFDoc::getPage() in FontInfo

---
 poppler/FontInfo.cc |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/poppler/FontInfo.cc b/poppler/FontInfo.cc
index 0037e07..c348d14 100644
--- a/poppler/FontInfo.cc
+++ b/poppler/FontInfo.cc
@@ -70,7 +70,9 @@ GooList *FontInfoScanner::scan(int nPages) {
   }
 
   for (int pg = currentPage; pg < lastPage; ++pg) {
-    page = doc->getCatalog()->getPage(pg);
+    page = doc->getPage(pg);
+    if (!page) continue;
+
     if ((resDict = page->getResourceDict())) {
       scanFonts(resDict, result);
     }
-- 
1.7.0.4


From b85838af37ce617c19645e15d9cbb976caded207 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 22 Apr 2010 11:11:11 +0200
Subject: [PATCH 04/17] Use PDFDoc::getPage() in pdfinfo

---
 utils/pdfinfo.cc |   22 +++++++++++++++-------
 1 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index 2abe8b4..a94e4e8 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -257,7 +257,11 @@ int main(int argc, char *argv[]) {
   if (printBoxes) {
     if (multiPage) {
       for (pg = firstPage; pg <= lastPage; ++pg) {
-	page = doc->getCatalog()->getPage(pg);
+	page = doc->getPage(pg);
+	if (!page) {
+          error(-1, "Failed to print boxes for page %d", pg);
+	  continue;
+	}
 	sprintf(buf, "Page %4d MediaBox: ", pg);
 	printBox(buf, page->getMediaBox());
 	sprintf(buf, "Page %4d CropBox:  ", pg);
@@ -270,12 +274,16 @@ int main(int argc, char *argv[]) {
 	printBox(buf, page->getArtBox());
       }
     } else {
-      page = doc->getCatalog()->getPage(firstPage);
-      printBox("MediaBox:       ", page->getMediaBox());
-      printBox("CropBox:        ", page->getCropBox());
-      printBox("BleedBox:       ", page->getBleedBox());
-      printBox("TrimBox:        ", page->getTrimBox());
-      printBox("ArtBox:         ", page->getArtBox());
+      page = doc->getPage(firstPage);
+      if (!page) {
+        error(-1, "Failed to print boxes for page %d", firstPage);
+      } else {
+        printBox("MediaBox:       ", page->getMediaBox());
+        printBox("CropBox:        ", page->getCropBox());
+        printBox("BleedBox:       ", page->getBleedBox());
+        printBox("TrimBox:        ", page->getTrimBox());
+        printBox("ArtBox:         ", page->getArtBox());
+      }
     }
   }
 
-- 
1.7.0.4


From 6c5f96e4bdb6cd02ca6d822d49c0c03bd88152ce Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 22 Apr 2010 11:19:53 +0200
Subject: [PATCH 05/17] Use PDFDoc::getPage() in pdffonts

---
 utils/pdffonts.cc |    6 +++++-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/utils/pdffonts.cc b/utils/pdffonts.cc
index 81b20e4..30e25dc 100644
--- a/utils/pdffonts.cc
+++ b/utils/pdffonts.cc
@@ -166,7 +166,11 @@ int main(int argc, char *argv[]) {
   fonts = NULL;
   fontsLen = fontsSize = 0;
   for (pg = firstPage; pg <= lastPage; ++pg) {
-    page = doc->getCatalog()->getPage(pg);
+    page = doc->getPage(pg);
+    if (!page) {
+      error(-1, "Failed to read fonts from page %d", pg);
+      continue;
+    }
     if ((resDict = page->getResourceDict())) {
       scanFonts(resDict, doc);
     }
-- 
1.7.0.4


From b6c3f58b3a5810aceb598f7e5a4618cf012264a1 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 22 Apr 2010 15:52:20 +0200
Subject: [PATCH 06/17] Use PDFDoc::getPage() in glib

---
 glib/poppler-action.cc   |    4 ++--
 glib/poppler-document.cc |   17 ++++++++++-------
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/glib/poppler-action.cc b/glib/poppler-action.cc
index bcb2d36..ca88ca4 100644
--- a/glib/poppler-action.cc
+++ b/glib/poppler-action.cc
@@ -425,13 +425,13 @@ find_annot_movie_for_action (PopplerDocument *document,
 
     xref->fetch (ref->num, ref->gen, &annotObj);
   } else if (link->hasAnnotTitle ()) {
-    Catalog *catalog = document->doc->getCatalog ();
     Object annots;
     GooString *title = link->getAnnotTitle ();
     int i;
 
     for (i = 1; i <= document->doc->getNumPages (); ++i) {
-      Page *p = catalog->getPage (i);
+      Page *p = document->doc->getPage (i);
+      if (!p) continue;
 
       if (p->getAnnots (&annots)->isArray ()) {
         int j;
diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc
index 873fddb..d4301fe 100644
--- a/glib/poppler-document.cc
+++ b/glib/poppler-document.cc
@@ -442,15 +442,14 @@ PopplerPage *
 poppler_document_get_page (PopplerDocument  *document,
 			   int               index)
 {
-  Catalog *catalog;
   Page *page;
 
   g_return_val_if_fail (0 <= index &&
 			index < poppler_document_get_n_pages (document),
 			NULL);
 
-  catalog = document->doc->getCatalog();
-  page = catalog->getPage (index + 1);
+  page = document->doc->getPage (index + 1);
+  if (!page) return NULL;
 
   return _poppler_page_new (document, page, index);
 }
@@ -2489,18 +2488,22 @@ PopplerFormField *
 poppler_document_get_form_field (PopplerDocument *document,
 				 gint             id)
 {
-  Catalog *catalog = document->doc->getCatalog();
+  Page *page;
   unsigned pageNum;
   unsigned fieldNum;
   FormPageWidgets *widgets;
   FormWidget *field;
 
   FormWidget::decodeID (id, &pageNum, &fieldNum);
-  
-  widgets = catalog->getPage (pageNum)->getPageWidgets ();
+
+  page = document->doc->getPage (pageNum);
+  if (!page)
+    return NULL;
+
+  widgets = page->getPageWidgets ();
   if (!widgets)
     return NULL;
-  
+
   field = widgets->getWidget (fieldNum);
   if (field)
     return _poppler_form_field_new (document, field);
-- 
1.7.0.4


From 72dccfc2d1d395000b79eb63ad3e58a240ab65a9 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 22 Apr 2010 17:59:01 +0200
Subject: [PATCH 07/17] Use PDFDoc::getPage() in qt4

Note API change: With this patch, Document::page(int index) can now return NULL
when poppler fails to create a page. Any application using these bindings
should check the return value.
---
 qt4/src/poppler-document.cc |    8 +++++++-
 qt4/src/poppler-link.cc     |    6 ++++--
 qt4/src/poppler-page.cc     |    3 ++-
 qt4/src/poppler-qt4.h       |    3 +++
 4 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/qt4/src/poppler-document.cc b/qt4/src/poppler-document.cc
index 41d35b6..dc0ce97 100644
--- a/qt4/src/poppler-document.cc
+++ b/qt4/src/poppler-document.cc
@@ -98,7 +98,13 @@ namespace Poppler {
 
     Page *Document::page(int index) const
     {
-	return new Page(m_doc, index);
+	Page *page = new Page(m_doc, index);
+	if (!page->isOk()) {
+	  delete page;
+	  return NULL;
+	}
+
+	return page;
     }
 
     bool Document::isLocked() const
diff --git a/qt4/src/poppler-link.cc b/qt4/src/poppler-link.cc
index de06242..4f54201 100644
--- a/qt4/src/poppler-link.cc
+++ b/qt4/src/poppler-link.cc
@@ -232,9 +232,11 @@ class LinkMoviePrivate : public LinkPrivate
 		
 		int leftAux = 0, topAux = 0, rightAux = 0, bottomAux = 0;
 		
-		if (d->pageNum > 0 && d->pageNum <= data.doc->doc->getNumPages())
+		::Page *page;
+		if (d->pageNum > 0 &&
+		    d->pageNum <= data.doc->doc->getNumPages() &&
+		    (page = data.doc->doc->getPage( d->pageNum )))
 		{
-			::Page *page = data.doc->doc->getCatalog()->getPage( d->pageNum );
 			cvtUserToDev( page, left, top, &leftAux, &topAux );
 			cvtUserToDev( page, right, bottom, &rightAux, &bottomAux );
 			
diff --git a/qt4/src/poppler-page.cc b/qt4/src/poppler-page.cc
index 293d09b..e408877 100644
--- a/qt4/src/poppler-page.cc
+++ b/qt4/src/poppler-page.cc
@@ -190,8 +190,9 @@ Page::Page(DocumentData *doc, int index) {
   m_page = new PageData();
   m_page->index = index;
   m_page->parentDoc = doc;
-  m_page->page = doc->doc->getCatalog()->getPage(m_page->index + 1);
+  m_page->page = doc->doc->getPage(m_page->index + 1);
   m_page->transition = 0;
+  ok = m_page->page ? true : false;
 }
 
 Page::~Page()
diff --git a/qt4/src/poppler-qt4.h b/qt4/src/poppler-qt4.h
index 5ddaaf8..a3bc366 100644
--- a/qt4/src/poppler-qt4.h
+++ b/qt4/src/poppler-qt4.h
@@ -609,11 +609,14 @@ delete it;
 	**/
 	QString label() const;
 	
+	bool isOk() { return ok; };
+
     private:
 	Q_DISABLE_COPY(Page)
 
 	Page(DocumentData *doc, int index);
 	PageData *m_page;
+        bool ok;
     };
 
 /**
-- 
1.7.0.4


From 48d7c7b4caba98998d839830eb15968610404096 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Fri, 23 Apr 2010 09:21:23 +0200
Subject: [PATCH 08/17] Use PDFDoc::getPage() in qt

Note API change: With this patch, Document::getPage(int index) can now
return NULL when poppler fails to create a page. Any application using
these bindings should check the return value.
---
 qt/poppler-document.cc |   11 +++++++++++
 qt/poppler-page.cc     |   11 +++++++----
 qt/poppler-qt.h        |    6 +++++-
 3 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/qt/poppler-document.cc b/qt/poppler-document.cc
index bade1d1..1a5892b 100644
--- a/qt/poppler-document.cc
+++ b/qt/poppler-document.cc
@@ -113,6 +113,17 @@ int Document::getNumPages() const
   return data->doc.getNumPages();
 }
 
+Page *Document::getPage(int index) const
+{
+  Page *p = new Page(this, index);
+  if (!p->isOk()) {
+    delete p;
+    return NULL;
+  }
+
+  return p;
+}
+
 QValueList<FontInfo> Document::fonts() const
 {
   QValueList<FontInfo> ourList;
diff --git a/qt/poppler-page.cc b/qt/poppler-page.cc
index a42aa15..ef077a7 100644
--- a/qt/poppler-page.cc
+++ b/qt/poppler-page.cc
@@ -47,6 +47,7 @@ class PageData {
   const Document *doc;
   int index;
   PageTransition *transition;
+  ::Page *page;
 };
 
 Page::Page(const Document *doc, int index) {
@@ -54,6 +55,8 @@ Page::Page(const Document *doc, int index) {
   data->index = index;
   data->doc = doc;
   data->transition = 0;
+  data->page = doc->data->doc.getPage(data->index + 1);
+  ok = data->page ? true : false;
 }
 
 Page::~Page()
@@ -132,7 +135,7 @@ QString Page::getText(const Rectangle &r) const
   output_dev = new TextOutputDev(0, gFalse, gFalse, gFalse);
   data->doc->data->doc.displayPageSlice(output_dev, data->index + 1, 72, 72,
       0, false, false, false, -1, -1, -1, -1);
-  p = data->doc->data->doc.getCatalog()->getPage(data->index + 1);
+  p = data->page;
   if (r.isNull())
   {
     rect = p->getCropBox();
@@ -197,7 +200,7 @@ PageTransition *Page::getTransition() const
   {
     Object o;
     PageTransitionParams params;
-    params.dictObj = data->doc->data->doc.getCatalog()->getPage(data->index + 1)->getTrans(&o);
+    params.dictObj = data->page->getTrans(&o);
     data->transition = new PageTransition(params);
     o.free();
   }
@@ -208,7 +211,7 @@ QSize Page::pageSize() const
 {
   ::Page *p;
 
-  p = data->doc->data->doc.getCatalog()->getPage(data->index + 1);
+  p = data->page;
   if ( ( Page::Landscape == orientation() ) || (Page::Seascape == orientation() ) ) {
     return QSize( (int)p->getCropHeight(), (int)p->getCropWidth() );
   } else {
@@ -218,7 +221,7 @@ QSize Page::pageSize() const
 
 Page::Orientation Page::orientation() const
 {
-  ::Page *p = data->doc->data->doc.getCatalog()->getPage(data->index + 1);
+  ::Page *p = data->page;
 
   int rotation = p->getRotate();
   switch (rotation) {
diff --git a/qt/poppler-qt.h b/qt/poppler-qt.h
index a6b1e6e..549ffd2 100644
--- a/qt/poppler-qt.h
+++ b/qt/poppler-qt.h
@@ -31,6 +31,7 @@
 #include <qdom.h>
 #include <qpixmap.h>
 
+
 namespace Poppler {
 
 class Document;
@@ -198,9 +199,12 @@ class Page {
     */
     QValueList<Link*> links() const;
 
+    bool isOk() { return ok; };
+
   private:
     Page(const Document *doc, int index);
     PageData *data;
+    bool ok;
 };
 
 class DocumentData;
@@ -219,7 +223,7 @@ public:
   
   static Document *load(const QString & filePath);
   
-  Page *getPage(int index) const{ return new Page(this, index); }
+  Page *getPage(int index) const;
   
   int getNumPages() const;
   
-- 
1.7.0.4


From 8fb153a620d1434c97f9e034fad7e8918bad9308 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Fri, 23 Apr 2010 12:07:39 +0200
Subject: [PATCH 09/17] Use PDFDoc::getPage() in PSOutputDev

---
 glib/poppler-page.cc            |    1 +
 poppler/PSOutputDev.cc          |   37 ++++++++++++++++++++++---------------
 poppler/PSOutputDev.h           |   13 ++++++++-----
 qt/poppler-document.cc          |    2 +-
 qt4/src/poppler-ps-converter.cc |    1 +
 utils/pdftohtml.cc              |    2 +-
 utils/pdftops.cc                |    2 +-
 7 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc
index 0e5de5c..16c1485 100644
--- a/glib/poppler-page.cc
+++ b/glib/poppler-page.cc
@@ -1220,6 +1220,7 @@ poppler_page_render_to_ps (PopplerPage   *page,
 
   if (!ps_file->out)
     ps_file->out = new PSOutputDev (ps_file->filename,
+                                    ps_file->document->doc,
                                     ps_file->document->doc->getXRef(),
                                     ps_file->document->doc->getCatalog(),
                                     NULL,
diff --git a/poppler/PSOutputDev.cc b/poppler/PSOutputDev.cc
index 179a494..5e5d3d0 100644
--- a/poppler/PSOutputDev.cc
+++ b/poppler/PSOutputDev.cc
@@ -70,6 +70,7 @@
 #  include "SplashOutputDev.h"
 #endif
 #include "PSOutputDev.h"
+#include "PDFDoc.h"
 
 #ifdef MACOS
 // needed for setting type/creator of MacOS files
@@ -972,7 +973,7 @@ static void outputToFile(void *stream, char *data, int len) {
   fwrite(data, 1, len, (FILE *)stream);
 }
 
-PSOutputDev::PSOutputDev(const char *fileName, XRef *xrefA, Catalog *catalog,
+PSOutputDev::PSOutputDev(const char *fileName, PDFDoc *doc, XRef *xrefA, Catalog *catalog,
 			 char *psTitle,
 			 int firstPage, int lastPage, PSOutMode modeA,
 			 int paperWidthA, int paperHeightA, GBool duplexA,
@@ -1033,13 +1034,14 @@ PSOutputDev::PSOutputDev(const char *fileName, XRef *xrefA, Catalog *catalog,
   }
 
   init(outputToFile, f, fileTypeA, psTitle,
-       xrefA, catalog, firstPage, lastPage, modeA,
+       doc, xrefA, catalog, firstPage, lastPage, modeA,
        imgLLXA, imgLLYA, imgURXA, imgURYA, manualCtrlA,
        paperWidthA, paperHeightA, duplexA);
 }
 
 PSOutputDev::PSOutputDev(PSOutputFunc outputFuncA, void *outputStreamA,
 			 char *psTitle,
+			 PDFDoc *doc,
 			 XRef *xrefA, Catalog *catalog,
 			 int firstPage, int lastPage, PSOutMode modeA,
 			 int paperWidthA, int paperHeightA, GBool duplexA,
@@ -1068,18 +1070,17 @@ PSOutputDev::PSOutputDev(PSOutputFunc outputFuncA, void *outputStreamA,
   forceRasterize = forceRasterizeA;
 
   init(outputFuncA, outputStreamA, psGeneric, psTitle,
-       xrefA, catalog, firstPage, lastPage, modeA,
+       doc, xrefA, catalog, firstPage, lastPage, modeA,
        imgLLXA, imgLLYA, imgURXA, imgURYA, manualCtrlA,
        paperWidthA, paperHeightA, duplexA);
 }
 
 void PSOutputDev::init(PSOutputFunc outputFuncA, void *outputStreamA,
-		       PSFileType fileTypeA, char *pstitle, XRef *xrefA, Catalog *catalog,
+		       PSFileType fileTypeA, char *pstitle, PDFDoc *doc, XRef *xrefA, Catalog *catalog,
 		       int firstPage, int lastPage, PSOutMode modeA,
 		       int imgLLXA, int imgLLYA, int imgURXA, int imgURYA,
 		       GBool manualCtrlA, int paperWidthA, int paperHeightA,
 		       GBool duplexA) {
-  Page *page;
   PDFRectangle *box;
 
   // initialize
@@ -1099,12 +1100,12 @@ void PSOutputDev::init(PSOutputFunc outputFuncA, void *outputStreamA,
   imgURX = imgURXA;
   imgURY = imgURYA;
   if (paperWidth < 0 || paperHeight < 0) {
-    // this check is needed in case the document has zero pages
-    if (firstPage > 0 && firstPage <= catalog->getNumPages()) {
-      page = catalog->getPage(firstPage);
+    Page *page;
+    if ((page = doc->getPage(firstPage))) {
       paperWidth = (int)ceil(page->getMediaWidth());
       paperHeight = (int)ceil(page->getMediaHeight());
     } else {
+      error(-1, "Invalid page %d", firstPage);
       paperWidth = 1;
       paperHeight = 1;
     }
@@ -1170,14 +1171,16 @@ void PSOutputDev::init(PSOutputFunc outputFuncA, void *outputStreamA,
   embFontList = new GooString();
 
   if (!manualCtrl) {
+    Page *page;
     // this check is needed in case the document has zero pages
-    if (firstPage > 0 && firstPage <= catalog->getNumPages()) {
+    if ((page = doc->getPage(firstPage))) {
       writeHeader(firstPage, lastPage,
-		  catalog->getPage(firstPage)->getMediaBox(),
-		  catalog->getPage(firstPage)->getCropBox(),
-		  catalog->getPage(firstPage)->getRotate(),
+		  page->getMediaBox(),
+		  page->getCropBox(),
+		  page->getRotate(),
 		  pstitle);
     } else {
+      error(-1, "Invalid page %d", firstPage);
       box = new PDFRectangle(0, 0, 1, 1);
       writeHeader(firstPage, lastPage, box, box, 0, pstitle);
       delete box;
@@ -1190,7 +1193,7 @@ void PSOutputDev::init(PSOutputFunc outputFuncA, void *outputStreamA,
       writePS("%%EndProlog\n");
       writePS("%%BeginSetup\n");
     }
-    writeDocSetup(catalog, firstPage, lastPage, duplexA);
+    writeDocSetup(doc, catalog, firstPage, lastPage, duplexA);
     if (mode != psModeForm) {
       writePS("%%EndSetup\n");
     }
@@ -1400,7 +1403,7 @@ void PSOutputDev::writeXpdfProcset() {
   }
 }
 
-void PSOutputDev::writeDocSetup(Catalog *catalog,
+void PSOutputDev::writeDocSetup(PDFDoc *doc, Catalog *catalog,
 				int firstPage, int lastPage,
                                 GBool duplexA) {
   Page *page;
@@ -1416,7 +1419,11 @@ void PSOutputDev::writeDocSetup(Catalog *catalog,
     writePS("xpdf begin\n");
   }
   for (pg = firstPage; pg <= lastPage; ++pg) {
-    page = catalog->getPage(pg);
+    page = doc->getPage(pg);
+    if (!page) {
+      error(-1, "Failed writing resources for page %d", pg);
+      continue;
+    }
     if ((resDict = page->getResourceDict())) {
       setupResources(resDict);
     }
diff --git a/poppler/PSOutputDev.h b/poppler/PSOutputDev.h
index 38c838c..a84a638 100644
--- a/poppler/PSOutputDev.h
+++ b/poppler/PSOutputDev.h
@@ -50,6 +50,7 @@ struct PSFont8Info;
 struct PSFont16Enc;
 class PSOutCustomColor;
 class Function;
+class PDFDoc;
 
 //------------------------------------------------------------------------
 // PSOutputDev
@@ -75,7 +76,7 @@ class PSOutputDev: public OutputDev {
 public:
 
   // Open a PostScript output file, and write the prolog.
-  PSOutputDev(const char *fileName, XRef *xrefA, Catalog *catalog,
+  PSOutputDev(const char *fileName, PDFDoc *doc, XRef *xrefA, Catalog *catalog,
 	      char *psTitle,
 	      int firstPage, int lastPage, PSOutMode modeA,
 	      int paperWidthA = -1, int paperHeightA = -1,
@@ -88,6 +89,7 @@ public:
   // Open a PSOutputDev that will write to a generic stream.
   PSOutputDev(PSOutputFunc outputFuncA, void *outputStreamA,
 	      char *psTitle,
+	      PDFDoc *doc,
 	      XRef *xrefA, Catalog *catalog,
 	      int firstPage, int lastPage, PSOutMode modeA,
 	      int paperWidthA = -1, int paperHeightA = -1,
@@ -145,9 +147,6 @@ public:
   // Write the Xpdf procset.
   void writeXpdfProcset();
 
-  // Write the document-level setup.
-  void writeDocSetup(Catalog *catalog, int firstPage, int lastPage, GBool duplexA);
-
   // Write the trailer for the current page.
   void writePageTrailer();
 
@@ -287,7 +286,7 @@ public:
 private:
 
   void init(PSOutputFunc outputFuncA, void *outputStreamA,
-	    PSFileType fileTypeA, char *pstitle, XRef *xrefA, Catalog *catalog,
+	    PSFileType fileTypeA, char *pstitle, PDFDoc *doc, XRef *xrefA, Catalog *catalog,
 	    int firstPage, int lastPage, PSOutMode modeA,
 	    int imgLLXA, int imgLLYA, int imgURXA, int imgURYA,
 	    GBool manualCtrlA, int paperWidthA, int paperHeightA,
@@ -341,6 +340,10 @@ private:
 		    double *x1, double *y1);
 #endif
   void cvtFunction(Function *func);
+
+  // Write the document-level setup.
+  void writeDocSetup(PDFDoc *doc, Catalog *catalog, int firstPage, int lastPage, GBool duplexA);
+
   void writePSChar(char c);
   void writePS(char *s);
   void writePSFmt(const char *fmt, ...);
diff --git a/qt/poppler-document.cc b/qt/poppler-document.cc
index 1a5892b..03d01fa 100644
--- a/qt/poppler-document.cc
+++ b/qt/poppler-document.cc
@@ -325,7 +325,7 @@ bool Document::print(const QString &fileName, QValueList<int> pageList, double h
 
 bool Document::print(const QString &file, QValueList<int> pageList, double hDPI, double vDPI, int rotate, int paperWidth, int paperHeight)
 {
-  PSOutputDev *psOut = new PSOutputDev(file.latin1(), data->doc.getXRef(), data->doc.getCatalog(), NULL, 1, data->doc.getNumPages(), psModePS, paperWidth, paperHeight);
+  PSOutputDev *psOut = new PSOutputDev(file.latin1(), &(data->doc), data->doc.getXRef(), data->doc.getCatalog(), NULL, 1, data->doc.getNumPages(), psModePS, paperWidth, paperHeight);
   
   if (psOut->isOk()) {
     QValueList<int>::iterator it;
diff --git a/qt4/src/poppler-ps-converter.cc b/qt4/src/poppler-ps-converter.cc
index 7a1957b..9dc82ec 100644
--- a/qt4/src/poppler-ps-converter.cc
+++ b/qt4/src/poppler-ps-converter.cc
@@ -195,6 +195,7 @@ bool PSConverter::convert()
 	
 	PSOutputDev *psOut = new PSOutputDev(outputToQIODevice, dev,
 	                                     pstitlechar,
+	                                     d->document->doc,
 	                                     d->document->doc->getXRef(),
 	                                     d->document->doc->getCatalog(),
 	                                     1,
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index 5323b6e..3723b44 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -457,7 +457,7 @@ int main(int argc, char *argv[]) {
       psFileName = new GooString(htmlFileName->getCString());
       psFileName->append(".ps");
 
-      psOut = new PSOutputDev(psFileName->getCString(), doc->getXRef(),
+      psOut = new PSOutputDev(psFileName->getCString(), doc, doc->getXRef(),
           doc->getCatalog(), NULL, firstPage, lastPage, psModePS, w, h);
       psOut->setDisplayText(gFalse);
       doc->displayPages(psOut, firstPage, lastPage, 72, 72, 0,
diff --git a/utils/pdftops.cc b/utils/pdftops.cc
index 0bc43a1..8231458 100644
--- a/utils/pdftops.cc
+++ b/utils/pdftops.cc
@@ -359,7 +359,7 @@ int main(int argc, char *argv[]) {
   }
 
   // write PostScript file
-  psOut = new PSOutputDev(psFileName->getCString(), doc->getXRef(),
+  psOut = new PSOutputDev(psFileName->getCString(), doc, doc->getXRef(),
 			  doc->getCatalog(), NULL, firstPage, lastPage, mode,
 			  paperWidth,
 			  paperHeight,
-- 
1.7.0.4


From 625c502791dcc7b85684184e3188f207e7312c2a Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Sat, 24 Apr 2010 10:17:56 +0200
Subject: [PATCH 10/17] Use PDFDoc::getPage() in HtmlOutputDev

---
 utils/HtmlOutputDev.cc |    2 +-
 utils/HtmlOutputDev.h  |    2 ++
 2 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index 8ff8f08..1e3a3ef 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -1116,7 +1116,7 @@ void HtmlOutputDev::startPage(int pageNum, GfxState *state) {
 
 
 void HtmlOutputDev::endPage() {
-  Links *linksList = catalog->getPage(pageNum)->getLinks(catalog);
+  Links *linksList = docPage->getLinks(catalog);
   for (int i = 0; i < linksList->getNumLinks(); ++i)
   {
       doProcessLink(linksList->getLink(i));
diff --git a/utils/HtmlOutputDev.h b/utils/HtmlOutputDev.h
index 24ccfd1..48b04c6 100644
--- a/utils/HtmlOutputDev.h
+++ b/utils/HtmlOutputDev.h
@@ -256,6 +256,7 @@ public:
                                GBool (* abortCheckCbk)(void *data) = NULL,
                                void * abortCheckCbkData = NULL)
   {
+   docPage = page;
    catalog = catalogA;
    return gTrue;
   }
@@ -323,6 +324,7 @@ private:
   GooString *docTitle;
   GooList *glMetaVars;
   Catalog *catalog;
+  Page *docPage;
   friend class HtmlPage;
 };
 
-- 
1.7.0.4


From 0716686b26c538d9ef5987b9d1b5fadf2b421c48 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 31 Mar 2010 14:39:57 +0200
Subject: [PATCH 11/17] Parse page tree on demand

---
 poppler/Catalog.cc |  283 +++++++++++++++++++++++++++++++++++-----------------
 poppler/Catalog.h  |   12 ++-
 2 files changed, 199 insertions(+), 96 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index dbf9af2..d524c8c 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -59,9 +59,6 @@ Catalog::Catalog(XRef *xrefA) {
   Object catDict, pagesDict, pagesDictRef;
   Object obj, obj2;
   Object optContentProps;
-  char *alreadyRead;
-  int numPages0;
-  int i;
 
   ok = gTrue;
   xref = xrefA;
@@ -78,6 +75,12 @@ Catalog::Catalog(XRef *xrefA) {
   embeddedFileNameTree = NULL;
   jsNameTree = NULL;
 
+  pagesList = NULL;
+  pagesRefList = NULL;
+  attrsList = NULL;
+  kidsIdxList = NULL;
+  lastCachedPage = 0;
+
   xref->getCatalog(&catDict);
   if (!catDict.isDict()) {
     error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
@@ -100,31 +103,11 @@ Catalog::Catalog(XRef *xrefA) {
   if (!obj.isNum()) {
     error(-1, "Page count in top-level pages object is wrong type (%s)",
 	  obj.getTypeName());
-    pagesSize = numPages0 = 0;
+    numPages = 0;
   } else {
-    pagesSize = numPages0 = (int)obj.getNum();
+    numPages = (int)obj.getNum();
   }
   obj.free();
-  pages = (Page **)gmallocn(pagesSize, sizeof(Page *));
-  pageRefs = (Ref *)gmallocn(pagesSize, sizeof(Ref));
-  for (i = 0; i < pagesSize; ++i) {
-    pages[i] = NULL;
-    pageRefs[i].num = -1;
-    pageRefs[i].gen = -1;
-  }
-  alreadyRead = (char *)gmalloc(xref->getNumObjects());
-  memset(alreadyRead, 0, xref->getNumObjects());
-  if (catDict.dictLookupNF("Pages", &pagesDictRef)->isRef() &&
-      pagesDictRef.getRefNum() >= 0 &&
-      pagesDictRef.getRefNum() < xref->getNumObjects()) {
-    alreadyRead[pagesDictRef.getRefNum()] = 1;
-  }
-  pagesDictRef.free();
-  numPages = readPageTree(pagesDict.getDict(), NULL, 0, alreadyRead);
-  gfree(alreadyRead);
-  if (numPages != numPages0) {
-    error(-1, "Page count in top-level pages object is incorrect");
-  }
   pagesDict.free();
 
   // read base URI
@@ -161,10 +144,24 @@ Catalog::Catalog(XRef *xrefA) {
 }
 
 Catalog::~Catalog() {
-  int i;
-
+  delete kidsIdxList;
+  if (attrsList) {
+    GooVector<PageAttrs *>::iterator it;
+    for (it = attrsList->begin() ; it < attrsList->end(); it++ ) {
+      delete *it;
+    }
+    delete attrsList;
+  }
+  delete pagesRefList;
+  if (pagesList) {
+    GooVector<Dict *>::iterator it;
+    for (it = pagesList->begin() ; it < pagesList->end(); it++ ) {
+      delete *it;
+    }
+    delete pagesList;
+  }
   if (pages) {
-    for (i = 0; i < pagesSize; ++i) {
+    for (int i = 0; i < pagesSize; ++i) {
       if (pages[i]) {
 	delete pages[i];
       }
@@ -221,91 +218,193 @@ GooString *Catalog::readMetadata() {
   return s;
 }
 
-int Catalog::readPageTree(Dict *pagesDict, PageAttrs *attrs, int start,
-			  char *alreadyRead) {
-  Object kids;
-  Object kid;
-  Object kidRef;
-  PageAttrs *attrs1, *attrs2;
-  Page *page;
-  int i, j;
-
-  attrs1 = new PageAttrs(attrs, pagesDict);
-  pagesDict->lookup("Kids", &kids);
-  if (!kids.isArray()) {
-    error(-1, "Kids object (page %d) is wrong type (%s)",
-	  start+1, kids.getTypeName());
-    return start;
-  }
-  for (i = 0; i < kids.arrayGetLength(); ++i) {
-    kids.arrayGetNF(i, &kidRef);
-    if (kidRef.isRef() &&
-	kidRef.getRefNum() >= 0 &&
-	kidRef.getRefNum() < xref->getNumObjects()) {
-      if (alreadyRead[kidRef.getRefNum()]) {
-	error(-1, "Loop in Pages tree");
-	kidRef.free();
-	continue;
+Page *Catalog::getPage(int i)
+{
+  if (i < 1) return NULL;
+
+  if (i > lastCachedPage) {
+     if (cachePageTree(i) == gFalse) return NULL;
+  }
+  return pages[i-1];
+}
+
+Ref *Catalog::getPageRef(int i)
+{
+  if (i < 1) return NULL;
+
+  if (i > lastCachedPage) {
+     if (cachePageTree(i) == gFalse) return NULL;
+  }
+  return &pageRefs[i-1];
+}
+
+GBool Catalog::cachePageTree(int page)
+{
+  Dict *pagesDict;
+
+  if (pagesList == NULL) {
+
+    Object catDict;
+    Ref pagesRef;
+
+    xref->getCatalog(&catDict);
+
+    Object pagesDictRef;
+    if (catDict.dictLookupNF("Pages", &pagesDictRef)->isRef() &&
+        pagesDictRef.getRefNum() >= 0 &&
+        pagesDictRef.getRefNum() < xref->getNumObjects()) {
+      pagesRef = pagesDictRef.getRef();
+      pagesDictRef.free();
+    } else {
+       error(-1, "Catalog dictionary does not contain a valid \"Pages\" entry");
+       pagesDictRef.free();
+       return gFalse;
+    }
+
+    Object obj;
+    catDict.dictLookup("Pages", &obj);
+    catDict.free();
+    // This should really be isDict("Pages"), but I've seen at least one
+    // PDF file where the /Type entry is missing.
+    if (obj.isDict()) {
+      obj.getDict()->incRef();
+      pagesDict = obj.getDict();
+      obj.free();
+    }
+    else {
+      error(-1, "Top-level pages object is wrong type (%s)", obj.getTypeName());
+      obj.free();
+      return gFalse;
+    }
+
+    pagesSize = numPages;
+    pages = (Page **)gmallocn(pagesSize, sizeof(Page *));
+    pageRefs = (Ref *)gmallocn(pagesSize, sizeof(Ref));
+    for (int i = 0; i < pagesSize; ++i) {
+      pages[i] = NULL;
+      pageRefs[i].num = -1;
+      pageRefs[i].gen = -1;
+    }
+
+    pagesList = new GooVector<Dict *>();
+    pagesList->push_back(pagesDict);
+    pagesRefList = new GooVector<Ref>();
+    pagesRefList->push_back(pagesRef);
+    attrsList = new GooVector<PageAttrs *>();
+    attrsList->push_back(new PageAttrs(NULL, pagesDict));
+    kidsIdxList = new GooVector<int>();
+    kidsIdxList->push_back(0);
+    lastCachedPage = 0;
+
+  }
+
+  while(1) {
+
+    if (page <= lastCachedPage) return gTrue;
+
+    if (pagesList->empty()) return gFalse;
+
+    pagesDict = pagesList->back();
+    Object kids;
+    pagesDict->lookup("Kids", &kids);
+    if (!kids.isArray()) {
+      error(-1, "Kids object (page %d) is wrong type (%s)",
+            lastCachedPage+1, kids.getTypeName());
+      kids.free();
+      return gFalse;
+    }
+
+    int kidsIdx = kidsIdxList->back();
+    if (kidsIdx >= kids.arrayGetLength()) {
+       delete pagesList->back();
+       pagesList->pop_back();
+       pagesRefList->pop_back();
+       delete attrsList->back();
+       attrsList->pop_back();
+       kidsIdxList->pop_back();
+       if (!kidsIdxList->empty()) kidsIdxList->back()++;
+       kids.free();
+       continue;
+    }
+
+    Object kidRef;
+    kids.arrayGetNF(kidsIdx, &kidRef);
+    if (!kidRef.isRef()) {
+      error(-1, "Kid object (page %d) is not an indirect reference (%s)",
+            lastCachedPage+1, kidRef.getTypeName());
+      kidRef.free();
+      kids.free();
+      return gFalse;
+    }
+
+    for (size_t i = 0; i < pagesRefList->size(); i++) {
+      if (((*pagesRefList)[i]).num == kidRef.getRefNum()) {
+         error(-1, "Loop in Pages tree");
+         kidRef.free();
+         kids.free();
+         kidsIdxList->back()++;
+         continue;
       }
-      alreadyRead[kidRef.getRefNum()] = 1;
     }
-    kids.arrayGet(i, &kid);
+
+    Object kid;
+    kids.arrayGet(kidsIdx, &kid);
+    kids.free();
     if (kid.isDict("Page")) {
-      attrs2 = new PageAttrs(attrs1, kid.getDict());
-      page = new Page(xref, start+1, kid.getDict(), kidRef.getRef(), attrs2, getForm());
-      if (!page->isOk()) {
-	++start;
-	goto err3;
+      PageAttrs *attrs = new PageAttrs(attrsList->back(), kid.getDict());
+      Page *p = new Page(xref, lastCachedPage+1, kid.getDict(),
+                     kidRef.getRef(), attrs, form);
+      if (!p->isOk()) {
+        error(-1, "Failed to create page (page %d)", lastCachedPage+1);
+        delete p;
+        kidRef.free();
+        kid.free();
+        return gFalse;
       }
-      if (start >= pagesSize) {
-	pagesSize += 32;
-	pages = (Page **)greallocn(pages, pagesSize, sizeof(Page *));
-	pageRefs = (Ref *)greallocn(pageRefs, pagesSize, sizeof(Ref));
-	for (j = pagesSize - 32; j < pagesSize; ++j) {
-	  pages[j] = NULL;
-	  pageRefs[j].num = -1;
-	  pageRefs[j].gen = -1;
-	}
-      }
-      pages[start] = page;
-      if (kidRef.isRef()) {
-	pageRefs[start].num = kidRef.getRefNum();
-	pageRefs[start].gen = kidRef.getRefGen();
+
+      if (lastCachedPage >= numPages) {
+        error(-1, "Page count in top-level pages object is incorrect");
+        kidRef.free();
+        kid.free();
+        return gFalse;
       }
-      ++start;
+
+      pages[lastCachedPage] = p;
+      pageRefs[lastCachedPage].num = kidRef.getRefNum();
+      pageRefs[lastCachedPage].gen = kidRef.getRefGen();
+
+      lastCachedPage++;
+      kidsIdxList->back()++;
+
     // This should really be isDict("Pages"), but I've seen at least one
     // PDF file where the /Type entry is missing.
     } else if (kid.isDict()) {
-      if ((start = readPageTree(kid.getDict(), attrs1, start, alreadyRead))
-	  < 0)
-	goto err2;
+      attrsList->push_back(new PageAttrs(attrsList->back(), kid.getDict()));
+      pagesRefList->push_back(kidRef.getRef());
+      kid.getDict()->incRef();
+      pagesList->push_back(kid.getDict());
+      kidsIdxList->push_back(0);
     } else {
       error(-1, "Kid object (page %d) is wrong type (%s)",
-	    start+1, kid.getTypeName());
+            lastCachedPage+1, kid.getTypeName());
+      kidRef.free();
+      kid.free();
+      return gFalse;
     }
-    kid.free();
     kidRef.free();
+    kid.free();
+
   }
-  delete attrs1;
-  kids.free();
-  return start;
 
- err3:
-  delete page;
- err2:
-  kid.free();
-  kidRef.free();
-  kids.free();
-  delete attrs1;
-  ok = gFalse;
-  return -1;
+  return gFalse;
 }
 
 int Catalog::findPage(int num, int gen) {
   int i;
 
   for (i = 0; i < numPages; ++i) {
-    if (pageRefs[i].num == num && pageRefs[i].gen == gen)
+    Ref *ref = getPageRef(i+1);
+    if (ref->num == num && ref->gen == gen)
       return i + 1;
   }
   return 0;
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 2cab80a..5a25109 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -151,10 +151,10 @@ public:
   int getNumPages() { return numPages; }
 
   // Get a page.
-  Page *getPage(int i) { return pages[i-1]; }
+  Page *getPage(int i);
 
   // Get the reference for a page object.
-  Ref *getPageRef(int i) { return &pageRefs[i-1]; }
+  Ref *getPageRef(int i);
 
   // Return base URI, or NULL if none.
   GooString *getBaseURI() { return baseURI; }
@@ -232,6 +232,11 @@ private:
   XRef *xref;			// the xref table for this PDF file
   Page **pages;			// array of pages
   Ref *pageRefs;		// object ID for each page
+  int lastCachedPage;
+  GooVector<Dict *> *pagesList;
+  GooVector<Ref> *pagesRefList;
+  GooVector<PageAttrs *> *attrsList;
+  GooVector<int> *kidsIdxList;
   Form *form;
   int numPages;			// number of pages
   int pagesSize;		// size of pages array
@@ -251,8 +256,7 @@ private:
   PageMode pageMode;		// page mode
   PageLayout pageLayout;	// page layout
 
-  int readPageTree(Dict *pages, PageAttrs *attrs, int start,
-		   char *alreadyRead);
+  GBool cachePageTree(int page); // Cache first <page> pages.
   Object *findDestInTree(Object *tree, GooString *name, Object *obj);
 
   Object *getNames();
-- 
1.7.0.4


From f127699551a03278c59dc7f90abe2c444745d2b2 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 24 Mar 2010 22:01:41 +0100
Subject: [PATCH 12/17] Parse number of pages on demand

---
 poppler/Catalog.cc |   70 +++++++++++++++++++++++++++++++--------------------
 poppler/Catalog.h  |    2 +-
 2 files changed, 43 insertions(+), 29 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index d524c8c..8a6f7d5 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -64,7 +64,8 @@ Catalog::Catalog(XRef *xrefA) {
   xref = xrefA;
   pages = NULL;
   pageRefs = NULL;
-  numPages = pagesSize = 0;
+  numPages = -1;
+  pagesSize = 0;
   baseURI = NULL;
   pageLabelInfo = NULL;
   form = NULL;
@@ -89,27 +90,6 @@ Catalog::Catalog(XRef *xrefA) {
   // get the AcroForm dictionary
   catDict.dictLookup("AcroForm", &acroForm);
 
-  // read page tree
-  catDict.dictLookup("Pages", &pagesDict);
-  // This should really be isDict("Pages"), but I've seen at least one
-  // PDF file where the /Type entry is missing.
-  if (!pagesDict.isDict()) {
-    error(-1, "Top-level pages object is wrong type (%s)",
-	  pagesDict.getTypeName());
-    goto err2;
-  }
-  pagesDict.dictLookup("Count", &obj);
-  // some PDF files actually use real numbers here ("/Count 9.0")
-  if (!obj.isNum()) {
-    error(-1, "Page count in top-level pages object is wrong type (%s)",
-	  obj.getTypeName());
-    numPages = 0;
-  } else {
-    numPages = (int)obj.getNum();
-  }
-  obj.free();
-  pagesDict.free();
-
   // read base URI
   if (catDict.dictLookup("URI", &obj)->isDict()) {
     if (obj.dictLookup("Base", &obj2)->isString()) {
@@ -136,8 +116,6 @@ Catalog::Catalog(XRef *xrefA) {
   catDict.free();
   return;
 
- err2:
-  pagesDict.free();
  err1:
   catDict.free();
   ok = gFalse;
@@ -277,7 +255,7 @@ GBool Catalog::cachePageTree(int page)
       return gFalse;
     }
 
-    pagesSize = numPages;
+    pagesSize = getNumPages();
     pages = (Page **)gmallocn(pagesSize, sizeof(Page *));
     pageRefs = (Ref *)gmallocn(pagesSize, sizeof(Ref));
     for (int i = 0; i < pagesSize; ++i) {
@@ -402,7 +380,7 @@ GBool Catalog::cachePageTree(int page)
 int Catalog::findPage(int num, int gen) {
   int i;
 
-  for (i = 0; i < numPages; ++i) {
+  for (i = 0; i < getNumPages(); ++i) {
     Ref *ref = getPageRef(i+1);
     if (ref->num == num && ref->gen == gen)
       return i + 1;
@@ -722,7 +700,7 @@ GBool Catalog::labelToIndex(GooString *label, int *index)
       return gFalse;
   }
 
-  if (*index < 0 || *index >= numPages)
+  if (*index < 0 || *index >= getNumPages())
     return gFalse;
 
   return gTrue;
@@ -732,7 +710,7 @@ GBool Catalog::indexToLabel(int index, GooString *label)
 {
   char buffer[32];
 
-  if (index < 0 || index >= numPages)
+  if (index < 0 || index >= getNumPages())
     return gFalse;
 
   PageLabelInfo *pli = getPageLabelInfo();
@@ -848,6 +826,42 @@ EmbFile::EmbFile(Object *efDict, GooString *description)
     m_mimetype = new GooString();
 }
 
+int Catalog::getNumPages()
+{
+  // Lazily computed: -1 means the page count has not been read yet.
+  if (numPages == -1) {
+    Object catDict, pagesDict, obj;
+    xref->getCatalog(&catDict);
+    catDict.dictLookup("Pages", &pagesDict);
+    catDict.free();
+    // This should really be isDict("Pages"), but I've seen at least one
+    // PDF file where the /Type entry is missing.
+    if (!pagesDict.isDict()) {
+      error(-1, "Top-level pages object is wrong type (%s)",
+          pagesDict.getTypeName());
+      pagesDict.free();
+      // Cache the failure so later calls do not re-parse and re-report it.
+      numPages = 0;
+      return numPages;
+    }
+
+    pagesDict.dictLookup("Count", &obj);
+    // some PDF files actually use real numbers here ("/Count 9.0")
+    if (!obj.isNum()) {
+      error(-1, "Page count in top-level pages object is wrong type (%s)",
+         obj.getTypeName());
+      numPages = 0;
+    } else {
+      numPages = (int)obj.getNum();
+    }
+
+    obj.free();
+    pagesDict.free();
+  }
+
+  return numPages;
+}
+
 PageLabelInfo *Catalog::getPageLabelInfo()
 {
   if (!pageLabelInfo) {
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 5a25109..8bca80b 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -148,7 +148,7 @@ public:
   GBool isOk() { return ok; }
 
   // Get number of pages.
-  int getNumPages() { return numPages; }
+  int getNumPages();
 
   // Get a page.
   Page *getPage(int i);
-- 
1.7.0.4


From d17e7a3b5529e48e33f67284142165e5dc7f2ed9 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 18:53:54 +0100
Subject: [PATCH 13/17] Get number of pages from linearization table

---
 poppler/PDFDoc.cc |    9 +++++++++
 poppler/PDFDoc.h  |    2 +-
 2 files changed, 10 insertions(+), 1 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 45b47d2..7c6db2a 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -1112,6 +1112,15 @@ Guint PDFDoc::getMainXRefEntriesOffset()
   return mainXRefEntriesOffset;
 }
 
+int PDFDoc::getNumPages()
+{
+  // Non-linearized documents ask the catalog; linearized ones ask the
+  // linearization object instead.
+  if (!isLinearized())
+    return catalog->getNumPages();
+  return getLinearization()->getNumPages();
+}
+
 Page *PDFDoc::getPage(int page)
 {
   if ((page < 1) || page > getNumPages()) return NULL;
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index ed0828c..ef1646f 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -119,7 +119,7 @@ public:
     { return getPage(page) ? getPage(page)->getRotate() : 0 ; }
 
   // Get number of pages.
-  int getNumPages() { return catalog->getNumPages(); }
+  int getNumPages();
 
   // Return the contents of the metadata stream, or NULL if there is
   // no metadata.
-- 
1.7.0.4


From 70e8cce91ae4a94bca7ef14677340dda20aeaef7 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 4 Aug 2010 18:09:36 +0200
Subject: [PATCH 14/17] Keep security handler available in PDFDoc

---
 poppler/PDFDoc.cc |    4 ++--
 poppler/PDFDoc.h  |    2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 7c6db2a..963a11b 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -102,6 +102,7 @@ void PDFDoc::init()
   outline = NULL;
 #endif
   startXRefPos = ~(Guint)0;
+  secHdlr = NULL;
 }
 
 PDFDoc::PDFDoc()
@@ -274,6 +275,7 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
 }
 
 PDFDoc::~PDFDoc() {
+  delete secHdlr;
 #ifndef DISABLE_OUTLINE
   if (outline) {
     delete outline;
@@ -370,7 +372,6 @@ void PDFDoc::checkHeader() {
 GBool PDFDoc::checkEncryption(GooString *ownerPassword, GooString *userPassword) {
   Object encrypt;
   GBool encrypted;
-  SecurityHandler *secHdlr;
   GBool ret;
 
   xref->getTrailerDict()->dictLookup("Encrypt", &encrypt);
@@ -390,7 +391,6 @@ GBool PDFDoc::checkEncryption(GooString *ownerPassword, GooString *userPassword)
 	// authorization failed
 	ret = gFalse;
       }
-      delete secHdlr;
     } else {
       // couldn't find the matching security handler
       ret = gFalse;
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index ef1646f..33f3c2b 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -50,6 +50,7 @@ class LinkAction;
 class LinkDest;
 class Outline;
 class Linearization;
+class SecurityHandler;
 
 enum PDFWriteMode {
   writeStandard,
@@ -261,6 +262,7 @@ private:
   int pdfMinorVersion;
   Linearization *linearization;
   XRef *xref;
+  SecurityHandler *secHdlr;
   Catalog *catalog;
 #ifndef DISABLE_OUTLINE
   Outline *outline;
-- 
1.7.0.4


From c49fc470391a69e569ddf76990e0b2e447a7802f Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 24 Mar 2010 22:03:27 +0100
Subject: [PATCH 15/17] Add hint tables support

---
 CMakeLists.txt      |    2 +
 poppler/Hints.cc    |  422 +++++++++++++++++++++++++++++++++++++++++++++++++++
 poppler/Hints.h     |   92 +++++++++++
 poppler/Makefile.am |    2 +
 poppler/PDFDoc.cc   |   14 ++
 poppler/PDFDoc.h    |    5 +
 6 files changed, 537 insertions(+), 0 deletions(-)
 create mode 100644 poppler/Hints.cc
 create mode 100644 poppler/Hints.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7c25c45..b70c76c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -247,6 +247,7 @@ set(poppler_SRCS
   poppler/GfxFont.cc
   poppler/GfxState.cc
   poppler/GlobalParams.cc
+  poppler/Hints.cc
   poppler/JArithmeticDecoder.cc
   poppler/JBIG2Stream.cc
   poppler/Lexer.cc
@@ -394,6 +395,7 @@ if(ENABLE_XPDF_HEADERS)
     poppler/GfxState.h
     poppler/GfxState_helpers.h
     poppler/GlobalParams.h
+    poppler/Hints.h
     poppler/JArithmeticDecoder.h
     poppler/JBIG2Stream.h
     poppler/Lexer.h
diff --git a/poppler/Hints.cc b/poppler/Hints.cc
new file mode 100644
index 0000000..3281800
--- /dev/null
+++ b/poppler/Hints.cc
@@ -0,0 +1,422 @@
+//========================================================================
+//
+// Hints.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2010 Hib Eris <hib at hiberis.nl>
+//
+//========================================================================
+
+#include <config.h>
+
+#include "Hints.h"
+
+#include "Linearization.h"
+#include "Object.h"
+#include "Stream.h"
+#include "XRef.h"
+#include "Parser.h"
+#include "Lexer.h"
+#include "SecurityHandler.h"
+
+#include <limits.h>
+
+//------------------------------------------------------------------------
+// Hints
+//------------------------------------------------------------------------
+
+Hints::Hints(BaseStream *str, Linearization *linearization, XRef *xref, SecurityHandler *secHdlr)
+{
+  mainXRefEntriesOffset = linearization->getMainXRefEntriesOffset();
+  nPages = linearization->getNumPages();
+  pageFirst = linearization->getPageFirst();
+  pageEndFirst = linearization->getEndFirst();
+  pageOffsetFirst = xref->getEntry(linearization->getObjectNumberFirst())->offset;
+  // Allocate the per-page tables; nPages drops to 0 if they cannot be sized.
+  if (nPages >= INT_MAX / (int)sizeof(Guint)) {
+     error(-1, "Invalid number of pages (%d) for hints table", nPages);
+     nPages = 0;
+  }
+  nObjects = (Guint *) gmallocn(nPages, sizeof(Guint));
+  xRefOffset = (Guint *) gmallocn(nPages, sizeof(Guint));
+  pageLength = (Guint *) gmallocn(nPages, sizeof(Guint));
+  pageOffset = (Guint *) gmallocn(nPages, sizeof(Guint));
+  numSharedObject = (Guint *) gmallocn(nPages, sizeof(Guint));
+  sharedObjectId = (Guint **) gmallocn(nPages, sizeof(Guint*));
+  if (!nObjects || !xRefOffset || !pageLength || !pageOffset ||
+      !numSharedObject || !sharedObjectId) {
+    error(-1, "Failed to allocate memory for hints tabel");
+    nPages = 0;
+  }
+  // Clear whole elements (not just nPages bytes): ~Hints() and
+  // getPageRanges() read these arrays even if the tables are never filled.
+  memset(numSharedObject, 0, nPages * sizeof(Guint));
+  memset(sharedObjectId, 0, nPages * sizeof(Guint *));
+  nSharedGroups = 0;
+  groupLength = NULL;
+  groupOffset = NULL;
+  groupHasSignature = NULL;
+  groupNumObjects = NULL;
+  groupXRefOffset = NULL;
+  readTables(str, linearization, xref, secHdlr);
+}
+
+Hints::~Hints()
+{
+  // Free a page's shared-object id list only when its count is non-zero.
+  for (int page = 0; page < nPages; ++page) {
+    if (numSharedObject[page] != 0) {
+      gfree(sharedObjectId[page]);
+    }
+  }
+  gfree(sharedObjectId);
+  gfree(numSharedObject);
+  gfree(pageOffset);
+  gfree(pageLength);
+  gfree(xRefOffset);
+  gfree(nObjects);
+  gfree(groupXRefOffset);
+  gfree(groupNumObjects);
+  gfree(groupHasSignature);
+  gfree(groupOffset);
+  gfree(groupLength);
+}
+
+void Hints::readTables(BaseStream *str, Linearization *linearization, XRef *xref, SecurityHandler *secHdlr)
+{
+  hintsOffset = linearization->getHintsOffset();
+  hintsLength = linearization->getHintsLength();
+  hintsOffset2 = linearization->getHintsOffset2();
+  hintsLength2 = linearization->getHintsLength2();
+  // Both lengths come from the file: refuse empty or int-overflowing sizes.
+  if (!hintsLength || hintsLength >= (Guint)(INT_MAX / 2) ||
+      hintsLength2 >= (Guint)(INT_MAX / 2)) {
+    error(-1, "Invalid hints table length");
+    return;
+  }
+  Parser *parser;
+  Object obj;
+  int bufLength = hintsLength + hintsLength2;
+  // Heap buffer: a stack array sized from file data could overflow the stack.
+  char *buf = (char *) gmalloc(bufLength);
+  char *p = buf;
+  obj.initNull();
+  Stream *s = str->makeSubStream(hintsOffset, gFalse, hintsLength, &obj);
+  s->reset();
+  for (Guint i=0; i < hintsLength; i++) { *p++ = s->getChar(); }
+  delete s;
+  if (hintsOffset2 && hintsLength2) {
+    obj.initNull();
+    s = str->makeSubStream(hintsOffset2, gFalse, hintsLength2, &obj);
+    s->reset();
+    for (Guint i=0; i < hintsLength2; i++) { *p++ = s->getChar(); }
+    delete s;
+  }
+  // Expose only the bytes actually copied (the second range may be skipped).
+  obj.initNull();
+  MemStream *memStream = new MemStream (buf, 0, (Guint)(p - buf), &obj);
+  obj.initNull();
+  parser = new Parser(xref, new Lexer(xref, memStream), gTrue);
+
+  int num, gen;
+  if (parser->getObj(&obj)->isInt() &&
+     (num = obj.getInt(), obj.free(), parser->getObj(&obj)->isInt()) &&
+     (gen = obj.getInt(), obj.free(), parser->getObj(&obj)->isCmd("obj")) &&
+     (obj.free(), parser->getObj(&obj,
+         secHdlr ? secHdlr->getFileKey() : (Guchar *)NULL,
+         secHdlr ? secHdlr->getEncAlgorithm() : cryptRC4,
+         secHdlr ? secHdlr->getFileKeyLength() : 0,
+         num, gen)->isStream())) {
+    Stream *hintsStream = obj.getStream();
+    Dict *hintsDict = obj.streamGetDict();
+
+    int sharedStreamOffset = 0;
+    if (hintsDict->lookupInt("S", NULL, &sharedStreamOffset) &&
+        sharedStreamOffset > 0) {
+        hintsStream->reset();
+        readPageOffsetTable(hintsStream);
+
+        hintsStream->reset();
+        for (int i=0; i<sharedStreamOffset; i++) hintsStream->getChar();
+        readSharedObjectsTable(hintsStream);
+    } else {
+      error(-1, "Invalid shared object hint table offset");
+    }
+  } else {
+    error(-1, "Failed parsing hints table object");
+  }
+  obj.free();
+  delete parser;
+  gfree(buf);  // released last: the streams above read from this buffer
+}
+
+void Hints::readPageOffsetTable(Stream *str)
+{
+  if (nPages < 1) {
+    error(-1, "Invalid number of pages reading page offset hints table");
+    return;
+  }
+  // Decode the page offset hint table from str into the per-page arrays.
+  inputBits = 0; // reset on byte boundary.
+  // Fixed-size header: each field is a 32- or 16-bit value read in order.
+  nObjectLeast = readBits(32, str);
+
+  objectOffsetFirst = readBits(32, str);
+  if (objectOffsetFirst >= hintsOffset) objectOffsetFirst += hintsLength;
+
+  nBitsDiffObjects = readBits(16, str);
+
+  pageLengthLeast = readBits(32, str);
+
+  nBitsDiffPageLength = readBits(16, str);
+
+  OffsetStreamLeast = readBits(32, str);
+
+  nBitsOffsetStream = readBits(16, str);
+
+  lengthStreamLeast = readBits(32, str);
+
+  nBitsLengthStream = readBits(16, str);
+
+  nBitsNumShared = readBits(16, str);
+
+  nBitsShared = readBits(16, str);
+
+  nBitsNumerator = readBits(16, str);
+
+  denominator = readBits(16, str);
+  // Per-slot object counts, stored as a delta from nObjectLeast.
+  for (int i=0; i<nPages; i++) {
+    nObjects[i] = nObjectLeast + readBits(nBitsDiffObjects, str);
+  }
+  // xref offsets accumulate 20 bytes per object (presumably one xref entry each).
+  nObjects[0] = 0;
+  xRefOffset[0] = mainXRefEntriesOffset + 20;
+  for (int i=1; i<nPages; i++) {
+    xRefOffset[i] = xRefOffset[i-1] + 20*nObjects[i-1];
+  }
+
+  inputBits = 0; // reset on byte boundary. Not in specs!
+  for (int i=0; i<nPages; i++) {
+    pageLength[i] = pageLengthLeast + readBits(nBitsDiffPageLength, str);
+  }
+  // Per-slot shared-object counts; the entry for slot 0 is forced to zero.
+  inputBits = 0; // reset on byte boundary. Not in specs!
+  numSharedObject[0] = readBits(nBitsNumShared, str);
+  numSharedObject[0] = 0; // Do not trust the read value to be 0.
+  sharedObjectId[0] = NULL;
+  for (int i=1; i<nPages; i++) {
+    numSharedObject[i] = readBits(nBitsNumShared, str);
+    if (numSharedObject[i] >= INT_MAX / (int)sizeof(Guint)) {
+       error(-1, "Invalid number of shared objects");
+       numSharedObject[i] = 0;
+       return;
+    }
+    sharedObjectId[i] = (Guint *) gmallocn(numSharedObject[i], sizeof(Guint));
+    if (numSharedObject[i] && !sharedObjectId[i]) {
+       error(-1, "Failed to allocate memory for shared object IDs");
+       numSharedObject[i] = 0;
+       return;
+    }
+  }
+  // Shared-object identifiers for every slot except slot 0.
+  inputBits = 0; // reset on byte boundary. Not in specs!
+  for (int i=1; i<nPages; i++) {
+    for (Guint j=0; j < numSharedObject[i]; j++) {
+      sharedObjectId[i][j] = readBits(nBitsShared, str);
+    }
+  }
+
+  // set fake pageOffset[0].
+  if (hintsOffset < pageEndFirst) {
+    pageOffset[0] = pageEndFirst - pageLength[0];
+  } else {
+    pageOffset[0] = hintsOffset + hintsLength - pageLength[0];
+  }
+  // find pageOffsets.
+  for (int i=1; i<nPages; i++) {
+    pageOffset[i] = pageOffset[i-1] + pageLength[i-1];
+  }
+  // set correct pageOffset[0].
+  pageOffset[0] = pageOffsetFirst;
+
+}
+
+void Hints::readSharedObjectsTable(Stream *str)
+{
+  inputBits = 0; // reset on byte boundary.
+  // Decode the shared objects hint table header, then its per-group entries.
+  Guint firstSharedObjectNumber = readBits(32, str);
+
+  Guint firstSharedObjectOffset = readBits(32, str);
+  firstSharedObjectOffset += hintsLength;
+
+  Guint nSharedGroupsFirst = readBits(32, str);
+  // Assign the member (a local of the same name would leave it at 0).
+  nSharedGroups = readBits(32, str);
+
+  Guint nBitsNumObjects = readBits(16, str);
+
+  Guint groupLengthLeast = readBits(32, str);
+
+  Guint nBitsDiffGroupLength = readBits(16, str);
+
+  if ((!nSharedGroups) || (nSharedGroups >= INT_MAX / (int)sizeof(Guint))) {
+     error(-1, "Invalid number of shared object groups");
+     nSharedGroups = 0;
+     return;
+  }
+  if ((!nSharedGroupsFirst) || (nSharedGroupsFirst > nSharedGroups)) {
+     error(-1, "Invalid number of first page shared object groups");
+     nSharedGroupsFirst = nSharedGroups;
+  }
+
+  groupLength = (Guint *) gmallocn(nSharedGroups, sizeof(Guint));
+  groupOffset = (Guint *) gmallocn(nSharedGroups, sizeof(Guint));
+  groupHasSignature = (Guint *) gmallocn(nSharedGroups, sizeof(Guint));
+  groupNumObjects = (Guint *) gmallocn(nSharedGroups, sizeof(Guint));
+  groupXRefOffset = (Guint *) gmallocn(nSharedGroups, sizeof(Guint));
+  if (!groupLength || !groupOffset || !groupHasSignature ||
+      !groupNumObjects || !groupXRefOffset) {
+     error(-1, "Failed to allocate memory for shared object groups");
+     nSharedGroups = 0;
+     return;
+  }
+
+  inputBits = 0; // reset on byte boundary. Not in specs!
+  for (Guint i=0; i<nSharedGroups; i++) {
+    groupLength[i] = groupLengthLeast + readBits(nBitsDiffGroupLength, str);
+  }
+
+  groupOffset[0] = objectOffsetFirst;
+  for (Guint i=1; i<nSharedGroupsFirst; i++) {
+    groupOffset[i] = groupOffset[i-1] + groupLength[i-1];
+  }
+  if (nSharedGroups > nSharedGroupsFirst ) {
+    groupOffset[nSharedGroupsFirst] = firstSharedObjectOffset;
+    for (Guint i=nSharedGroupsFirst+1; i<nSharedGroups; i++) {
+      groupOffset[i] = groupOffset[i-1] + groupLength[i-1];
+    }
+  }
+
+  inputBits = 0; // reset on byte boundary. Not in specs!
+  for (Guint i=0; i<nSharedGroups; i++) {
+    groupHasSignature[i] = readBits(1, str);
+  }
+
+  inputBits = 0; // reset on byte boundary. Not in specs!
+  for (Guint i=0; i<nSharedGroups; i++) {
+    if (groupHasSignature[i]) {
+       readBits(128, str);
+    }
+  }
+
+  inputBits = 0; // reset on byte boundary. Not in specs!
+  for (Guint i=0; i<nSharedGroups; i++) {
+    groupNumObjects[i] =
+       nBitsNumObjects ? 1 + readBits(nBitsNumObjects, str) : 1;
+  }
+  // The first-page groups get no separate xref range.
+  for (Guint i=0; i<nSharedGroupsFirst; i++) {
+    groupNumObjects[i] = 0;
+    groupXRefOffset[i] = 0;
+  }
+  if (nSharedGroups > nSharedGroupsFirst ) {
+    groupXRefOffset[nSharedGroupsFirst] =
+        mainXRefEntriesOffset + 20*firstSharedObjectNumber;
+    for (Guint i=nSharedGroupsFirst+1; i<nSharedGroups; i++) {
+      groupXRefOffset[i] = groupXRefOffset[i-1] + 20*groupNumObjects[i-1];
+    }
+  }
+}
+
+Guint Hints::getPageOffset(int page)
+{
+  if (page < 1 || page > nPages) return 0;
+  // Slot 0 belongs to page index pageFirst; lower page indices sit one
+  // slot higher, and higher ones map straight through.
+  const int pageIdx = page - 1;
+  int slot = 0;
+  if (pageIdx > pageFirst) slot = pageIdx;
+  else if (pageIdx < pageFirst) slot = pageIdx + 1;
+  return pageOffset[slot];
+}
+
+GooVector<ByteRange>* Hints::getPageRanges(int page)
+{
+  if ((page < 1) || (page > nPages)) return NULL;
+  // Returns a newly allocated list of byte ranges for page, NULL if out of range.
+  int idx;
+  if (page-1 > pageFirst)
+     idx = page-1;
+  else if (page-1 < pageFirst)
+     idx = page;
+  else
+     idx = 0;
+
+  ByteRange pageRange;
+  GooVector<ByteRange> *v = new GooVector<ByteRange>;
+
+  pageRange.offset = pageOffset[idx];
+  pageRange.length = pageLength[idx];
+  v->push_back(pageRange);
+
+  pageRange.offset = xRefOffset[idx];
+  pageRange.length = 20*nObjects[idx];
+  v->push_back(pageRange);
+  // The group tables stay NULL if the shared objects table was not read.
+  const GBool haveGroups = groupOffset && groupLength &&
+                           groupXRefOffset && groupNumObjects;
+  for (Guint j=0; haveGroups && j<numSharedObject[idx]; j++) {
+     // NOTE(review): k is read from the file and is not range-checked here.
+     Guint k = sharedObjectId[idx][j];
+     pageRange.offset = groupOffset[k];
+     pageRange.length = groupLength[k];
+     v->push_back(pageRange);
+     pageRange.offset = groupXRefOffset[k];
+     pageRange.length = 20*groupNumObjects[k];
+     v->push_back(pageRange);
+  }
+  return v;
+}
+
+Guint Hints::readBit(Stream *str)
+{
+  // Refill the one-byte buffer when all of its bits have been consumed.
+  if (inputBits == 0) {
+    const int next = str->getChar();
+    if (next == EOF) {
+      return (Guint) -1;
+    }
+    bitsBuffer = next;
+    inputBits = 8;
+  }
+
+  // Hand out bits most-significant first.
+  --inputBits;
+  return (bitsBuffer >> inputBits) & 1;
+}
+
+Guint Hints::readBits(int n, Stream *str)
+{
+  // Reads n bits, most-significant first, and returns them as an unsigned
+  // value; returns (Guint)-1 if n is negative or the stream ends early.
+  // The sentinel is tested on the raw readBit() result: shifting it first
+  // would hide the end-of-stream marker for every n other than 1.
+  // For n wider than Guint only the low-order bits are kept, so a call such
+  // as readBits(128, ...) skips a field without an out-of-range shift.
+  if (n < 0) return -1;
+
+  Guint bits = 0;
+  for (int i = 0; i < n; i++) {
+    Guint bit = readBit(str);
+    if (bit == (Guint) -1) {
+      return -1;
+    }
+    bits = (bits << 1) | bit;
+  }
+  return bits;
+}
+
+
diff --git a/poppler/Hints.h b/poppler/Hints.h
new file mode 100644
index 0000000..3a52d67
--- /dev/null
+++ b/poppler/Hints.h
@@ -0,0 +1,92 @@
+//========================================================================
+//
+// Hints.h
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2010 Hib Eris <hib at hiberis.nl>
+//
+//========================================================================
+
+#ifndef HINTS_H
+#define HINTS_H
+
+#include <string.h>
+#include "goo/gtypes.h"
+#include "goo/GooVector.h"
+//#include <vector>
+#include "PDFDoc.h"
+
+class Stream;
+class BaseStream;
+class Linearization;
+class XRef;
+
+//------------------------------------------------------------------------
+// Hints
+//------------------------------------------------------------------------
+
+class Hints {
+public:
+  // Reads the hint tables described by the given linearization object.
+  Hints(BaseStream *str, Linearization *linearization, XRef *xref, SecurityHandler *secHdlr);
+  ~Hints();
+  // Page lookups; page numbers start at 1 (smaller values are rejected).
+  Guint getPageOffset(int page);
+  GooVector<ByteRange>* getPageRanges(int page);
+
+private:
+
+  void readTables(BaseStream *str, Linearization *linearization, XRef *xref, SecurityHandler *secHdlr);
+  void readPageOffsetTable(Stream *str);
+  void readSharedObjectsTable(Stream *str);
+  // Bit-level readers over the hint stream.
+  Guint readBit(Stream *str);
+  Guint readBits(int n, Stream *str);
+  // Location of the hint data, copied from the linearization object.
+  Guint hintsOffset;
+  Guint hintsLength;
+  Guint hintsOffset2;
+  Guint hintsLength2;
+  Guint mainXRefEntriesOffset;
+
+  int nPages;
+  int pageFirst;
+  Guint pageOffsetFirst;
+  Guint pageEndFirst;
+  int objectNumberFirst;
+  // Header fields of the page offset hint table.
+  Guint nObjectLeast;
+  Guint objectOffsetFirst;
+  Guint nBitsDiffObjects;
+  Guint pageLengthLeast;
+  Guint nBitsDiffPageLength;
+  Guint OffsetStreamLeast;
+  Guint nBitsOffsetStream;
+  Guint lengthStreamLeast;
+  Guint nBitsLengthStream;
+  Guint nBitsNumShared;
+  Guint nBitsShared;
+  Guint nBitsNumerator;
+  Guint denominator;
+  // Per-page arrays, allocated with nPages entries each.
+  Guint *nObjects;
+  Guint *xRefOffset;
+  Guint *pageLength;
+  Guint *pageOffset;
+  Guint *numSharedObject;
+  Guint **sharedObjectId;
+  // Shared-object group tables (NULL until the shared objects table is read).
+  Guint nSharedGroups;
+  Guint *groupLength;
+  Guint *groupOffset;
+  Guint *groupHasSignature;
+  Guint *groupNumObjects;
+  Guint *groupXRefOffset;
+  // Bit reader state: unread bits left in bitsBuffer, and the current byte.
+  int inputBits;
+  char bitsBuffer;
+
+};
+
+#endif
diff --git a/poppler/Makefile.am b/poppler/Makefile.am
index bb6daa6..4147a92 100644
--- a/poppler/Makefile.am
+++ b/poppler/Makefile.am
@@ -206,6 +206,7 @@ poppler_include_HEADERS =	\
 	GfxState.h		\
 	GfxState_helpers.h	\
 	GlobalParams.h		\
+	Hints.h			\
 	JArithmeticDecoder.h	\
 	JBIG2Stream.h		\
 	Lexer.h			\
@@ -285,6 +286,7 @@ libpoppler_la_SOURCES =		\
 	GfxFont.cc 		\
 	GfxState.cc		\
 	GlobalParams.cc		\
+	Hints.cc		\
 	JArithmeticDecoder.cc	\
 	JBIG2Stream.cc		\
 	Lexer.cc 		\
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 963a11b..147ce5c 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -70,6 +70,7 @@
 #include "Outline.h"
 #endif
 #include "PDFDoc.h"
+#include "Hints.h"
 
 //------------------------------------------------------------------------
 
@@ -98,6 +99,7 @@ void PDFDoc::init()
   xref = NULL;
   linearization = NULL;
   catalog = NULL;
+  hints = NULL;
 #ifndef DISABLE_OUTLINE
   outline = NULL;
 #endif
@@ -287,6 +289,9 @@ PDFDoc::~PDFDoc() {
   if (xref) {
     delete xref;
   }
+  if (hints) {
+    delete hints;
+  }
   if (linearization) {
     delete linearization;
   }
@@ -550,6 +555,15 @@ GBool PDFDoc::getID(GooString *permanent_id, GooString *update_id) {
   return gFalse;
 }
 
+Hints *PDFDoc::getHints()
+{
+  if (hints != NULL || !isLinearized()) {
+    return hints;  // already built, or not linearized (hints stays NULL)
+  }
+  hints = new Hints(str, getLinearization(), getXRef(), secHdlr);
+  return hints;
+}
+
 int PDFDoc::saveAs(GooString *name, PDFWriteMode mode) {
   FILE *f;
   OutStream *outStr;
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 33f3c2b..f04e59f 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -51,6 +51,7 @@ class LinkDest;
 class Outline;
 class Linearization;
 class SecurityHandler;
+class Hints;
 
 enum PDFWriteMode {
   writeStandard,
@@ -241,6 +242,9 @@ private:
   void saveIncrementalUpdate (OutStream* outStr);
   void saveCompleteRewrite (OutStream* outStr);
 
+  // Get hints.
+  Hints *getHints();
+
   PDFDoc();
   void init();
   GBool setup(GooString *ownerPassword, GooString *userPassword);
@@ -264,6 +268,7 @@ private:
   XRef *xref;
   SecurityHandler *secHdlr;
   Catalog *catalog;
+  Hints *hints;
 #ifndef DISABLE_OUTLINE
   Outline *outline;
 #endif
-- 
1.7.0.4


From 48097d29667e9a508101cf9b98bad092ff996d59 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 20 Apr 2010 19:06:02 +0200
Subject: [PATCH 16/17] Use hint tables for PDFDoc::getPage()

---
 poppler/PDFDoc.cc |   76 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 poppler/PDFDoc.h  |    4 +++
 2 files changed, 79 insertions(+), 1 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 147ce5c..d7c722b 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -105,6 +105,7 @@ void PDFDoc::init()
 #endif
   startXRefPos = ~(Guint)0;
   secHdlr = NULL;
+  pageCache = NULL;
 }
 
 PDFDoc::PDFDoc()
@@ -277,6 +278,14 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
 }
 
 PDFDoc::~PDFDoc() {
+  if (pageCache) {
+    for (int i = 0; i < getNumPages(); i++) {
+      if (pageCache[i]) {
+        delete pageCache[i];
+      }
+    }
+    gfree(pageCache);
+  }
   delete secHdlr;
 #ifndef DISABLE_OUTLINE
   if (outline) {
@@ -1135,11 +1144,76 @@ int PDFDoc::getNumPages()
   }
 }
 
+Guint PDFDoc::getPageOffset(int page)
+{
+  // The first page's object is located through the xref, not the hints.
+  if (isLinearized() && getLinearization()->getPageFirst() == page-1) {
+    return xref->getEntry(linearization->getObjectNumberFirst())->offset;
+  }
+
+  Hints *pageHints = getHints();
+  Guint offset = pageHints ? pageHints->getPageOffset(page) : 0;
+  if (offset == 0) {
+    error(-1, "Failed getting page offset from hint table");
+  }
+  return offset;
+}
+
+Page *PDFDoc::parsePage(Guint offset, int page)
+{
+  Page *p = NULL;
+  Object obj;
+  // Parse the indirect object at offset; build a Page if it is a /Page dict.
+  obj.initNull();
+  Stream *stream = str->makeSubStream(offset, gFalse, 0, &obj);
+  Parser parser(xref, new Lexer(xref, stream), gTrue);
+  // Direct-initialized: Parser presumably owns the Lexer, so no temporary copy.
+  Object obj1, obj2, obj3, obj4;
+  if (parser.getObj(&obj1)->isInt() &&
+      parser.getObj(&obj2)->isInt() &&
+      parser.getObj(&obj3)->isCmd("obj") &&
+      parser.getObj(&obj4)->isDict("Page")) {
+    Ref pageRef;
+    Dict *pageDict;
+    pageRef.num = obj1.getInt();
+    pageRef.gen = obj2.getInt();
+    pageDict = obj4.getDict();
+    p = new Page(xref, page, pageDict, pageRef,
+                 new PageAttrs(NULL, pageDict),
+                 catalog->getForm());
+    if (!p->isOk()) {
+      delete p;
+      p = NULL;
+    }
+  }
+  obj4.free();
+  obj3.free();
+  obj2.free();
+  obj1.free();
+
+  return p;
+}
+
 Page *PDFDoc::getPage(int page)
 {
   if ((page < 1) || page > getNumPages()) return NULL;
 
-  {
+  if (isLinearized()) {  // linearized: locate and parse pages on demand
+    if (!pageCache) {
+      pageCache = (Page **) gmallocn(getNumPages(), sizeof(Page *));
+      for (int i = 0; i < getNumPages(); i++) {
+        pageCache[i] = NULL;
+      }
+    }
+    if (!pageCache[page-1]) {
+      const Guint offset = getPageOffset(page);  // once: a failed lookup logs
+      pageCache[page-1] = parsePage(offset, page);
+      if (!pageCache[page-1]) {
+         error(-1, "Failed parsing page %d at offset %u", page, offset);
+      }
+    }
+    return pageCache[page-1];
+  } else {
     return catalog->getPage(page);
   }
 }
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index f04e59f..c5e3aaf 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -242,6 +242,9 @@ private:
   void saveIncrementalUpdate (OutStream* outStr);
   void saveCompleteRewrite (OutStream* outStr);
 
+  Guint getPageOffset(int page);
+  Page *parsePage(Guint offset, int page);
+
   // Get hints.
   Hints *getHints();
 
@@ -272,6 +275,7 @@ private:
 #ifndef DISABLE_OUTLINE
   Outline *outline;
 #endif
+  Page **pageCache;
 
   GBool ok;
   int errCode;
-- 
1.7.0.4


From b395bf4b586e2a2617e766eebd68ba5ad5f5c779 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 13:08:11 +0100
Subject: [PATCH 17/17] Fill CachedFileStream buffer in a smarter manner

This avoids downloading too many chunks by buffering on chunk boundaries.
---
 poppler/CachedFile.h |    2 +-
 poppler/Stream.cc    |    2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/poppler/CachedFile.h b/poppler/CachedFile.h
index 897ff4a..e1ff817 100644
--- a/poppler/CachedFile.h
+++ b/poppler/CachedFile.h
@@ -24,7 +24,7 @@
 
 //------------------------------------------------------------------------
 
-#define CachedFileChunkSize 8192
+#define CachedFileChunkSize 8192 // This should be a multiple of cachedStreamBufSize
 
 class GooString;
 class CachedFileLoader;
diff --git a/poppler/Stream.cc b/poppler/Stream.cc
index fbf2b33..93cc27b 100644
--- a/poppler/Stream.cc
+++ b/poppler/Stream.cc
@@ -875,7 +875,7 @@ GBool CachedFileStream::fillBuf()
   if (limited && bufPos + cachedStreamBufSize > start + length) {
     n = start + length - bufPos;
   } else {
-    n = cachedStreamBufSize;
+    n = cachedStreamBufSize - (bufPos % cachedStreamBufSize);
   }
   cc->read(buf, 1, n);
   bufEnd = buf + n;
-- 
1.7.0.4


More information about the poppler mailing list