[poppler] Linearization support

Hib Eris hib at hiberis.nl
Mon Sep 27 07:01:33 PDT 2010


Hi Albert,

On Tue, Sep 21, 2010 at 10:48 PM, Albert Astals Cid <aacid at kde.org> wrote:
> A Dimarts, 21 de setembre de 2010, Hib Eris va escriure:
>> Hi Albert,
>>
>> On Thu, Sep 9, 2010 at 7:02 PM, Albert Astals Cid <aacid at kde.org> wrote:
>> > A Dijous, 9 de setembre de 2010, Hib Eris va escriure:
>> >> Hi Albert et al,
>> >>
>> >> On Sun, Sep 5, 2010 at 5:02 PM, Albert Astals Cid <aacid at kde.org> wrote:
>> >> > A Dimecres, 11 d'agost de 2010, Hib Eris va escriure:
>> >> >> Hi again,
>> >> >>
>> >> >> On Wed, Aug 4, 2010 at 11:47 PM, Hib Eris <hib at hiberis.nl> wrote:
>> >> >> > Hi all,
>> >> >> >
>> >> >> > On Sat, Jun 12, 2010 at 12:37 PM, Hib Eris <hib at hiberis.nl> wrote:
>> >> >> >> Hi all,
>> >> >> >>
>> >> >> >> Now that 0.14.0 is out and feature freeze is over, I have updated
>> >> >> >> my linearization patches
>> >> >> >> (see
>> >> >> >> http://lists.freedesktop.org/archives/poppler/2010-April/005760.ht
>> >> >> >> ml) to current master.
>> >> >> >>
>> >> >> >> Any comments on it are very welcome.
>> >> >> >
>> >> >> > I have updated my patches again as they no longer applied to
>> >> >> > current git master. I have also fixed some errors I found with
>> >> >> > test documents. I would appreciate it if anyone could test these
>> >> >> > patches against other PDF documents.
>> >> >>
>> >> >> I found out that I was leaking some memory with these patches, so
>> >> >> here is another update. Sorry for all the noise.
>> >> >
>> >> > Hi Hib, the patches do not apply cleanly anymore (my fault for taking
>> >> > so much to review), could you please rebase against the current
>> >> > master?
>> >> >
>> >> > Thanks and sorry,
>> >> >  Albert
>> >>
>> >> I have updated the patches. I am very curious to know if they pass
>> >> your regression tests.
>> >
>> > Found a regression already, will mail you the file in private as it's 3.1
>> > MB.
>>
>> Thank you for running the regression tests. I have updated my patches
>> again to pass the document you send me. Please test them again if you
>> can find some time.
>
> Still fails, will send you a pdf that gives me a problem.

Here are updated patches that work well with the document you sent me.
Can you run them through the regression tests? By the way, is it
possible for me to run the regression tests myself?

Thanks,

Hib Eris
-------------- next part --------------
From 6edfdd1b437a97d31987ff845e3659ddae8bd059 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 6 Apr 2010 19:24:42 +0200
Subject: [PATCH 01/12] Cleanup XRef constructors

---
 poppler/XRef.cc |   14 ++++++--------
 poppler/XRef.h  |    1 +
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index 0cd4be0..ceb8efe 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -258,7 +258,7 @@ Object *ObjectStream::getObject(int objIdx, int objNum, Object *obj) {
 // XRef
 //------------------------------------------------------------------------
 
-XRef::XRef() {
+void XRef::init() {
   ok = gTrue;
   errCode = errNone;
   entries = NULL;
@@ -268,17 +268,15 @@ XRef::XRef() {
   objStrs = new PopplerCache(5);
 }
 
+XRef::XRef() {
+  init();
+}
+
 XRef::XRef(BaseStream *strA, GBool *wasReconstructed, GBool reconstruct) {
   Guint pos;
   Object obj;
 
-  ok = gTrue;
-  errCode = errNone;
-  size = 0;
-  entries = NULL;
-  streamEnds = NULL;
-  streamEndsLen = 0;
-  objStrs = new PopplerCache(5);
+  init();
 
   encrypted = gFalse;
   permFlags = defPermFlags;
diff --git a/poppler/XRef.h b/poppler/XRef.h
index 1f4ec6a..f18fa0e 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -157,6 +157,7 @@ private:
   Guchar fileKey[16];		// file decryption key
   GBool ownerPasswordOk;	// true if owner password is correct
 
+  void init();
   Guint getStartXref();
   GBool readXRef(Guint *pos, GooVector<Guint> *followedXRefStm);
   GBool readXRefTable(Parser *parser, Guint *pos, GooVector<Guint> *followedXRefStm);
-- 
1.6.4.2


From 40ec8afbf3ed0a435bb7d54656a67a2763e1ba71 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 6 Apr 2010 19:16:45 +0200
Subject: [PATCH 02/12] Create no more XRef entries than specified

---
 poppler/XRef.cc |  136 ++++++++++++++++++++++++++++---------------------------
 poppler/XRef.h  |    5 ++-
 2 files changed, 73 insertions(+), 68 deletions(-)

diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index ceb8efe..59b0640 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -262,6 +262,7 @@ void XRef::init() {
   ok = gTrue;
   errCode = errNone;
   entries = NULL;
+  capacity = 0;
   size = 0;
   streamEnds = NULL;
   streamEndsLen = 0;
@@ -351,6 +352,56 @@ XRef::~XRef() {
   }
 }
 
+int XRef::reserve(int newSize)
+{
+  if (newSize > capacity) {
+
+    int realNewSize;
+    for (realNewSize = capacity ? 2 * capacity : 1024;
+          newSize > realNewSize && realNewSize > 0;
+          realNewSize <<= 1) ;
+    if ((realNewSize < 0) ||
+        (realNewSize >= INT_MAX / (int)sizeof(XRefEntry))) {
+      return 0;
+    }
+
+    void *p = greallocn_checkoverflow(entries, realNewSize, sizeof(XRefEntry));
+    if (p == NULL) {
+      return 0;
+    }
+
+    entries = (XRefEntry *) p;
+    capacity = realNewSize;
+
+  }
+
+  return capacity;
+}
+
+int XRef::resize(int newSize)
+{
+  if (newSize > size) {
+
+    if (reserve(newSize) < newSize) return size;
+
+    for (int i = size; i < newSize; ++i) {
+      entries[i].offset = 0xffffffff;
+      entries[i].type = xrefEntryFree;
+      entries[i].obj.initNull ();
+      entries[i].updated = false;
+      entries[i].gen = 0;
+    }
+  } else {
+    for (int i = newSize; i < size; i++) {
+      entries[i].obj.free ();
+    }
+  }
+
+  size = newSize;
+
+  return size;
+}
+
 // Read the 'startxref' position.
 Guint XRef::getStartXref() {
   char buf[xrefSearchSize+1];
@@ -438,7 +489,7 @@ GBool XRef::readXRefTable(Parser *parser, Guint *pos, GooVector<Guint> *followed
   GBool more;
   Object obj, obj2;
   Guint pos2;
-  int first, n, newSize, i;
+  int first, n, i;
 
   while (1) {
     parser->getObj(&obj);
@@ -457,29 +508,13 @@ GBool XRef::readXRefTable(Parser *parser, Guint *pos, GooVector<Guint> *followed
     n = obj.getInt();
     obj.free();
     if (first < 0 || n < 0 || first + n < 0) {
-      goto err1;
+      goto err0;
     }
     if (first + n > size) {
-      for (newSize = size ? 2 * size : 1024;
-	   first + n > newSize && newSize > 0;
-	   newSize <<= 1) ;
-      if (newSize < 0) {
-	goto err1;
-      }
-      if (newSize >= INT_MAX / (int)sizeof(XRefEntry)) {
+      if (resize(first + n) != first + n) {
         error(-1, "Invalid 'obj' parameters'");
-        goto err1;
+        goto err0;
       }
- 
-      entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
-      for (i = size; i < newSize; ++i) {
-	entries[i].offset = 0xffffffff;
-	entries[i].type = xrefEntryFree;
-	entries[i].obj.initNull ();
-	entries[i].updated = false;
-	entries[i].gen = 0;
-      }
-      size = newSize;
     }
     for (i = first; i < first + n; ++i) {
       if (!parser->getObj(&obj)->isInt()) {
@@ -568,6 +603,7 @@ GBool XRef::readXRefTable(Parser *parser, Guint *pos, GooVector<Guint> *followed
 
  err1:
   obj.free();
+ err0:
   ok = gFalse;
   return gFalse;
 }
@@ -590,19 +626,10 @@ GBool XRef::readXRefStream(Stream *xrefStr, Guint *pos) {
     goto err1;
   }
   if (newSize > size) {
-    if (newSize >= INT_MAX / (int)sizeof(XRefEntry)) {
-      error(-1, "Invalid 'size' parameter.");
-      return gFalse;
-    }
-    entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
-    for (i = size; i < newSize; ++i) {
-      entries[i].offset = 0xffffffff;
-      entries[i].type = xrefEntryFree;
-      entries[i].obj.initNull ();
-      entries[i].updated = false;
-      entries[i].gen = 0;
+    if (resize(newSize) != newSize) {
+      error(-1, "Invalid 'size' parameter");
+      goto err0;
     }
-    size = newSize;
   }
 
   if (!dict->lookupNF("W", &obj)->isArray() ||
@@ -675,31 +702,16 @@ GBool XRef::readXRefStream(Stream *xrefStr, Guint *pos) {
 
 GBool XRef::readXRefStreamSection(Stream *xrefStr, int *w, int first, int n) {
   Guint offset;
-  int type, gen, c, newSize, i, j;
+  int type, gen, c, i, j;
 
   if (first + n < 0) {
     return gFalse;
   }
   if (first + n > size) {
-    for (newSize = size ? 2 * size : 1024;
-	 first + n > newSize && newSize > 0;
-	 newSize <<= 1) ;
-    if (newSize < 0) {
-      return gFalse;
-    }
-    if (newSize >= INT_MAX / (int)sizeof(XRefEntry)) {
-      error(-1, "Invalid 'size' inside xref table.");
+    if (resize(first + n) != size) {
+      error(-1, "Invalid 'size' inside xref table");
       return gFalse;
     }
-    entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
-    for (i = size; i < newSize; ++i) {
-      entries[i].offset = 0xffffffff;
-      entries[i].type = xrefEntryFree;
-      entries[i].obj.initNull ();
-      entries[i].updated = false;
-      entries[i].gen = 0;
-    }
-    size = newSize;
   }
   for (i = first; i < first + n; ++i) {
     if (w[0] == 0) {
@@ -760,13 +772,13 @@ GBool XRef::constructXRef(GBool *wasReconstructed) {
   int newSize;
   int streamEndsSize;
   char *p;
-  int i;
   GBool gotRoot;
   char* token = NULL;
   bool oneCycle = true;
   int offset = 0;
 
   gfree(entries);
+  capacity = 0;
   size = 0;
   entries = NULL;
 
@@ -853,23 +865,10 @@ GBool XRef::constructXRef(GBool *wasReconstructed) {
 		      error(-1, "Bad object number");
 		      return gFalse;
 		    }
-		    if (newSize >= INT_MAX / (int)sizeof(XRefEntry)) {
-		      error(-1, "Invalid 'obj' parameters.");
+		    if (resize(newSize) != newSize) {
+		      error(-1, "Invalid 'obj' parameters");
 		      return gFalse;
 		    }
-		    entries = (XRefEntry *)
-		        greallocn_checkoverflow(entries, newSize, sizeof(XRefEntry));
-		    if (entries == NULL) {
-		      size = 0;
-		      return gFalse;
-		    }
-		    for (i = size; i < newSize; ++i) {
-		      entries[i].offset = 0xffffffff;
-		      entries[i].type = xrefEntryFree;
-		      entries[i].obj.initNull ();
-		      entries[i].updated = false;
-		    }
-		    size = newSize;
 		  }
 		  if (entries[num].type == xrefEntryFree ||
 		      gen >= entries[num].gen) {
@@ -1158,7 +1157,10 @@ Guint XRef::strToUnsigned(char *s) {
 
 void XRef::add(int num, int gen, Guint offs, GBool used) {
   if (num >= size) {
-    entries = (XRefEntry *)greallocn(entries, num + 1, sizeof(XRefEntry));
+    if (num >= capacity) {
+      entries = (XRefEntry *)greallocn(entries, num + 1, sizeof(XRefEntry));
+      capacity = num + 1;
+    }
     for (int i = size; i < num + 1; ++i) {
       entries[i].offset = 0xffffffff;
       entries[i].type = xrefEntryFree;
diff --git a/poppler/XRef.h b/poppler/XRef.h
index f18fa0e..8808485 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -138,7 +138,8 @@ private:
   Guint start;			// offset in file (to allow for garbage
 				//   at beginning of file)
   XRefEntry *entries;		// xref entries
-  int size;			// size of <entries> array
+  int capacity;			// size of <entries> array
+  int size;			// number of entries
   int rootNum, rootGen;		// catalog dict
   GBool ok;			// true if xref table is valid
   int errCode;			// error code (if <ok> is false)
@@ -158,6 +159,8 @@ private:
   GBool ownerPasswordOk;	// true if owner password is correct
 
   void init();
+  int reserve(int newSize);
+  int resize(int newSize);
   Guint getStartXref();
   GBool readXRef(Guint *pos, GooVector<Guint> *followedXRefStm);
   GBool readXRefTable(Parser *parser, Guint *pos, GooVector<Guint> *followedXRefStm);
-- 
1.6.4.2


From 3dc1ff16257ece6acd432137cdbd1ba9a1788cf0 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 28 Apr 2010 12:45:42 +0200
Subject: [PATCH 03/12] Use XRef::add() in XRef::addIndirectObject()

---
 poppler/XRef.cc |    4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index 59b0640..919e0fa 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -1202,10 +1202,8 @@ Ref XRef::addIndirectObject (Object* o) {
   XRefEntry *e;
   if (entryIndexToUse == -1) {
     entryIndexToUse = size;
-    size++;
-    entries = (XRefEntry *)greallocn(entries, size, sizeof(XRefEntry));
+    add(entryIndexToUse, 0, 0, gFalse);
     e = &entries[entryIndexToUse];
-    e->gen = 0;
   } else {
     //reuse a free entry
     e = &entries[entryIndexToUse];
-- 
1.6.4.2


From 733f610a0a28abde59b67b1b3cafa3eee46705b2 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 14 Apr 2010 12:20:49 +0200
Subject: [PATCH 04/12] Use XRef::getEntry() to access entries

---
 poppler/XRef.cc |   49 +++++++++++++++++++++++++------------------------
 poppler/XRef.h  |    2 +-
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index 919e0fa..05f6190 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -990,7 +990,7 @@ Object *XRef::fetch(int num, int gen, Object *obj) {
     goto err;
   }
 
-  e = &entries[num];
+  e = getEntry(num);
   if(!e->obj.isNull ()) { //check for updated object
     obj = e->obj.copy(obj);
     return obj;
@@ -1122,20 +1122,20 @@ GBool XRef::getStreamEnd(Guint streamStart, Guint *streamEnd) {
   return gTrue;
 }
 
-int XRef::getNumEntry(Guint offset) const
+int XRef::getNumEntry(Guint offset)
 {
   if (size > 0)
   {
     int res = 0;
-    Guint resOffset = entries[0].offset;
-    XRefEntry e;
+    Guint resOffset = getEntry(0)->offset;
+    XRefEntry *e;
     for (int i = 1; i < size; ++i)
     {
-      e = entries[i];
-      if (e.offset < offset && e.offset >= resOffset)
+      e = getEntry(i);
+      if (e->offset < offset && e->offset >= resOffset)
       {
         res = i;
-        resOffset = e.offset;
+        resOffset = e->offset;
       }
     }
     return res;
@@ -1170,7 +1170,7 @@ void XRef::add(int num, int gen, Guint offs, GBool used) {
     }
     size = num + 1;
   }
-  XRefEntry *e = &entries[num];
+  XRefEntry *e = getEntry(num);
   e->gen = gen;
   e->obj.initNull ();
   e->updated = false;
@@ -1188,25 +1188,26 @@ void XRef::setModifiedObject (Object* o, Ref r) {
     error(-1,"XRef::setModifiedObject on unknown ref: %i, %i\n", r.num, r.gen);
     return;
   }
-  entries[r.num].obj.free();
-  o->copy(&entries[r.num].obj);
-  entries[r.num].updated = true;
+  XRefEntry *e = getEntry(r.num);
+  e->obj.free();
+  o->copy(&(e->obj));
+  e->updated = true;
 }
 
 Ref XRef::addIndirectObject (Object* o) {
   int entryIndexToUse = -1;
   for (int i = 1; entryIndexToUse == -1 && i < size; ++i) {
-    if (entries[i].type == xrefEntryFree) entryIndexToUse = i;
+    if (getEntry(i)->type == xrefEntryFree) entryIndexToUse = i;
   }
 
   XRefEntry *e;
   if (entryIndexToUse == -1) {
     entryIndexToUse = size;
     add(entryIndexToUse, 0, 0, gFalse);
-    e = &entries[entryIndexToUse];
+    e = getEntry(entryIndexToUse);
   } else {
     //reuse a free entry
-    e = &entries[entryIndexToUse];
+    e = getEntry(entryIndexToUse);
     //we don't touch gen number, because it should have been 
     //incremented when the object was deleted
   }
@@ -1222,13 +1223,13 @@ Ref XRef::addIndirectObject (Object* o) {
 
 void XRef::writeToFile(OutStream* outStr, GBool writeAllEntries) {
   //create free entries linked-list
-  if (entries[0].gen != 65535) {
+  if (getEntry(0)->gen != 65535) {
     error(-1, "XRef::writeToFile, entry 0 of the XRef is invalid (gen != 65535)\n");
   }
   int lastFreeEntry = 0;
   for (int i=0; i<size; i++) {
-    if (entries[i].type == xrefEntryFree) {
-      entries[lastFreeEntry].offset = i;
+    if (getEntry(i)->type == xrefEntryFree) {
+      getEntry(lastFreeEntry)->offset = i;
       lastFreeEntry = i;
     }
   }
@@ -1238,10 +1239,10 @@ void XRef::writeToFile(OutStream* outStr, GBool writeAllEntries) {
     outStr->printf("xref\r\n");
     outStr->printf("%i %i\r\n", 0, size);
     for (int i=0; i<size; i++) {
-      XRefEntry &e = entries[i];
+      XRefEntry *e = getEntry(i);
 
-      if(e.gen > 65535) e.gen = 65535; //cap generation number to 65535 (required by PDFReference)
-      outStr->printf("%010i %05i %c\r\n", e.offset, e.gen, (e.type==xrefEntryFree)?'f':'n');
+      if(e->gen > 65535) e->gen = 65535; //cap generation number to 65535 (required by PDFReference)
+      outStr->printf("%010i %05i %c\r\n", e->offset, e->gen, (e->type==xrefEntryFree)?'f':'n');
     }
   } else {
     //write the new xref
@@ -1250,16 +1251,16 @@ void XRef::writeToFile(OutStream* outStr, GBool writeAllEntries) {
     while (i < size) {
       int j;
       for(j=i; j<size; j++) { //look for consecutive entries
-        if ((entries[j].type == xrefEntryFree) && (entries[j].gen == 0))
+        if ((getEntry(j)->type == xrefEntryFree) && (getEntry(j)->gen == 0))
           break;
       }
       if (j-i != 0)
       {
         outStr->printf("%i %i\r\n", i, j-i);
         for (int k=i; k<j; k++) {
-          XRefEntry &e = entries[k];
-          if(e.gen > 65535) e.gen = 65535; //cap generation number to 65535 (required by PDFReference)
-          outStr->printf("%010i %05i %c\r\n", e.offset, e.gen, (e.type==xrefEntryFree)?'f':'n');
+          XRefEntry *e = getEntry(k);
+          if(e->gen > 65535) e->gen = 65535; //cap generation number to 65535 (required by PDFReference)
+          outStr->printf("%010i %05i %c\r\n", e->offset, e->gen, (e->type==xrefEntryFree)?'f':'n');
         }
         i = j;
       }
diff --git a/poppler/XRef.h b/poppler/XRef.h
index 8808485..260f039 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -119,7 +119,7 @@ public:
   GBool getStreamEnd(Guint streamStart, Guint *streamEnd);
 
   // Retuns the entry that belongs to the offset
-  int getNumEntry(Guint offset) const;
+  int getNumEntry(Guint offset);
 
   // Direct access.
   int getSize() { return size; }
-- 
1.6.4.2


From dea2a4fe01aa78c3defb92a4453f304cc3cb446c Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 15 Apr 2010 17:34:13 +0200
Subject: [PATCH 05/12] Read XRef table sections on demand

---
 poppler/XRef.cc |   58 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 poppler/XRef.h  |    6 +++-
 2 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index 05f6190..ec7afa3 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -287,6 +287,7 @@ XRef::XRef(BaseStream *strA, GBool *wasReconstructed, GBool reconstruct) {
   str = strA;
   start = str->getStart();
   pos = getStartXref();
+  prevXRefOffset = pos;
 
   if (reconstruct && !(ok = constructXRef(wasReconstructed)))
   {
@@ -297,7 +298,7 @@ XRef::XRef(BaseStream *strA, GBool *wasReconstructed, GBool reconstruct) {
   {
     // if there was a problem with the 'startxref' position, try to
     // reconstruct the xref table
-    if (pos == 0) {
+    if (prevXRefOffset == 0) {
       if (!(ok = constructXRef(wasReconstructed))) {
         errCode = errDamaged;
         return;
@@ -306,7 +307,7 @@ XRef::XRef(BaseStream *strA, GBool *wasReconstructed, GBool reconstruct) {
     // read the xref table
     } else {
       GooVector<Guint> followedXRefStm;
-      while (readXRef(&pos, &followedXRefStm)) ;
+      readXRef(&prevXRefOffset, &followedXRefStm);
 
       // if there was a problem with the xref table,
       // try to reconstruct it
@@ -318,6 +319,18 @@ XRef::XRef(BaseStream *strA, GBool *wasReconstructed, GBool reconstruct) {
       }
     }
 
+    // set size according to trailer dict
+    trailerDict.dictLookupNF("Size", &obj);
+    if (obj.isInt() && (resize(obj.getInt()) == obj.getInt())) {
+      obj.free();
+    } else {
+      obj.free();
+      if (!(ok = constructXRef(wasReconstructed))) {
+        errCode = errDamaged;
+        return;
+      }
+    }
+
     // get the root dictionary (catalog) object
     trailerDict.dictLookupNF("Root", &obj);
     if (obj.isRef()) {
@@ -386,7 +399,7 @@ int XRef::resize(int newSize)
 
     for (int i = size; i < newSize; ++i) {
       entries[i].offset = 0xffffffff;
-      entries[i].type = xrefEntryFree;
+      entries[i].type = xrefEntryNone;
       entries[i].obj.initNull ();
       entries[i].updated = false;
       entries[i].gen = 0;
@@ -1269,3 +1282,42 @@ void XRef::writeToFile(OutStream* outStr, GBool writeAllEntries) {
   }
 }
 
+XRefEntry *XRef::getEntry(int i)
+{
+  if (entries[i].type == xrefEntryNone) {
+
+      GooVector<Guint> followedPrev;
+      while (prevXRefOffset && entries[i].type == xrefEntryNone) {
+        bool ok = true;
+        for (size_t j = 0; j < followedPrev.size(); j++) {
+          if (followedPrev.at(j) == prevXRefOffset) {
+            ok = false;
+            break;
+          }
+        }
+        if (!ok) {
+          error(-1, "Circular XRef");
+          if (!(ok = constructXRef(NULL))) {
+            errCode = errDamaged;
+          }
+          break;
+        }
+
+        followedPrev.push_back (prevXRefOffset);
+
+        GooVector<Guint> followedXRefStm;
+        if (!readXRef(&prevXRefOffset, &followedXRefStm)) {
+            prevXRefOffset = 0;
+        }
+      }
+
+      if (entries[i].type == xrefEntryNone) {
+         error(-1, "Invalid XRef entry");
+         entries[i].type = xrefEntryFree;
+      }
+  }
+
+  return &entries[i];
+}
+
+
diff --git a/poppler/XRef.h b/poppler/XRef.h
index 260f039..d37e31d 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -47,7 +47,8 @@ class PopplerCache;
 enum XRefEntryType {
   xrefEntryFree,
   xrefEntryUncompressed,
-  xrefEntryCompressed
+  xrefEntryCompressed,
+  xrefEntryNone
 };
 
 struct XRefEntry {
@@ -123,7 +124,7 @@ public:
 
   // Direct access.
   int getSize() { return size; }
-  XRefEntry *getEntry(int i) { return &entries[i]; }
+  XRefEntry *getEntry(int i);
   Object *getTrailerDict() { return &trailerDict; }
 
   // Write access
@@ -157,6 +158,7 @@ private:
   int permFlags;		// permission bits
   Guchar fileKey[16];		// file decryption key
   GBool ownerPasswordOk;	// true if owner password is correct
+  Guint prevXRefOffset;		// position of prev XRef section (= next to read)
 
   void init();
   int reserve(int newSize);
-- 
1.6.4.2


From c1595512fe799ad361282e9ba7575b1ddcbd2680 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 24 Mar 2010 18:26:17 +0100
Subject: [PATCH 06/12] Add Linearization dictionary support

---
 CMakeLists.txt           |    2 +
 poppler/Linearization.cc |  225 ++++++++++++++++++++++++++++++++++++++++++++++
 poppler/Linearization.h  |   45 +++++++++
 poppler/Makefile.am      |    2 +
 poppler/PDFDoc.cc        |   13 +++
 poppler/PDFDoc.h         |    5 +
 6 files changed, 292 insertions(+), 0 deletions(-)
 create mode 100644 poppler/Linearization.cc
 create mode 100644 poppler/Linearization.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 406ba88..7c25c45 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -251,6 +251,7 @@ set(poppler_SRCS
   poppler/JBIG2Stream.cc
   poppler/Lexer.cc
   poppler/Link.cc
+  poppler/Linearization.cc
   poppler/LocalPDFDocBuilder.cc
   poppler/NameToCharCode.cc
   poppler/Object.cc
@@ -397,6 +398,7 @@ if(ENABLE_XPDF_HEADERS)
     poppler/JBIG2Stream.h
     poppler/Lexer.h
     poppler/Link.h
+    poppler/Linearization.h
     poppler/LocalPDFDocBuilder.h
     poppler/Movie.h
     poppler/NameToCharCode.h
diff --git a/poppler/Linearization.cc b/poppler/Linearization.cc
new file mode 100644
index 0000000..23c77f2
--- /dev/null
+++ b/poppler/Linearization.cc
@@ -0,0 +1,225 @@
+//========================================================================
+//
+// Linearization.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2010 Hib Eris <hib at hiberis.nl>
+//
+//========================================================================
+
+#include "Linearization.h"
+#include "Parser.h"
+#include "Lexer.h"
+
+//------------------------------------------------------------------------
+// Linearization
+//------------------------------------------------------------------------
+
+Linearization::Linearization (BaseStream *str)
+{
+  Parser *parser;
+  Object obj1, obj2, obj3, obj4, obj5;
+
+  linDict.initNull();
+
+  str->reset();
+  obj1.initNull();
+  parser = new Parser(NULL,
+      new Lexer(NULL, str->makeSubStream(str->getStart(), gFalse, 0, &obj1)),
+      gFalse);
+  parser->getObj(&obj1);
+  parser->getObj(&obj2);
+  parser->getObj(&obj3);
+  parser->getObj(&linDict);
+  parser->getObj(&obj4);
+  if (obj1.isInt() && obj2.isInt() && obj3.isCmd("obj") && linDict.isDict()) {
+    linDict.dictLookup("Linearized", &obj5);
+    if (!(obj5.isNum() && obj5.getNum() > 0)) {
+       linDict.free();
+       linDict.initNull();
+    }
+    obj5.free();
+  }
+  obj4.free();
+  obj4.free();
+  obj3.free();
+  obj2.free();
+  obj1.free();
+  delete parser;
+}
+
+Linearization:: ~Linearization()
+{
+  linDict.free();
+}
+
+Guint Linearization::getLength()
+{
+  if (!linDict.isDict()) return 0;
+
+  int length;
+  if (linDict.getDict()->lookupInt("L", NULL, &length) &&
+      length > 0) {
+    return length;
+  } else {
+    error(-1, "Length in linearization table is invalid");
+    return 0;
+  }
+}
+
+Guint Linearization::getHintsOffset()
+{
+  int hintsOffset;
+
+  Object obj1, obj2;
+  if (linDict.isDict() &&
+      linDict.dictLookup("H", &obj1)->isArray() &&
+      obj1.arrayGetLength()>=2 &&
+      obj1.arrayGet(0, &obj2)->isInt() &&
+      obj2.getInt() > 0) {
+    hintsOffset = obj2.getInt();
+  } else {
+    error(-1, "Hints table offset in linearization table is invalid");
+    hintsOffset = 0;
+  }
+  obj2.free();
+  obj1.free();
+
+  return hintsOffset;
+}
+
+Guint Linearization::getHintsLength()
+{
+  int hintsLength;
+
+  Object obj1, obj2;
+  if (linDict.isDict() &&
+      linDict.dictLookup("H", &obj1)->isArray() &&
+      obj1.arrayGetLength()>=2 &&
+      obj1.arrayGet(1, &obj2)->isInt() &&
+      obj2.getInt() > 0) {
+    hintsLength = obj2.getInt();
+  } else {
+    error(-1, "Hints table length in linearization table is invalid");
+    hintsLength = 0;
+  }
+  obj2.free();
+  obj1.free();
+
+  return hintsLength;
+}
+
+Guint Linearization::getHintsOffset2()
+{
+  int hintsOffset2 = 0; // default to 0
+
+  Object obj1, obj2;
+  if (linDict.isDict() &&
+      linDict.dictLookup("H", &obj1)->isArray() &&
+      obj1.arrayGetLength()>=4) {
+    if (obj1.arrayGet(2, &obj2)->isInt() &&
+        obj2.getInt() > 0) {
+      hintsOffset2 = obj2.getInt();
+    } else {
+      error(-1, "Second hints table offset in linearization table is invalid");
+      hintsOffset2 = 0;
+    }
+  }
+  obj2.free();
+  obj1.free();
+
+  return hintsOffset2;
+}
+
+Guint Linearization::getHintsLength2()
+{
+  int hintsLength2 = 0; // default to 0
+
+  Object obj1, obj2;
+  if (linDict.isDict() &&
+      linDict.dictLookup("H", &obj1)->isArray() &&
+      obj1.arrayGetLength()>=4) {
+    if (obj1.arrayGet(3, &obj2)->isInt() &&
+        obj2.getInt() > 0) {
+      hintsLength2 = obj2.getInt();
+    } else {
+      error(-1, "Second hints table length in linearization table is invalid");
+      hintsLength2 = 0;
+    }
+  }
+  obj2.free();
+  obj1.free();
+
+  return hintsLength2;
+}
+
+int Linearization::getObjectNumberFirst()
+{
+  int objectNumberFirst = 0;
+  if (linDict.isDict() &&
+      linDict.getDict()->lookupInt("O", NULL, &objectNumberFirst) &&
+      objectNumberFirst > 0) {
+    return objectNumberFirst;
+  } else {
+    error(-1, "Object number of first page in linearization table is invalid");
+    return 0;
+  }
+}
+
+Guint Linearization::getEndFirst()
+{
+  int pageEndFirst = 0;
+  if (linDict.isDict() &&
+      linDict.getDict()->lookupInt("E", NULL, &pageEndFirst) &&
+      pageEndFirst > 0) {
+    return pageEndFirst;
+  } else {
+    error(-1, "First page end offset in linearization table is invalid");
+    return 0;
+  }
+}
+
+int Linearization::getNumPages()
+{
+  int numPages = 0;
+  if (linDict.isDict() &&
+      linDict.getDict()->lookupInt("N", NULL, &numPages) &&
+      numPages > 0) {
+    return numPages;
+  } else {
+    error(-1, "Page count in linearization table is invalid");
+    return 0;
+  }
+}
+
+Guint Linearization::getMainXRefEntriesOffset()
+{
+  int mainXRefEntriesOffset = 0;
+  if (linDict.isDict() &&
+      linDict.getDict()->lookupInt("T", NULL, &mainXRefEntriesOffset) &&
+      mainXRefEntriesOffset > 0) {
+    return mainXRefEntriesOffset;
+  } else {
+    error(-1, "Main Xref offset in linearization table is invalid");
+    return 0;
+  }
+}
+
+int Linearization::getPageFirst()
+{
+  int pageFirst = 0; // Optional, defaults to 0.
+
+  if (linDict.isDict()) {
+    linDict.getDict()->lookupInt("P", NULL, &pageFirst);
+  }
+
+  if (pageFirst < 0) {
+    error(-1, "First page in linearization table is invalid");
+    return 0;
+  }
+
+  return pageFirst;
+}
+
+
diff --git a/poppler/Linearization.h b/poppler/Linearization.h
new file mode 100644
index 0000000..6728a75
--- /dev/null
+++ b/poppler/Linearization.h
@@ -0,0 +1,45 @@
+//========================================================================
+//
+// Linearization.h
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2010 Hib Eris <hib at hiberis.nl>
+//
+//========================================================================
+
+#ifndef LINEARIZATION_H
+#define LINEARIZATION_H
+
+#include "goo/gtypes.h"
+#include "Object.h"
+class BaseStream;
+
+//------------------------------------------------------------------------
+// Linearization
+//------------------------------------------------------------------------
+
+class Linearization {
+public:
+
+  Linearization(BaseStream *str);
+  ~Linearization();
+
+  Guint getLength();
+  Guint getHintsOffset();
+  Guint getHintsLength();
+  Guint getHintsOffset2();
+  Guint getHintsLength2();
+  int getObjectNumberFirst();
+  Guint getEndFirst();
+  int getNumPages();
+  Guint getMainXRefEntriesOffset();
+  int getPageFirst();
+
+private:
+
+  Object linDict;
+
+};
+
+#endif
diff --git a/poppler/Makefile.am b/poppler/Makefile.am
index ccc388f..bb6daa6 100644
--- a/poppler/Makefile.am
+++ b/poppler/Makefile.am
@@ -209,6 +209,7 @@ poppler_include_HEADERS =	\
 	JArithmeticDecoder.h	\
 	JBIG2Stream.h		\
 	Lexer.h			\
+	Linearization.h 	\
 	Link.h			\
 	LocalPDFDocBuilder.h	\
 	Movie.h                 \
@@ -287,6 +288,7 @@ libpoppler_la_SOURCES =		\
 	JArithmeticDecoder.cc	\
 	JBIG2Stream.cc		\
 	Lexer.cc 		\
+	Linearization.cc 	\
 	Link.cc 		\
 	LocalPDFDocBuilder.cc	\
 	Movie.cc                \
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 8155250..86ce391 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -55,6 +55,7 @@
 #include "Catalog.h"
 #include "Stream.h"
 #include "XRef.h"
+#include "Linearization.h"
 #include "Link.h"
 #include "OutputDev.h"
 #include "Error.h"
@@ -86,6 +87,7 @@ void PDFDoc::init()
   file = NULL;
   str = NULL;
   xref = NULL;
+  linearization = NULL;
   catalog = NULL;
 #ifndef DISABLE_OUTLINE
   outline = NULL;
@@ -259,6 +261,9 @@ PDFDoc::~PDFDoc() {
   if (xref) {
     delete xref;
   }
+  if (linearization) {
+    delete linearization;
+  }
   if (str) {
     delete str;
   }
@@ -433,6 +438,14 @@ void PDFDoc::processLinks(OutputDev *out, int page) {
     catalog->getPage(page)->processLinks(out, catalog);
 }
 
+Linearization *PDFDoc::getLinearization()
+{
+  if (!linearization) {
+    linearization = new Linearization(str);
+  }
+  return linearization;
+}
+
 GBool PDFDoc::isLinearized() {
   Parser *parser;
   Object obj1, obj2, obj3, obj4, obj5;
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 8fa2dcf..5359ddb 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -49,6 +49,7 @@ class Links;
 class LinkAction;
 class LinkDest;
 class Outline;
+class Linearization;
 
 enum PDFWriteMode {
   writeStandard,
@@ -90,6 +91,9 @@ public:
   // Get file name.
   GooString *getFileName() { return fileName; }
 
+  // Get the linearization table.
+  Linearization *getLinearization();
+
   // Get the xref table.
   XRef *getXRef() { return xref; }
 
@@ -246,6 +250,7 @@ private:
   void *guiData;
   int pdfMajorVersion;
   int pdfMinorVersion;
+  Linearization *linearization;
   XRef *xref;
   Catalog *catalog;
 #ifndef DISABLE_OUTLINE
-- 
1.6.4.2


From 20e39a13ec666f94b29e5921bd1c533b8954f083 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 13 Apr 2010 18:51:40 +0200
Subject: [PATCH 07/12] Add getLength() to BaseStream

---
 poppler/Stream.cc |   11 ++++++-----
 poppler/Stream.h  |   11 ++++++-----
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/poppler/Stream.cc b/poppler/Stream.cc
index 0fb3884..fbf2b33 100644
--- a/poppler/Stream.cc
+++ b/poppler/Stream.cc
@@ -372,8 +372,9 @@ void FileOutStream::printf(const char *format, ...)
 // BaseStream
 //------------------------------------------------------------------------
 
-BaseStream::BaseStream(Object *dictA) {
+BaseStream::BaseStream(Object *dictA, Guint lengthA) {
   dict = *dictA;
+  length = lengthA;
 }
 
 BaseStream::~BaseStream() {
@@ -694,7 +695,7 @@ GBool StreamPredictor::getNextLine() {
 
 FileStream::FileStream(FILE *fA, Guint startA, GBool limitedA,
 		       Guint lengthA, Object *dictA):
-    BaseStream(dictA) {
+    BaseStream(dictA, lengthA) {
   f = fA;
   start = startA;
   limited = limitedA;
@@ -819,7 +820,7 @@ void FileStream::moveStart(int delta) {
 
 CachedFileStream::CachedFileStream(CachedFile *ccA, Guint startA,
         GBool limitedA, Guint lengthA, Object *dictA)
-  : BaseStream(dictA)
+  : BaseStream(dictA, lengthA)
 {
   cc = ccA;
   start = startA;
@@ -917,7 +918,7 @@ void CachedFileStream::moveStart(int delta)
 //------------------------------------------------------------------------
 
 MemStream::MemStream(char *bufA, Guint startA, Guint lengthA, Object *dictA):
-    BaseStream(dictA) {
+    BaseStream(dictA, lengthA) {
   buf = bufA;
   start = startA;
   length = lengthA;
@@ -981,7 +982,7 @@ void MemStream::moveStart(int delta) {
 
 EmbedStream::EmbedStream(Stream *strA, Object *dictA,
 			 GBool limitedA, Guint lengthA):
-    BaseStream(dictA) {
+    BaseStream(dictA, lengthA) {
   str = strA;
   limited = limitedA;
   length = lengthA;
diff --git a/poppler/Stream.h b/poppler/Stream.h
index 583278f..e99f03b 100644
--- a/poppler/Stream.h
+++ b/poppler/Stream.h
@@ -293,7 +293,7 @@ private:
 class BaseStream: public Stream {
 public:
 
-  BaseStream(Object *dictA);
+  BaseStream(Object *dictA, Guint lengthA);
   virtual ~BaseStream();
   virtual Stream *makeSubStream(Guint start, GBool limited,
 				Guint length, Object *dict) = 0;
@@ -303,11 +303,16 @@ public:
   virtual Stream *getUndecodedStream() { return this; }
   virtual Dict *getDict() { return dict.getDict(); }
   virtual GooString *getFileName() { return NULL; }
+  virtual Guint getLength() { return length; }
 
   // Get/set position of first byte of stream within the file.
   virtual Guint getStart() = 0;
   virtual void moveStart(int delta) = 0;
 
+protected:
+
+  Guint length;
+
 private:
 
   Object dict;
@@ -478,7 +483,6 @@ private:
   FILE *f;
   Guint start;
   GBool limited;
-  Guint length;
   char buf[fileStreamBufSize];
   char *bufPtr;
   char *bufEnd;
@@ -523,7 +527,6 @@ private:
   CachedFile *cc;
   Guint start;
   GBool limited;
-  Guint length;
   char buf[cachedStreamBufSize];
   char *bufPtr;
   char *bufEnd;
@@ -567,7 +570,6 @@ private:
 
   char *buf;
   Guint start;
-  Guint length;
   char *bufEnd;
   char *bufPtr;
   GBool needFree;
@@ -607,7 +609,6 @@ private:
 
   Stream *str;
   GBool limited;
-  Guint length;
 };
 
 //------------------------------------------------------------------------
-- 
1.6.4.2


From 1a9f4eea4a9f9813391d6f70e0f11fe618eb4fff Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 24 Mar 2010 19:16:14 +0100
Subject: [PATCH 08/12] Pass size of file when creating FileStream

---
 poppler/PDFDoc.cc |   19 +++++++++++++++++--
 1 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 86ce391..a0e92bd 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -47,6 +47,7 @@
 #ifdef _WIN32
 #  include <windows.h>
 #endif
+#include <sys/stat.h>
 #include "goo/gstrtod.h"
 #include "goo/GooString.h"
 #include "poppler-config.h"
@@ -102,12 +103,18 @@ PDFDoc::PDFDoc()
 PDFDoc::PDFDoc(GooString *fileNameA, GooString *ownerPassword,
 	       GooString *userPassword, void *guiDataA) {
   Object obj;
+  int size = 0;
 
   init();
 
   fileName = fileNameA;
   guiData = guiDataA;
 
+  struct stat buf;
+  if (stat(fileName->getCString(), &buf) == 0) {
+     size = buf.st_size;
+  }
+
   // try to open file
 #ifdef VMS
   file = fopen(fileName->getCString(), "rb", "ctx=stm");
@@ -127,7 +134,7 @@ PDFDoc::PDFDoc(GooString *fileNameA, GooString *ownerPassword,
 
   // create stream
   obj.initNull();
-  str = new FileStream(file, 0, gFalse, 0, &obj);
+  str = new FileStream(file, 0, gFalse, size, &obj);
 
   ok = setup(ownerPassword, userPassword);
 }
@@ -158,11 +165,19 @@ PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GooString *ownerPassword,
 
   // try to open file
   // NB: _wfopen is only available in NT
+  struct _stat buf;  // buffer type taken by both _stat() and _wstat()
+  int size = 0;  // stays 0 (the length passed before this patch) if stat fails
   version.dwOSVersionInfoSize = sizeof(version);
   GetVersionEx(&version);
   if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
+    if (_wstat(fileName2, &buf) == 0) {
+      size = buf.st_size;
+    }
     file = _wfopen(fileName2, L"rb");
   } else {
+    if (_stat(fileName->getCString(), &buf) == 0) {  // narrow path: not _wstat
+      size = buf.st_size;
+    }
     file = fopen(fileName->getCString(), "rb");
   }
   if (!file) {
@@ -173,7 +188,7 @@ PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GooString *ownerPassword,
 
   // create stream
   obj.initNull();
-  str = new FileStream(file, 0, gFalse, 0, &obj);
+  str = new FileStream(file, 0, gFalse, size, &obj);
 
   ok = setup(ownerPassword, userPassword);
 }
-- 
1.6.4.2


From 942af54a643b61c6ace635648981f2638903518e Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 24 Mar 2010 19:32:59 +0100
Subject: [PATCH 09/12] Improve linearization check

---
 poppler/PDFDoc.cc |   33 +++++----------------------------
 1 files changed, 5 insertions(+), 28 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index a0e92bd..548d443 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -462,34 +462,11 @@ Linearization *PDFDoc::getLinearization()
 }
 
 GBool PDFDoc::isLinearized() {
-  Parser *parser;
-  Object obj1, obj2, obj3, obj4, obj5;
-  GBool lin;
-
-  lin = gFalse;
-  obj1.initNull();
-  parser = new Parser(xref,
-	     new Lexer(xref,
-	       str->makeSubStream(str->getStart(), gFalse, 0, &obj1)),
-	     gTrue);
-  parser->getObj(&obj1);
-  parser->getObj(&obj2);
-  parser->getObj(&obj3);
-  parser->getObj(&obj4);
-  if (obj1.isInt() && obj2.isInt() && obj3.isCmd("obj") &&
-      obj4.isDict()) {
-    obj4.dictLookup("Linearized", &obj5);
-    if (obj5.isNum() && obj5.getNum() > 0) {
-      lin = gTrue;
-    }
-    obj5.free();
-  }
-  obj4.free();
-  obj3.free();
-  obj2.free();
-  obj1.free();
-  delete parser;
-  return lin;
+  if ((str->getLength()) &&
+      (getLinearization()->getLength() == str->getLength()))
+    return gTrue;
+  else
+    return gFalse;
 }
 
 static GBool
-- 
1.6.4.2


From c9b8b0e18adbfb890305cbff76c2cab0190826f9 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 7 Apr 2010 12:05:56 +0200
Subject: [PATCH 10/12] Move getStartXref from XRef to PDFDoc

---
 poppler/PDFDoc.cc |   63 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 poppler/PDFDoc.h  |    5 ++++
 poppler/XRef.cc   |   52 +------------------------------------------
 poppler/XRef.h    |    6 +----
 4 files changed, 67 insertions(+), 59 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 548d443..51ff07a 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -37,6 +37,7 @@
 #pragma implementation
 #endif
 
+#include <ctype.h>
 #include <locale.h>
 #include <stdio.h>
 #include <errno.h>
@@ -76,6 +77,9 @@
 				//   file to look for '%PDF'
 #define pdfIdLength 32   // PDF Document IDs (PermanentId, UpdateId) length
 
+#define xrefSearchSize 1024	// read this many bytes at end of file
+				//   to look for 'startxref'
+
 //------------------------------------------------------------------------
 // PDFDoc
 //------------------------------------------------------------------------
@@ -93,6 +97,7 @@ void PDFDoc::init()
 #ifndef DISABLE_OUTLINE
   outline = NULL;
 #endif
+  startXRefPos = ~(Guint)0;
 }
 
 PDFDoc::PDFDoc()
@@ -228,7 +233,7 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
   GBool wasReconstructed = false;
 
   // read xref table
-  xref = new XRef(str, &wasReconstructed);
+  xref = new XRef(str, getStartXRef(), &wasReconstructed);
   if (!xref->isOk()) {
     error(-1, "Couldn't read xref table");
     errCode = xref->getErrorCode();
@@ -249,7 +254,7 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
       // try one more time to contruct the Catalog, maybe the problem is damaged XRef 
       delete catalog;
       delete xref;
-      xref = new XRef(str, NULL, true);
+      xref = new XRef(str, 0, NULL, true);
       catalog = new Catalog(xref);
     }
 
@@ -975,7 +980,7 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr,
   trailerDict->set("Root", &obj1);
 
   if (incrUpdate) { 
-    obj1.initInt(xref->getLastXRefPos());
+    obj1.initInt(getStartXRef());
     trailerDict->set("Prev", &obj1);
   }
   
@@ -1013,3 +1018,55 @@ PDFDoc *PDFDoc::ErrorPDFDoc(int errorCode, GooString *fileNameA)
 
   return doc;
 }
+
+Guint PDFDoc::strToUnsigned(char *s) {
+  Guint x;
+  char *p;
+  int i;
+
+  x = 0;
+  for (p = s, i = 0; *p && isdigit(*p) && i < 10; ++p, ++i) {
+    x = 10 * x + (*p - '0');
+  }
+  return x;
+}
+
+// Read the 'startxref' position (0 if no 'startxref' keyword is found).
+Guint PDFDoc::getStartXRef()
+{
+  if (startXRefPos == ~(Guint)0) {
+
+    {
+      char buf[xrefSearchSize+1];
+      char *p;
+      int c, n, i;
+
+      // read last xrefSearchSize bytes
+      str->setPos(xrefSearchSize, -1);
+      for (n = 0; n < xrefSearchSize; ++n) {
+        if ((c = str->getChar()) == EOF) {
+          break;
+        }
+        buf[n] = c;
+      }
+      buf[n] = '\0';
+
+      // find startxref
+      for (i = n - 9; i >= 0; --i) {
+        if (!strncmp(&buf[i], "startxref", 9)) {
+          break;
+        }
+      }
+      startXRefPos = 0;  // result when the keyword is missing (i < 0)
+      if (i >= 0) {
+        for (p = &buf[i+9]; isspace(*p); ++p) ;
+        startXRefPos = strToUnsigned(p);
+      }
+    }
+
+  }
+
+  return startXRefPos;
+}
+
+
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 5359ddb..457d41b 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -243,6 +243,9 @@ private:
   GBool checkFooter();
   void checkHeader();
   GBool checkEncryption(GooString *ownerPassword, GooString *userPassword);
+  // Get the offset of the start xref table.
+  Guint getStartXRef();
+  Guint strToUnsigned(char *s);
 
   GooString *fileName;
   FILE *file;
@@ -262,6 +265,8 @@ private:
   //If there is an error opening the PDF file with fopen() in the constructor, 
   //then the POSIX errno will be here.
   int fopenErrno;
+
+  Guint startXRefPos;		// offset of last xref table
 };
 
 #endif
diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index ec7afa3..df23539 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -48,11 +48,6 @@
 #include "PopplerCache.h"
 
 //------------------------------------------------------------------------
-
-#define xrefSearchSize 1024	// read this many bytes at end of file
-				//   to look for 'startxref'
-
-//------------------------------------------------------------------------
 // Permission bits
 // Note that the PDF spec uses 1 base (eg bit 3 is 1<<2)
 //------------------------------------------------------------------------
@@ -273,8 +268,7 @@ XRef::XRef() {
   init();
 }
 
-XRef::XRef(BaseStream *strA, GBool *wasReconstructed, GBool reconstruct) {
-  Guint pos;
+XRef::XRef(BaseStream *strA, Guint pos, GBool *wasReconstructed, GBool reconstruct) {
   Object obj;
 
   init();
@@ -286,7 +280,6 @@ XRef::XRef(BaseStream *strA, GBool *wasReconstructed, GBool reconstruct) {
   // read the trailer
   str = strA;
   start = str->getStart();
-  pos = getStartXref();
   prevXRefOffset = pos;
 
   if (reconstruct && !(ok = constructXRef(wasReconstructed)))
@@ -415,37 +408,6 @@ int XRef::resize(int newSize)
   return size;
 }
 
-// Read the 'startxref' position.
-Guint XRef::getStartXref() {
-  char buf[xrefSearchSize+1];
-  char *p;
-  int c, n, i;
-
-  // read last xrefSearchSize bytes
-  str->setPos(xrefSearchSize, -1);
-  for (n = 0; n < xrefSearchSize; ++n) {
-    if ((c = str->getChar()) == EOF) {
-      break;
-    }
-    buf[n] = c;
-  }
-  buf[n] = '\0';
-
-  // find startxref
-  for (i = n - 9; i >= 0; --i) {
-    if (!strncmp(&buf[i], "startxref", 9)) {
-      break;
-    }
-  }
-  if (i < 0) {
-    return 0;
-  }
-  for (p = &buf[i+9]; isspace(*p); ++p) ;
-  lastXRefPos = strToUnsigned(p);
-
-  return lastXRefPos;
-}
-
 // Read one xref table section.  Also reads the associated trailer
 // dictionary, and returns the prev pointer (if any).
 GBool XRef::readXRef(Guint *pos, GooVector<Guint> *followedXRefStm) {
@@ -1156,18 +1118,6 @@ int XRef::getNumEntry(Guint offset)
   else return -1;
 }
 
-Guint XRef::strToUnsigned(char *s) {
-  Guint x;
-  char *p;
-  int i;
-
-  x = 0;
-  for (p = s, i = 0; *p && isdigit(*p) && i < 10; ++p, ++i) {
-    x = 10 * x + (*p - '0');
-  }
-  return x;
-}
-
 void XRef::add(int num, int gen, Guint offs, GBool used) {
   if (num >= size) {
     if (num >= capacity) {
diff --git a/poppler/XRef.h b/poppler/XRef.h
index d37e31d..75b065a 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -65,7 +65,7 @@ public:
   // Constructor, create an empty XRef, used for PDF writing
   XRef();
   // Constructor.  Read xref table from stream.
-  XRef(BaseStream *strA, GBool *wasReconstructed = NULL, GBool reconstruct = false);
+  XRef(BaseStream *strA, Guint pos, GBool *wasReconstructed = NULL, GBool reconstruct = false);
 
   // Destructor.
   ~XRef();
@@ -108,9 +108,6 @@ public:
   // Return the number of objects in the xref table.
   int getNumObjects() { return size; }
 
-  // Return the offset of the last xref table.
-  Guint getLastXRefPos() { return lastXRefPos; }
-
   // Return the catalog object reference.
   int getRootNum() { return rootNum; }
   int getRootGen() { return rootGen; }
@@ -145,7 +142,6 @@ private:
   GBool ok;			// true if xref table is valid
   int errCode;			// error code (if <ok> is false)
   Object trailerDict;		// trailer dictionary
-  Guint lastXRefPos;		// offset of last xref table
   Guint *streamEnds;		// 'endstream' positions - only used in
 				//   damaged files
   int streamEndsLen;		// number of valid entries in streamEnds
-- 
1.6.4.2


From a5fd9f22684ce3983c5e61c03fd1bb0beb71bbfe Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 7 Apr 2010 12:35:05 +0200
Subject: [PATCH 11/12] Use XRef table at start of linearized document

---
 poppler/PDFDoc.cc |   27 ++++++++++++++++++++++++++-
 1 files changed, 26 insertions(+), 1 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 51ff07a..f3449e7 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -77,6 +77,10 @@
 				//   file to look for '%PDF'
 #define pdfIdLength 32   // PDF Document IDs (PermanentId, UpdateId) length
 
+#define linearizationSearchSize 1024	// read this many bytes at beginning of
+					// file to look for linearization
+					// dictionary
+
 #define xrefSearchSize 1024	// read this many bytes at end of file
 				//   to look for 'startxref'
 
@@ -1036,7 +1040,28 @@ Guint PDFDoc::getStartXRef()
 {
   if (startXRefPos == ~(Guint)0) {
 
-    {
+    if (isLinearized()) {
+      char buf[linearizationSearchSize+1];
+      int c, n, i;
+
+      str->setPos(0);
+      for (n = 0; n < linearizationSearchSize; ++n) {
+        if ((c = str->getChar()) == EOF) {
+          break;
+        }
+        buf[n] = c;
+      }
+      buf[n] = '\0';
+
+      // find end of first obj
+      startXRefPos = 0;
+      for (i = 0; i < n; i++) {
+        if (!strncmp("endobj", &buf[i], 6)) {
+           startXRefPos = i+6;
+           break;
+        }
+      }
+    } else {
       char buf[xrefSearchSize+1];
       char *p;
       int c, n, i;
-- 
1.6.4.2


From 56bd2c707b5db4b19db9f2bddeba7e85b6ec841c Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Sun, 25 Apr 2010 17:34:49 +0200
Subject: [PATCH 12/12] Use linearization data to parse XRef entries

---
 poppler/PDFDoc.cc |   14 ++++++++++++--
 poppler/PDFDoc.h  |    3 +++
 poppler/XRef.cc   |   43 ++++++++++++++++++++++++++++++++++++++++++-
 poppler/XRef.h    |    6 +++++-
 4 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index f3449e7..48b62f7 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -237,7 +237,7 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
   GBool wasReconstructed = false;
 
   // read xref table
-  xref = new XRef(str, getStartXRef(), &wasReconstructed);
+  xref = new XRef(str, getStartXRef(), getMainXRefEntriesOffset(), &wasReconstructed);
   if (!xref->isOk()) {
     error(-1, "Couldn't read xref table");
     errCode = xref->getErrorCode();
@@ -258,7 +258,7 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
       // try one more time to contruct the Catalog, maybe the problem is damaged XRef 
       delete catalog;
       delete xref;
-      xref = new XRef(str, 0, NULL, true);
+      xref = new XRef(str, 0, 0, NULL, true);
       catalog = new Catalog(xref);
     }
 
@@ -1094,4 +1094,14 @@ Guint PDFDoc::getStartXRef()
   return startXRefPos;
 }
 
+Guint PDFDoc::getMainXRefEntriesOffset()
+{
+  Guint mainXRefEntriesOffset = 0;
+
+  if (isLinearized()) {
+    mainXRefEntriesOffset = getLinearization()->getMainXRefEntriesOffset();
+  }
+
+  return mainXRefEntriesOffset;
+}
 
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 457d41b..21f1864 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -245,6 +245,9 @@ private:
   GBool checkEncryption(GooString *ownerPassword, GooString *userPassword);
   // Get the offset of the start xref table.
   Guint getStartXRef();
+  // Get the offset of the entries in the main XRef table of a
+  // linearized document (0 for non linearized documents).
+  Guint getMainXRefEntriesOffset();
   Guint strToUnsigned(char *s);
 
   GooString *fileName;
diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index df23539..d9591e7 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -262,16 +262,19 @@ void XRef::init() {
   streamEnds = NULL;
   streamEndsLen = 0;
   objStrs = new PopplerCache(5);
+  mainXRefEntriesOffset = 0;
+  xRefStream = gFalse;
 }
 
 XRef::XRef() {
   init();
 }
 
-XRef::XRef(BaseStream *strA, Guint pos, GBool *wasReconstructed, GBool reconstruct) {
+XRef::XRef(BaseStream *strA, Guint pos, Guint mainXRefEntriesOffsetA, GBool *wasReconstructed, GBool reconstruct) {
   Object obj;
 
   init();
+  mainXRefEntriesOffset = mainXRefEntriesOffsetA;
 
   encrypted = gFalse;
   permFlags = defPermFlags;
@@ -442,6 +445,9 @@ GBool XRef::readXRef(Guint *pos, GooVector<Guint> *followedXRefStm) {
     if (!parser->getObj(&obj)->isStream()) {
       goto err1;
     }
+    if (trailerDict.isNone()) {
+      xRefStream = gTrue;
+    }
     more = readXRefStream(obj.getStream(), pos);
     obj.free();
 
@@ -1232,10 +1238,44 @@ void XRef::writeToFile(OutStream* outStr, GBool writeAllEntries) {
   }
 }
 
+// Parse one xref table entry (offset, generation, 'n'/'f') at <offset>.
+GBool XRef::parseEntry(Guint offset, XRefEntry *entry)
+{
+  GBool r;
+  Object obj;
+  obj.initNull();
+  // direct-initialize: never depend on eliding a copy of a temporary Parser
+  Parser parser(NULL, new Lexer(NULL,
+     str->makeSubStream(offset, gFalse, 20, &obj)), gTrue);
+  Object obj1, obj2, obj3;
+  if ((parser.getObj(&obj1)->isInt()) &&
+      (parser.getObj(&obj2)->isInt()) &&
+      (parser.getObj(&obj3)->isCmd("n") || obj3.isCmd("f"))) {
+    entry->offset = (Guint) obj1.getInt();
+    entry->gen = obj2.getInt();
+    entry->type = obj3.isCmd("n") ? xrefEntryUncompressed : xrefEntryFree;
+    entry->obj.initNull ();
+    entry->updated = false;
+    r = gTrue;
+  } else {
+    r = gFalse;
+  }
+  obj1.free();
+  obj2.free();
+  obj3.free();
+
+  return r;
+}
+
 XRefEntry *XRef::getEntry(int i)
 {
   if (entries[i].type == xrefEntryNone) {
 
+    if ((!xRefStream) && mainXRefEntriesOffset) {
+      if (!parseEntry(mainXRefEntriesOffset + 20*i, &entries[i])) {
+        error(-1, "Failed to parse XRef entry [%d].", i);
+      }
+    } else {
       GooVector<Guint> followedPrev;
       while (prevXRefOffset && entries[i].type == xrefEntryNone) {
         bool ok = true;
@@ -1265,6 +1305,7 @@ XRefEntry *XRef::getEntry(int i)
          error(-1, "Invalid XRef entry");
          entries[i].type = xrefEntryFree;
       }
+    }
   }
 
   return &entries[i];
diff --git a/poppler/XRef.h b/poppler/XRef.h
index 75b065a..2537757 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -65,7 +65,7 @@ public:
   // Constructor, create an empty XRef, used for PDF writing
   XRef();
   // Constructor.  Read xref table from stream.
-  XRef(BaseStream *strA, Guint pos, GBool *wasReconstructed = NULL, GBool reconstruct = false);
+  XRef(BaseStream *strA, Guint pos, Guint mainXRefEntriesOffsetA = 0, GBool *wasReconstructed = NULL, GBool reconstruct = false);
 
   // Destructor.
   ~XRef();
@@ -155,6 +155,8 @@ private:
   Guchar fileKey[16];		// file decryption key
   GBool ownerPasswordOk;	// true if owner password is correct
   Guint prevXRefOffset;		// position of prev XRef section (= next to read)
+  Guint mainXRefEntriesOffset;	// offset of entries in main XRef table
+  GBool xRefStream;		// true if last XRef section is a stream
 
   void init();
   int reserve(int newSize);
@@ -166,6 +168,8 @@ private:
   GBool readXRefStream(Stream *xrefStr, Guint *pos);
   GBool constructXRef(GBool *wasReconstructed);
   Guint strToUnsigned(char *s);
+  GBool parseEntry(Guint offset, XRefEntry *entry);
+
 };
 
 #endif
-- 
1.6.4.2
-------------- next part --------------
From adf4f055f8ac61fe8a189d833a198d0ebb15a698 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 20 Apr 2010 19:03:54 +0200
Subject: [PATCH 01/17] add PDFDoc::getPage()

---
 poppler/PDFDoc.cc |    6 ++++++
 poppler/PDFDoc.h  |    3 +++
 2 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 48b62f7..e9b656e 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -1105,3 +1105,9 @@ Guint PDFDoc::getMainXRefEntriesOffset()
   return mainXRefEntriesOffset;
 }
 
+Page *PDFDoc::getPage(int page)
+{
+  if ((page < 1) || page > getNumPages()) return NULL;
+
+  return catalog->getPage(page);
+}
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 21f1864..13b3c2f 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -128,6 +128,9 @@ public:
   // Return the structure tree root object.
   Object *getStructTreeRoot() { return catalog->getStructTreeRoot(); }
 
+  // Get page.
+  Page *getPage(int page);
+
   // Display a page.
   void displayPage(OutputDev *out, int page,
 		   double hDPI, double vDPI, int rotate,
-- 
1.6.4.2


From 1977652b406fd902d75891bdcb5d69bfd1525a00 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 20 Apr 2010 19:36:08 +0200
Subject: [PATCH 02/17] Use PDFDoc::getPage() in PDFDoc

---
 poppler/PDFDoc.cc |   24 ++++++++++++++++--------
 poppler/PDFDoc.h  |   10 +++++-----
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index e9b656e..636447d 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -413,11 +413,13 @@ void PDFDoc::displayPage(OutputDev *out, int page,
   if (globalParams->getPrintCommands()) {
     printf("***** page %d *****\n", page);
   }
-  if (catalog->getPage(page))
-    catalog->getPage(page)->display(out, hDPI, vDPI,
+
+  if (getPage(page))
+    getPage(page)->display(out, hDPI, vDPI,
 				    rotate, useMediaBox, crop, printing, catalog,
 				    abortCheckCbk, abortCheckCbkData,
 				    annotDisplayDecideCbk, annotDisplayDecideCbkData);
+
 }
 
 void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage,
@@ -444,8 +446,8 @@ void PDFDoc::displayPageSlice(OutputDev *out, int page,
 			      void *abortCheckCbkData,
                               GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
                               void *annotDisplayDecideCbkData) {
-  if (catalog->getPage(page))
-    catalog->getPage(page)->displaySlice(out, hDPI, vDPI,
+  if (getPage(page))
+    getPage(page)->displaySlice(out, hDPI, vDPI,
 					 rotate, useMediaBox, crop,
 					 sliceX, sliceY, sliceW, sliceH,
 					 printing, catalog,
@@ -454,12 +456,18 @@ void PDFDoc::displayPageSlice(OutputDev *out, int page,
 }
 
 Links *PDFDoc::getLinks(int page) {
-  return catalog->getPage(page) ? catalog->getPage(page)->getLinks(catalog) : NULL;
+  Page *p = getPage(page);
+  if (!p) {
+    Object obj;
+    obj.initNull();
+    return new Links (&obj, NULL);
+  }
+  return p->getLinks(catalog);
 }
-  
+
 void PDFDoc::processLinks(OutputDev *out, int page) {
-  if (catalog->getPage(page))
-    catalog->getPage(page)->processLinks(out, catalog);
+  if (getPage(page))
+    getPage(page)->processLinks(out, catalog);
 }
 
 Linearization *PDFDoc::getLinearization()
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 13b3c2f..ed0828c 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -108,15 +108,15 @@ public:
 
   // Get page parameters.
   double getPageMediaWidth(int page)
-    { return catalog->getPage(page)->getMediaWidth(); }
+    { return getPage(page) ? getPage(page)->getMediaWidth() : 0.0 ; }
   double getPageMediaHeight(int page)
-    { return catalog->getPage(page)->getMediaHeight(); }
+    { return getPage(page) ? getPage(page)->getMediaHeight() : 0.0 ; }
   double getPageCropWidth(int page)
-    { return catalog->getPage(page)->getCropWidth(); }
+    { return getPage(page) ? getPage(page)->getCropWidth() : 0.0 ; }
   double getPageCropHeight(int page)
-    { return catalog->getPage(page)->getCropHeight(); }
+    { return getPage(page) ? getPage(page)->getCropHeight() : 0.0 ; }
   int getPageRotate(int page)
-    { return catalog->getPage(page)->getRotate(); }
+    { return getPage(page) ? getPage(page)->getRotate() : 0 ; }
 
   // Get number of pages.
   int getNumPages() { return catalog->getNumPages(); }
-- 
1.6.4.2


From fa5a810966984bf6a877ee0ada97ac79c82a3d4e Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 20 Apr 2010 20:48:30 +0200
Subject: [PATCH 03/17] Use PDFDoc::getPage() in FontInfo

---
 poppler/FontInfo.cc |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/poppler/FontInfo.cc b/poppler/FontInfo.cc
index 0037e07..c348d14 100644
--- a/poppler/FontInfo.cc
+++ b/poppler/FontInfo.cc
@@ -70,7 +70,9 @@ GooList *FontInfoScanner::scan(int nPages) {
   }
 
   for (int pg = currentPage; pg < lastPage; ++pg) {
-    page = doc->getCatalog()->getPage(pg);
+    page = doc->getPage(pg);
+    if (!page) continue;
+
     if ((resDict = page->getResourceDict())) {
       scanFonts(resDict, result);
     }
-- 
1.6.4.2


From 590ae18a182bffa56772d032aba1d38dafaf9b43 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 22 Apr 2010 11:11:11 +0200
Subject: [PATCH 04/17] Use PDFDoc::getPage() in pdfinfo

---
 utils/pdfinfo.cc |   22 +++++++++++++++-------
 1 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index 2abe8b4..a94e4e8 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -257,7 +257,11 @@ int main(int argc, char *argv[]) {
   if (printBoxes) {
     if (multiPage) {
       for (pg = firstPage; pg <= lastPage; ++pg) {
-	page = doc->getCatalog()->getPage(pg);
+	page = doc->getPage(pg);
+	if (!page) {
+          error(-1, "Failed to print boxes for page %d", pg);
+	  continue;
+	}
 	sprintf(buf, "Page %4d MediaBox: ", pg);
 	printBox(buf, page->getMediaBox());
 	sprintf(buf, "Page %4d CropBox:  ", pg);
@@ -270,12 +274,16 @@ int main(int argc, char *argv[]) {
 	printBox(buf, page->getArtBox());
       }
     } else {
-      page = doc->getCatalog()->getPage(firstPage);
-      printBox("MediaBox:       ", page->getMediaBox());
-      printBox("CropBox:        ", page->getCropBox());
-      printBox("BleedBox:       ", page->getBleedBox());
-      printBox("TrimBox:        ", page->getTrimBox());
-      printBox("ArtBox:         ", page->getArtBox());
+      page = doc->getPage(firstPage);
+      if (!page) {
+        error(-1, "Failed to print boxes for page %d", firstPage);
+      } else {
+        printBox("MediaBox:       ", page->getMediaBox());
+        printBox("CropBox:        ", page->getCropBox());
+        printBox("BleedBox:       ", page->getBleedBox());
+        printBox("TrimBox:        ", page->getTrimBox());
+        printBox("ArtBox:         ", page->getArtBox());
+      }
     }
   }
 
-- 
1.6.4.2


From a57d05b52dc9084a58ed0441e706c0266602554c Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 22 Apr 2010 11:19:53 +0200
Subject: [PATCH 05/17] Use PDFDoc::getPage() in pdffonts

---
 utils/pdffonts.cc |    6 +++++-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/utils/pdffonts.cc b/utils/pdffonts.cc
index 81b20e4..30e25dc 100644
--- a/utils/pdffonts.cc
+++ b/utils/pdffonts.cc
@@ -166,7 +166,11 @@ int main(int argc, char *argv[]) {
   fonts = NULL;
   fontsLen = fontsSize = 0;
   for (pg = firstPage; pg <= lastPage; ++pg) {
-    page = doc->getCatalog()->getPage(pg);
+    page = doc->getPage(pg);
+    if (!page) {
+      error(-1, "Failed to read fonts from page %d", pg);
+      continue;
+    }
     if ((resDict = page->getResourceDict())) {
       scanFonts(resDict, doc);
     }
-- 
1.6.4.2


From 8ebe2679a25be432b9f7de0816e499e23f77c677 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 22 Apr 2010 15:52:20 +0200
Subject: [PATCH 06/17] Use PDFDoc::getPage() in glib

---
 glib/poppler-action.cc   |    4 ++--
 glib/poppler-document.cc |   17 ++++++++++-------
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/glib/poppler-action.cc b/glib/poppler-action.cc
index bcb2d36..ca88ca4 100644
--- a/glib/poppler-action.cc
+++ b/glib/poppler-action.cc
@@ -425,13 +425,13 @@ find_annot_movie_for_action (PopplerDocument *document,
 
     xref->fetch (ref->num, ref->gen, &annotObj);
   } else if (link->hasAnnotTitle ()) {
-    Catalog *catalog = document->doc->getCatalog ();
     Object annots;
     GooString *title = link->getAnnotTitle ();
     int i;
 
     for (i = 1; i <= document->doc->getNumPages (); ++i) {
-      Page *p = catalog->getPage (i);
+      Page *p = document->doc->getPage (i);
+      if (!p) continue;
 
       if (p->getAnnots (&annots)->isArray ()) {
         int j;
diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc
index e3bda12..ed45aef 100644
--- a/glib/poppler-document.cc
+++ b/glib/poppler-document.cc
@@ -437,15 +437,14 @@ PopplerPage *
 poppler_document_get_page (PopplerDocument  *document,
 			   int               index)
 {
-  Catalog *catalog;
   Page *page;
 
   g_return_val_if_fail (0 <= index &&
 			index < poppler_document_get_n_pages (document),
 			NULL);
 
-  catalog = document->doc->getCatalog();
-  page = catalog->getPage (index + 1);
+  page = document->doc->getPage (index + 1);
+  if (!page) return NULL;
 
   return _poppler_page_new (document, page, index);
 }
@@ -2484,18 +2483,22 @@ PopplerFormField *
 poppler_document_get_form_field (PopplerDocument *document,
 				 gint             id)
 {
-  Catalog *catalog = document->doc->getCatalog();
+  Page *page;
   unsigned pageNum;
   unsigned fieldNum;
   FormPageWidgets *widgets;
   FormWidget *field;
 
   FormWidget::decodeID (id, &pageNum, &fieldNum);
-  
-  widgets = catalog->getPage (pageNum)->getPageWidgets ();
+
+  page = document->doc->getPage (pageNum);
+  if (!page)
+    return NULL;
+
+  widgets = page->getPageWidgets ();
   if (!widgets)
     return NULL;
-  
+
   field = widgets->getWidget (fieldNum);
   if (field)
     return _poppler_form_field_new (document, field);
-- 
1.6.4.2


From c51c215463fe93a22bb9af88c36275cd14718cd0 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 22 Apr 2010 17:59:01 +0200
Subject: [PATCH 07/17] Use PDFDoc::getPage() in qt4

Note API change: With this patch, Document::page(int index) can now return NULL
when poppler fails to create a page. Any application using these bindings
should check the return value.
---
 qt4/src/poppler-document.cc |    8 +++++++-
 qt4/src/poppler-link.cc     |    6 ++++--
 qt4/src/poppler-page.cc     |    3 ++-
 qt4/src/poppler-qt4.h       |    3 +++
 4 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/qt4/src/poppler-document.cc b/qt4/src/poppler-document.cc
index 77c23a2..b153248 100644
--- a/qt4/src/poppler-document.cc
+++ b/qt4/src/poppler-document.cc
@@ -98,7 +98,13 @@ namespace Poppler {
 
     Page *Document::page(int index) const
     {
-	return new Page(m_doc, index);
+	Page *page = new Page(m_doc, index);
+	if (!page->isOk()) {
+	  delete page;
+	  return NULL;
+	}
+
+	return page;
     }
 
     bool Document::isLocked() const
diff --git a/qt4/src/poppler-link.cc b/qt4/src/poppler-link.cc
index de06242..4f54201 100644
--- a/qt4/src/poppler-link.cc
+++ b/qt4/src/poppler-link.cc
@@ -232,9 +232,11 @@ class LinkMoviePrivate : public LinkPrivate
 		
 		int leftAux = 0, topAux = 0, rightAux = 0, bottomAux = 0;
 		
-		if (d->pageNum > 0 && d->pageNum <= data.doc->doc->getNumPages())
+		::Page *page;
+		if (d->pageNum > 0 &&
+		    d->pageNum <= data.doc->doc->getNumPages() &&
+		    (page = data.doc->doc->getPage( d->pageNum )))
 		{
-			::Page *page = data.doc->doc->getCatalog()->getPage( d->pageNum );
 			cvtUserToDev( page, left, top, &leftAux, &topAux );
 			cvtUserToDev( page, right, bottom, &rightAux, &bottomAux );
 			
diff --git a/qt4/src/poppler-page.cc b/qt4/src/poppler-page.cc
index 293d09b..e408877 100644
--- a/qt4/src/poppler-page.cc
+++ b/qt4/src/poppler-page.cc
@@ -190,8 +190,9 @@ Page::Page(DocumentData *doc, int index) {
   m_page = new PageData();
   m_page->index = index;
   m_page->parentDoc = doc;
-  m_page->page = doc->doc->getCatalog()->getPage(m_page->index + 1);
+  m_page->page = doc->doc->getPage(m_page->index + 1);
   m_page->transition = 0;
+  ok = m_page->page ? true : false;
 }
 
 Page::~Page()
diff --git a/qt4/src/poppler-qt4.h b/qt4/src/poppler-qt4.h
index 8a79a2a..6396d56 100644
--- a/qt4/src/poppler-qt4.h
+++ b/qt4/src/poppler-qt4.h
@@ -609,11 +609,14 @@ delete it;
 	**/
 	QString label() const;
 	
+	bool isOk() { return ok; };
+
     private:
 	Q_DISABLE_COPY(Page)
 
 	Page(DocumentData *doc, int index);
 	PageData *m_page;
+        bool ok;
     };
 
 /**
-- 
1.6.4.2


From 6d8b4a0414ed9f37d5447f1619e521d913dd1b8e Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Fri, 23 Apr 2010 09:21:23 +0200
Subject: [PATCH 08/17] Use PDFDoc::getPage() in qt

Note API change: With this patch, Document::getPage(int index) can now
return NULL when poppler fails to create a page. Any application using
these bindings should check the return value.
---
 qt/poppler-document.cc |   11 +++++++++++
 qt/poppler-page.cc     |   11 +++++++----
 qt/poppler-qt.h        |    6 +++++-
 3 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/qt/poppler-document.cc b/qt/poppler-document.cc
index bade1d1..1a5892b 100644
--- a/qt/poppler-document.cc
+++ b/qt/poppler-document.cc
@@ -113,6 +113,17 @@ int Document::getNumPages() const
   return data->doc.getNumPages();
 }
 
+Page *Document::getPage(int index) const
+{
+  Page *p = new Page(this, index);
+  if (!p->isOk()) {
+    delete p;
+    return NULL;
+  }
+
+  return p;
+}
+
 QValueList<FontInfo> Document::fonts() const
 {
   QValueList<FontInfo> ourList;
diff --git a/qt/poppler-page.cc b/qt/poppler-page.cc
index a42aa15..ef077a7 100644
--- a/qt/poppler-page.cc
+++ b/qt/poppler-page.cc
@@ -47,6 +47,7 @@ class PageData {
   const Document *doc;
   int index;
   PageTransition *transition;
+  ::Page *page;
 };
 
 Page::Page(const Document *doc, int index) {
@@ -54,6 +55,8 @@ Page::Page(const Document *doc, int index) {
   data->index = index;
   data->doc = doc;
   data->transition = 0;
+  data->page = doc->data->doc.getPage(data->index + 1);
+  ok = data->page ? true : false;
 }
 
 Page::~Page()
@@ -132,7 +135,7 @@ QString Page::getText(const Rectangle &r) const
   output_dev = new TextOutputDev(0, gFalse, gFalse, gFalse);
   data->doc->data->doc.displayPageSlice(output_dev, data->index + 1, 72, 72,
       0, false, false, false, -1, -1, -1, -1);
-  p = data->doc->data->doc.getCatalog()->getPage(data->index + 1);
+  p = data->page;
   if (r.isNull())
   {
     rect = p->getCropBox();
@@ -197,7 +200,7 @@ PageTransition *Page::getTransition() const
   {
     Object o;
     PageTransitionParams params;
-    params.dictObj = data->doc->data->doc.getCatalog()->getPage(data->index + 1)->getTrans(&o);
+    params.dictObj = data->page->getTrans(&o);
     data->transition = new PageTransition(params);
     o.free();
   }
@@ -208,7 +211,7 @@ QSize Page::pageSize() const
 {
   ::Page *p;
 
-  p = data->doc->data->doc.getCatalog()->getPage(data->index + 1);
+  p = data->page;
   if ( ( Page::Landscape == orientation() ) || (Page::Seascape == orientation() ) ) {
     return QSize( (int)p->getCropHeight(), (int)p->getCropWidth() );
   } else {
@@ -218,7 +221,7 @@ QSize Page::pageSize() const
 
 Page::Orientation Page::orientation() const
 {
-  ::Page *p = data->doc->data->doc.getCatalog()->getPage(data->index + 1);
+  ::Page *p = data->page;
 
   int rotation = p->getRotate();
   switch (rotation) {
diff --git a/qt/poppler-qt.h b/qt/poppler-qt.h
index a6b1e6e..549ffd2 100644
--- a/qt/poppler-qt.h
+++ b/qt/poppler-qt.h
@@ -31,6 +31,7 @@
 #include <qdom.h>
 #include <qpixmap.h>
 
+
 namespace Poppler {
 
 class Document;
@@ -198,9 +199,12 @@ class Page {
     */
     QValueList<Link*> links() const;
 
+    bool isOk() { return ok; };
+
   private:
     Page(const Document *doc, int index);
     PageData *data;
+    bool ok;
 };
 
 class DocumentData;
@@ -219,7 +223,7 @@ public:
   
   static Document *load(const QString & filePath);
   
-  Page *getPage(int index) const{ return new Page(this, index); }
+  Page *getPage(int index) const;
   
   int getNumPages() const;
   
-- 
1.6.4.2


From 1fdcc6a52a897a4df1faf8cc3109a91b164de946 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Fri, 23 Apr 2010 12:07:39 +0200
Subject: [PATCH 09/17] Use PDFDoc::getPage() in PSOutputDev

---
 glib/poppler-page.cc            |    1 +
 poppler/PSOutputDev.cc          |   37 ++++++++++++++++++++++---------------
 poppler/PSOutputDev.h           |   13 ++++++++-----
 qt/poppler-document.cc          |    2 +-
 qt4/src/poppler-ps-converter.cc |    1 +
 utils/pdftohtml.cc              |    2 +-
 utils/pdftops.cc                |    2 +-
 7 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc
index 0e5de5c..16c1485 100644
--- a/glib/poppler-page.cc
+++ b/glib/poppler-page.cc
@@ -1220,6 +1220,7 @@ poppler_page_render_to_ps (PopplerPage   *page,
 
   if (!ps_file->out)
     ps_file->out = new PSOutputDev (ps_file->filename,
+                                    ps_file->document->doc,
                                     ps_file->document->doc->getXRef(),
                                     ps_file->document->doc->getCatalog(),
                                     NULL,
diff --git a/poppler/PSOutputDev.cc b/poppler/PSOutputDev.cc
index 179a494..5e5d3d0 100644
--- a/poppler/PSOutputDev.cc
+++ b/poppler/PSOutputDev.cc
@@ -70,6 +70,7 @@
 #  include "SplashOutputDev.h"
 #endif
 #include "PSOutputDev.h"
+#include "PDFDoc.h"
 
 #ifdef MACOS
 // needed for setting type/creator of MacOS files
@@ -972,7 +973,7 @@ static void outputToFile(void *stream, char *data, int len) {
   fwrite(data, 1, len, (FILE *)stream);
 }
 
-PSOutputDev::PSOutputDev(const char *fileName, XRef *xrefA, Catalog *catalog,
+PSOutputDev::PSOutputDev(const char *fileName, PDFDoc *doc, XRef *xrefA, Catalog *catalog,
 			 char *psTitle,
 			 int firstPage, int lastPage, PSOutMode modeA,
 			 int paperWidthA, int paperHeightA, GBool duplexA,
@@ -1033,13 +1034,14 @@ PSOutputDev::PSOutputDev(const char *fileName, XRef *xrefA, Catalog *catalog,
   }
 
   init(outputToFile, f, fileTypeA, psTitle,
-       xrefA, catalog, firstPage, lastPage, modeA,
+       doc, xrefA, catalog, firstPage, lastPage, modeA,
        imgLLXA, imgLLYA, imgURXA, imgURYA, manualCtrlA,
        paperWidthA, paperHeightA, duplexA);
 }
 
 PSOutputDev::PSOutputDev(PSOutputFunc outputFuncA, void *outputStreamA,
 			 char *psTitle,
+			 PDFDoc *doc,
 			 XRef *xrefA, Catalog *catalog,
 			 int firstPage, int lastPage, PSOutMode modeA,
 			 int paperWidthA, int paperHeightA, GBool duplexA,
@@ -1068,18 +1070,17 @@ PSOutputDev::PSOutputDev(PSOutputFunc outputFuncA, void *outputStreamA,
   forceRasterize = forceRasterizeA;
 
   init(outputFuncA, outputStreamA, psGeneric, psTitle,
-       xrefA, catalog, firstPage, lastPage, modeA,
+       doc, xrefA, catalog, firstPage, lastPage, modeA,
        imgLLXA, imgLLYA, imgURXA, imgURYA, manualCtrlA,
        paperWidthA, paperHeightA, duplexA);
 }
 
 void PSOutputDev::init(PSOutputFunc outputFuncA, void *outputStreamA,
-		       PSFileType fileTypeA, char *pstitle, XRef *xrefA, Catalog *catalog,
+		       PSFileType fileTypeA, char *pstitle, PDFDoc *doc, XRef *xrefA, Catalog *catalog,
 		       int firstPage, int lastPage, PSOutMode modeA,
 		       int imgLLXA, int imgLLYA, int imgURXA, int imgURYA,
 		       GBool manualCtrlA, int paperWidthA, int paperHeightA,
 		       GBool duplexA) {
-  Page *page;
   PDFRectangle *box;
 
   // initialize
@@ -1099,12 +1100,12 @@ void PSOutputDev::init(PSOutputFunc outputFuncA, void *outputStreamA,
   imgURX = imgURXA;
   imgURY = imgURYA;
   if (paperWidth < 0 || paperHeight < 0) {
-    // this check is needed in case the document has zero pages
-    if (firstPage > 0 && firstPage <= catalog->getNumPages()) {
-      page = catalog->getPage(firstPage);
+    Page *page;
+    if ((page = doc->getPage(firstPage))) {
       paperWidth = (int)ceil(page->getMediaWidth());
       paperHeight = (int)ceil(page->getMediaHeight());
     } else {
+      error(-1, "Invalid page %d", firstPage);
       paperWidth = 1;
       paperHeight = 1;
     }
@@ -1170,14 +1171,16 @@ void PSOutputDev::init(PSOutputFunc outputFuncA, void *outputStreamA,
   embFontList = new GooString();
 
   if (!manualCtrl) {
+    Page *page;
     // this check is needed in case the document has zero pages
-    if (firstPage > 0 && firstPage <= catalog->getNumPages()) {
+    if ((page = doc->getPage(firstPage))) {
       writeHeader(firstPage, lastPage,
-		  catalog->getPage(firstPage)->getMediaBox(),
-		  catalog->getPage(firstPage)->getCropBox(),
-		  catalog->getPage(firstPage)->getRotate(),
+		  page->getMediaBox(),
+		  page->getCropBox(),
+		  page->getRotate(),
 		  pstitle);
     } else {
+      error(-1, "Invalid page %d", firstPage);
       box = new PDFRectangle(0, 0, 1, 1);
       writeHeader(firstPage, lastPage, box, box, 0, pstitle);
       delete box;
@@ -1190,7 +1193,7 @@ void PSOutputDev::init(PSOutputFunc outputFuncA, void *outputStreamA,
       writePS("%%EndProlog\n");
       writePS("%%BeginSetup\n");
     }
-    writeDocSetup(catalog, firstPage, lastPage, duplexA);
+    writeDocSetup(doc, catalog, firstPage, lastPage, duplexA);
     if (mode != psModeForm) {
       writePS("%%EndSetup\n");
     }
@@ -1400,7 +1403,7 @@ void PSOutputDev::writeXpdfProcset() {
   }
 }
 
-void PSOutputDev::writeDocSetup(Catalog *catalog,
+void PSOutputDev::writeDocSetup(PDFDoc *doc, Catalog *catalog,
 				int firstPage, int lastPage,
                                 GBool duplexA) {
   Page *page;
@@ -1416,7 +1419,11 @@ void PSOutputDev::writeDocSetup(Catalog *catalog,
     writePS("xpdf begin\n");
   }
   for (pg = firstPage; pg <= lastPage; ++pg) {
-    page = catalog->getPage(pg);
+    page = doc->getPage(pg);
+    if (!page) {
+      error(-1, "Failed writing resources for page %d", pg);
+      continue;
+    }
     if ((resDict = page->getResourceDict())) {
       setupResources(resDict);
     }
diff --git a/poppler/PSOutputDev.h b/poppler/PSOutputDev.h
index 38c838c..a84a638 100644
--- a/poppler/PSOutputDev.h
+++ b/poppler/PSOutputDev.h
@@ -50,6 +50,7 @@ struct PSFont8Info;
 struct PSFont16Enc;
 class PSOutCustomColor;
 class Function;
+class PDFDoc;
 
 //------------------------------------------------------------------------
 // PSOutputDev
@@ -75,7 +76,7 @@ class PSOutputDev: public OutputDev {
 public:
 
   // Open a PostScript output file, and write the prolog.
-  PSOutputDev(const char *fileName, XRef *xrefA, Catalog *catalog,
+  PSOutputDev(const char *fileName, PDFDoc *doc, XRef *xrefA, Catalog *catalog,
 	      char *psTitle,
 	      int firstPage, int lastPage, PSOutMode modeA,
 	      int paperWidthA = -1, int paperHeightA = -1,
@@ -88,6 +89,7 @@ public:
   // Open a PSOutputDev that will write to a generic stream.
   PSOutputDev(PSOutputFunc outputFuncA, void *outputStreamA,
 	      char *psTitle,
+	      PDFDoc *doc,
 	      XRef *xrefA, Catalog *catalog,
 	      int firstPage, int lastPage, PSOutMode modeA,
 	      int paperWidthA = -1, int paperHeightA = -1,
@@ -145,9 +147,6 @@ public:
   // Write the Xpdf procset.
   void writeXpdfProcset();
 
-  // Write the document-level setup.
-  void writeDocSetup(Catalog *catalog, int firstPage, int lastPage, GBool duplexA);
-
   // Write the trailer for the current page.
   void writePageTrailer();
 
@@ -287,7 +286,7 @@ public:
 private:
 
   void init(PSOutputFunc outputFuncA, void *outputStreamA,
-	    PSFileType fileTypeA, char *pstitle, XRef *xrefA, Catalog *catalog,
+	    PSFileType fileTypeA, char *pstitle, PDFDoc *doc, XRef *xrefA, Catalog *catalog,
 	    int firstPage, int lastPage, PSOutMode modeA,
 	    int imgLLXA, int imgLLYA, int imgURXA, int imgURYA,
 	    GBool manualCtrlA, int paperWidthA, int paperHeightA,
@@ -341,6 +340,10 @@ private:
 		    double *x1, double *y1);
 #endif
   void cvtFunction(Function *func);
+
+  // Write the document-level setup.
+  void writeDocSetup(PDFDoc *doc, Catalog *catalog, int firstPage, int lastPage, GBool duplexA);
+
   void writePSChar(char c);
   void writePS(char *s);
   void writePSFmt(const char *fmt, ...);
diff --git a/qt/poppler-document.cc b/qt/poppler-document.cc
index 1a5892b..03d01fa 100644
--- a/qt/poppler-document.cc
+++ b/qt/poppler-document.cc
@@ -325,7 +325,7 @@ bool Document::print(const QString &fileName, QValueList<int> pageList, double h
 
 bool Document::print(const QString &file, QValueList<int> pageList, double hDPI, double vDPI, int rotate, int paperWidth, int paperHeight)
 {
-  PSOutputDev *psOut = new PSOutputDev(file.latin1(), data->doc.getXRef(), data->doc.getCatalog(), NULL, 1, data->doc.getNumPages(), psModePS, paperWidth, paperHeight);
+  PSOutputDev *psOut = new PSOutputDev(file.latin1(), &(data->doc), data->doc.getXRef(), data->doc.getCatalog(), NULL, 1, data->doc.getNumPages(), psModePS, paperWidth, paperHeight);
   
   if (psOut->isOk()) {
     QValueList<int>::iterator it;
diff --git a/qt4/src/poppler-ps-converter.cc b/qt4/src/poppler-ps-converter.cc
index 7a1957b..9dc82ec 100644
--- a/qt4/src/poppler-ps-converter.cc
+++ b/qt4/src/poppler-ps-converter.cc
@@ -195,6 +195,7 @@ bool PSConverter::convert()
 	
 	PSOutputDev *psOut = new PSOutputDev(outputToQIODevice, dev,
 	                                     pstitlechar,
+	                                     d->document->doc,
 	                                     d->document->doc->getXRef(),
 	                                     d->document->doc->getCatalog(),
 	                                     1,
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index 5323b6e..3723b44 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -457,7 +457,7 @@ int main(int argc, char *argv[]) {
       psFileName = new GooString(htmlFileName->getCString());
       psFileName->append(".ps");
 
-      psOut = new PSOutputDev(psFileName->getCString(), doc->getXRef(),
+      psOut = new PSOutputDev(psFileName->getCString(), doc, doc->getXRef(),
           doc->getCatalog(), NULL, firstPage, lastPage, psModePS, w, h);
       psOut->setDisplayText(gFalse);
       doc->displayPages(psOut, firstPage, lastPage, 72, 72, 0,
diff --git a/utils/pdftops.cc b/utils/pdftops.cc
index 0bc43a1..8231458 100644
--- a/utils/pdftops.cc
+++ b/utils/pdftops.cc
@@ -359,7 +359,7 @@ int main(int argc, char *argv[]) {
   }
 
   // write PostScript file
-  psOut = new PSOutputDev(psFileName->getCString(), doc->getXRef(),
+  psOut = new PSOutputDev(psFileName->getCString(), doc, doc->getXRef(),
 			  doc->getCatalog(), NULL, firstPage, lastPage, mode,
 			  paperWidth,
 			  paperHeight,
-- 
1.6.4.2


From 4405e3d03d2df464715f4187b1f702b01377a298 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Sat, 24 Apr 2010 10:17:56 +0200
Subject: [PATCH 10/17] Use PDFDoc::getPage() in HtmlOutputDev

---
 utils/HtmlOutputDev.cc |    2 +-
 utils/HtmlOutputDev.h  |    2 ++
 2 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index 8ff8f08..1e3a3ef 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -1116,7 +1116,7 @@ void HtmlOutputDev::startPage(int pageNum, GfxState *state) {
 
 
 void HtmlOutputDev::endPage() {
-  Links *linksList = catalog->getPage(pageNum)->getLinks(catalog);
+  Links *linksList = docPage->getLinks(catalog);
   for (int i = 0; i < linksList->getNumLinks(); ++i)
   {
       doProcessLink(linksList->getLink(i));
diff --git a/utils/HtmlOutputDev.h b/utils/HtmlOutputDev.h
index 24ccfd1..48b04c6 100644
--- a/utils/HtmlOutputDev.h
+++ b/utils/HtmlOutputDev.h
@@ -256,6 +256,7 @@ public:
                                GBool (* abortCheckCbk)(void *data) = NULL,
                                void * abortCheckCbkData = NULL)
   {
+   docPage = page;
    catalog = catalogA;
    return gTrue;
   }
@@ -323,6 +324,7 @@ private:
   GooString *docTitle;
   GooList *glMetaVars;
   Catalog *catalog;
+  Page *docPage;
   friend class HtmlPage;
 };
 
-- 
1.6.4.2


From 26491f24817e15082d4333022146a94ea2e81f7d Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 31 Mar 2010 14:39:57 +0200
Subject: [PATCH 11/17] Parse page tree on demand

---
 poppler/Catalog.cc |  283 +++++++++++++++++++++++++++++++++++-----------------
 poppler/Catalog.h  |   12 ++-
 2 files changed, 199 insertions(+), 96 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index dbf9af2..d524c8c 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -59,9 +59,6 @@ Catalog::Catalog(XRef *xrefA) {
   Object catDict, pagesDict, pagesDictRef;
   Object obj, obj2;
   Object optContentProps;
-  char *alreadyRead;
-  int numPages0;
-  int i;
 
   ok = gTrue;
   xref = xrefA;
@@ -78,6 +75,12 @@ Catalog::Catalog(XRef *xrefA) {
   embeddedFileNameTree = NULL;
   jsNameTree = NULL;
 
+  pagesList = NULL;
+  pagesRefList = NULL;
+  attrsList = NULL;
+  kidsIdxList = NULL;
+  lastCachedPage = 0;
+
   xref->getCatalog(&catDict);
   if (!catDict.isDict()) {
     error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
@@ -100,31 +103,11 @@ Catalog::Catalog(XRef *xrefA) {
   if (!obj.isNum()) {
     error(-1, "Page count in top-level pages object is wrong type (%s)",
 	  obj.getTypeName());
-    pagesSize = numPages0 = 0;
+    numPages = 0;
   } else {
-    pagesSize = numPages0 = (int)obj.getNum();
+    numPages = (int)obj.getNum();
   }
   obj.free();
-  pages = (Page **)gmallocn(pagesSize, sizeof(Page *));
-  pageRefs = (Ref *)gmallocn(pagesSize, sizeof(Ref));
-  for (i = 0; i < pagesSize; ++i) {
-    pages[i] = NULL;
-    pageRefs[i].num = -1;
-    pageRefs[i].gen = -1;
-  }
-  alreadyRead = (char *)gmalloc(xref->getNumObjects());
-  memset(alreadyRead, 0, xref->getNumObjects());
-  if (catDict.dictLookupNF("Pages", &pagesDictRef)->isRef() &&
-      pagesDictRef.getRefNum() >= 0 &&
-      pagesDictRef.getRefNum() < xref->getNumObjects()) {
-    alreadyRead[pagesDictRef.getRefNum()] = 1;
-  }
-  pagesDictRef.free();
-  numPages = readPageTree(pagesDict.getDict(), NULL, 0, alreadyRead);
-  gfree(alreadyRead);
-  if (numPages != numPages0) {
-    error(-1, "Page count in top-level pages object is incorrect");
-  }
   pagesDict.free();
 
   // read base URI
@@ -161,10 +144,24 @@ Catalog::Catalog(XRef *xrefA) {
 }
 
 Catalog::~Catalog() {
-  int i;
-
+  delete kidsIdxList;
+  if (attrsList) {
+    GooVector<PageAttrs *>::iterator it;
+    for (it = attrsList->begin() ; it < attrsList->end(); it++ ) {
+      delete *it;
+    }
+    delete attrsList;
+  }
+  delete pagesRefList;
+  if (pagesList) {
+    GooVector<Dict *>::iterator it;
+    for (it = pagesList->begin() ; it < pagesList->end(); it++ ) {
+      delete *it;
+    }
+    delete pagesList;
+  }
   if (pages) {
-    for (i = 0; i < pagesSize; ++i) {
+    for (int i = 0; i < pagesSize; ++i) {
       if (pages[i]) {
 	delete pages[i];
       }
@@ -221,91 +218,193 @@ GooString *Catalog::readMetadata() {
   return s;
 }
 
-int Catalog::readPageTree(Dict *pagesDict, PageAttrs *attrs, int start,
-			  char *alreadyRead) {
-  Object kids;
-  Object kid;
-  Object kidRef;
-  PageAttrs *attrs1, *attrs2;
-  Page *page;
-  int i, j;
-
-  attrs1 = new PageAttrs(attrs, pagesDict);
-  pagesDict->lookup("Kids", &kids);
-  if (!kids.isArray()) {
-    error(-1, "Kids object (page %d) is wrong type (%s)",
-	  start+1, kids.getTypeName());
-    return start;
-  }
-  for (i = 0; i < kids.arrayGetLength(); ++i) {
-    kids.arrayGetNF(i, &kidRef);
-    if (kidRef.isRef() &&
-	kidRef.getRefNum() >= 0 &&
-	kidRef.getRefNum() < xref->getNumObjects()) {
-      if (alreadyRead[kidRef.getRefNum()]) {
-	error(-1, "Loop in Pages tree");
-	kidRef.free();
-	continue;
+Page *Catalog::getPage(int i)
+{
+  if (i < 1) return NULL;
+
+  if (i > lastCachedPage) {
+     if (cachePageTree(i) == gFalse) return NULL;
+  }
+  return pages[i-1];
+}
+
+Ref *Catalog::getPageRef(int i)
+{
+  if (i < 1) return NULL;
+
+  if (i > lastCachedPage) {
+     if (cachePageTree(i) == gFalse) return NULL;
+  }
+  return &pageRefs[i-1];
+}
+
+GBool Catalog::cachePageTree(int page)
+{
+  Dict *pagesDict;
+
+  if (pagesList == NULL) {
+
+    Object catDict;
+    Ref pagesRef;
+
+    xref->getCatalog(&catDict);
+
+    Object pagesDictRef;
+    if (catDict.dictLookupNF("Pages", &pagesDictRef)->isRef() &&
+        pagesDictRef.getRefNum() >= 0 &&
+        pagesDictRef.getRefNum() < xref->getNumObjects()) {
+      pagesRef = pagesDictRef.getRef();
+      pagesDictRef.free();
+    } else {
+       error(-1, "Catalog dictionary does not contain a valid \"Pages\" entry");
+       pagesDictRef.free();
+       return gFalse;
+    }
+
+    Object obj;
+    catDict.dictLookup("Pages", &obj);
+    catDict.free();
+    // This should really be isDict("Pages"), but I've seen at least one
+    // PDF file where the /Type entry is missing.
+    if (obj.isDict()) {
+      obj.getDict()->incRef();
+      pagesDict = obj.getDict();
+      obj.free();
+    }
+    else {
+      error(-1, "Top-level pages object is wrong type (%s)", obj.getTypeName());
+      obj.free();
+      return gFalse;
+    }
+
+    pagesSize = numPages;
+    pages = (Page **)gmallocn(pagesSize, sizeof(Page *));
+    pageRefs = (Ref *)gmallocn(pagesSize, sizeof(Ref));
+    for (int i = 0; i < pagesSize; ++i) {
+      pages[i] = NULL;
+      pageRefs[i].num = -1;
+      pageRefs[i].gen = -1;
+    }
+
+    pagesList = new GooVector<Dict *>();
+    pagesList->push_back(pagesDict);
+    pagesRefList = new GooVector<Ref>();
+    pagesRefList->push_back(pagesRef);
+    attrsList = new GooVector<PageAttrs *>();
+    attrsList->push_back(new PageAttrs(NULL, pagesDict));
+    kidsIdxList = new GooVector<int>();
+    kidsIdxList->push_back(0);
+    lastCachedPage = 0;
+
+  }
+
+  while(1) {
+
+    if (page <= lastCachedPage) return gTrue;
+
+    if (pagesList->empty()) return gFalse;
+
+    pagesDict = pagesList->back();
+    Object kids;
+    pagesDict->lookup("Kids", &kids);
+    if (!kids.isArray()) {
+      error(-1, "Kids object (page %d) is wrong type (%s)",
+            lastCachedPage+1, kids.getTypeName());
+      kids.free();
+      return gFalse;
+    }
+
+    int kidsIdx = kidsIdxList->back();
+    if (kidsIdx >= kids.arrayGetLength()) {
+       delete pagesList->back();
+       pagesList->pop_back();
+       pagesRefList->pop_back();
+       delete attrsList->back();
+       attrsList->pop_back();
+       kidsIdxList->pop_back();
+       if (!kidsIdxList->empty()) kidsIdxList->back()++;
+       kids.free();
+       continue;
+    }
+
+    Object kidRef;
+    kids.arrayGetNF(kidsIdx, &kidRef);
+    if (!kidRef.isRef()) {
+      error(-1, "Kid object (page %d) is not an indirect reference (%s)",
+            lastCachedPage+1, kidRef.getTypeName());
+      kidRef.free();
+      kids.free();
+      return gFalse;
+    }
+
+    for (size_t i = 0; i < pagesRefList->size(); i++) {
+      if (((*pagesRefList)[i]).num == kidRef.getRefNum()) {
+         error(-1, "Loop in Pages tree");
+         kidRef.free();
+         kids.free();
+         kidsIdxList->back()++;
+         continue;
       }
-      alreadyRead[kidRef.getRefNum()] = 1;
     }
-    kids.arrayGet(i, &kid);
+
+    Object kid;
+    kids.arrayGet(kidsIdx, &kid);
+    kids.free();
     if (kid.isDict("Page")) {
-      attrs2 = new PageAttrs(attrs1, kid.getDict());
-      page = new Page(xref, start+1, kid.getDict(), kidRef.getRef(), attrs2, getForm());
-      if (!page->isOk()) {
-	++start;
-	goto err3;
+      PageAttrs *attrs = new PageAttrs(attrsList->back(), kid.getDict());
+      Page *p = new Page(xref, lastCachedPage+1, kid.getDict(),
+                     kidRef.getRef(), attrs, form);
+      if (!p->isOk()) {
+        error(-1, "Failed to create page (page %d)", lastCachedPage+1);
+        delete p;
+        kidRef.free();
+        kid.free();
+        return gFalse;
       }
-      if (start >= pagesSize) {
-	pagesSize += 32;
-	pages = (Page **)greallocn(pages, pagesSize, sizeof(Page *));
-	pageRefs = (Ref *)greallocn(pageRefs, pagesSize, sizeof(Ref));
-	for (j = pagesSize - 32; j < pagesSize; ++j) {
-	  pages[j] = NULL;
-	  pageRefs[j].num = -1;
-	  pageRefs[j].gen = -1;
-	}
-      }
-      pages[start] = page;
-      if (kidRef.isRef()) {
-	pageRefs[start].num = kidRef.getRefNum();
-	pageRefs[start].gen = kidRef.getRefGen();
+
+      if (lastCachedPage >= numPages) {
+        error(-1, "Page count in top-level pages object is incorrect");
+        kidRef.free();
+        kid.free();
+        return gFalse;
       }
-      ++start;
+
+      pages[lastCachedPage] = p;
+      pageRefs[lastCachedPage].num = kidRef.getRefNum();
+      pageRefs[lastCachedPage].gen = kidRef.getRefGen();
+
+      lastCachedPage++;
+      kidsIdxList->back()++;
+
     // This should really be isDict("Pages"), but I've seen at least one
     // PDF file where the /Type entry is missing.
     } else if (kid.isDict()) {
-      if ((start = readPageTree(kid.getDict(), attrs1, start, alreadyRead))
-	  < 0)
-	goto err2;
+      attrsList->push_back(new PageAttrs(attrsList->back(), kid.getDict()));
+      pagesRefList->push_back(kidRef.getRef());
+      kid.getDict()->incRef();
+      pagesList->push_back(kid.getDict());
+      kidsIdxList->push_back(0);
     } else {
       error(-1, "Kid object (page %d) is wrong type (%s)",
-	    start+1, kid.getTypeName());
+            lastCachedPage+1, kid.getTypeName());
+      kidRef.free();
+      kid.free();
+      return gFalse;
     }
-    kid.free();
     kidRef.free();
+    kid.free();
+
   }
-  delete attrs1;
-  kids.free();
-  return start;
 
- err3:
-  delete page;
- err2:
-  kid.free();
-  kidRef.free();
-  kids.free();
-  delete attrs1;
-  ok = gFalse;
-  return -1;
+  return gFalse;
 }
 
 int Catalog::findPage(int num, int gen) {
   int i;
 
   for (i = 0; i < numPages; ++i) {
-    if (pageRefs[i].num == num && pageRefs[i].gen == gen)
+    Ref *ref = getPageRef(i+1);
+    if (ref->num == num && ref->gen == gen)
       return i + 1;
   }
   return 0;
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 2cab80a..5a25109 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -151,10 +151,10 @@ public:
   int getNumPages() { return numPages; }
 
   // Get a page.
-  Page *getPage(int i) { return pages[i-1]; }
+  Page *getPage(int i);
 
   // Get the reference for a page object.
-  Ref *getPageRef(int i) { return &pageRefs[i-1]; }
+  Ref *getPageRef(int i);
 
   // Return base URI, or NULL if none.
   GooString *getBaseURI() { return baseURI; }
@@ -232,6 +232,11 @@ private:
   XRef *xref;			// the xref table for this PDF file
   Page **pages;			// array of pages
   Ref *pageRefs;		// object ID for each page
+  int lastCachedPage;
+  GooVector<Dict *> *pagesList;
+  GooVector<Ref> *pagesRefList;
+  GooVector<PageAttrs *> *attrsList;
+  GooVector<int> *kidsIdxList;
   Form *form;
   int numPages;			// number of pages
   int pagesSize;		// size of pages array
@@ -251,8 +256,7 @@ private:
   PageMode pageMode;		// page mode
   PageLayout pageLayout;	// page layout
 
-  int readPageTree(Dict *pages, PageAttrs *attrs, int start,
-		   char *alreadyRead);
+  GBool cachePageTree(int page); // Cache first <page> pages.
   Object *findDestInTree(Object *tree, GooString *name, Object *obj);
 
   Object *getNames();
-- 
1.6.4.2


From 1e7a1752241b958c6f42d55cfc0e54e2b43a2220 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 24 Mar 2010 22:01:41 +0100
Subject: [PATCH 12/17] Parse number of pages on demand

---
 poppler/Catalog.cc |   70 +++++++++++++++++++++++++++++++--------------------
 poppler/Catalog.h  |    2 +-
 2 files changed, 43 insertions(+), 29 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index d524c8c..8a6f7d5 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -64,7 +64,8 @@ Catalog::Catalog(XRef *xrefA) {
   xref = xrefA;
   pages = NULL;
   pageRefs = NULL;
-  numPages = pagesSize = 0;
+  numPages = -1;
+  pagesSize = 0;
   baseURI = NULL;
   pageLabelInfo = NULL;
   form = NULL;
@@ -89,27 +90,6 @@ Catalog::Catalog(XRef *xrefA) {
   // get the AcroForm dictionary
   catDict.dictLookup("AcroForm", &acroForm);
 
-  // read page tree
-  catDict.dictLookup("Pages", &pagesDict);
-  // This should really be isDict("Pages"), but I've seen at least one
-  // PDF file where the /Type entry is missing.
-  if (!pagesDict.isDict()) {
-    error(-1, "Top-level pages object is wrong type (%s)",
-	  pagesDict.getTypeName());
-    goto err2;
-  }
-  pagesDict.dictLookup("Count", &obj);
-  // some PDF files actually use real numbers here ("/Count 9.0")
-  if (!obj.isNum()) {
-    error(-1, "Page count in top-level pages object is wrong type (%s)",
-	  obj.getTypeName());
-    numPages = 0;
-  } else {
-    numPages = (int)obj.getNum();
-  }
-  obj.free();
-  pagesDict.free();
-
   // read base URI
   if (catDict.dictLookup("URI", &obj)->isDict()) {
     if (obj.dictLookup("Base", &obj2)->isString()) {
@@ -136,8 +116,6 @@ Catalog::Catalog(XRef *xrefA) {
   catDict.free();
   return;
 
- err2:
-  pagesDict.free();
  err1:
   catDict.free();
   ok = gFalse;
@@ -277,7 +255,7 @@ GBool Catalog::cachePageTree(int page)
       return gFalse;
     }
 
-    pagesSize = numPages;
+    pagesSize = getNumPages();
     pages = (Page **)gmallocn(pagesSize, sizeof(Page *));
     pageRefs = (Ref *)gmallocn(pagesSize, sizeof(Ref));
     for (int i = 0; i < pagesSize; ++i) {
@@ -402,7 +380,7 @@ GBool Catalog::cachePageTree(int page)
 int Catalog::findPage(int num, int gen) {
   int i;
 
-  for (i = 0; i < numPages; ++i) {
+  for (i = 0; i < getNumPages(); ++i) {
     Ref *ref = getPageRef(i+1);
     if (ref->num == num && ref->gen == gen)
       return i + 1;
@@ -722,7 +700,7 @@ GBool Catalog::labelToIndex(GooString *label, int *index)
       return gFalse;
   }
 
-  if (*index < 0 || *index >= numPages)
+  if (*index < 0 || *index >= getNumPages())
     return gFalse;
 
   return gTrue;
@@ -732,7 +710,7 @@ GBool Catalog::indexToLabel(int index, GooString *label)
 {
   char buffer[32];
 
-  if (index < 0 || index >= numPages)
+  if (index < 0 || index >= getNumPages())
     return gFalse;
 
   PageLabelInfo *pli = getPageLabelInfo();
@@ -848,6 +826,42 @@ EmbFile::EmbFile(Object *efDict, GooString *description)
     m_mimetype = new GooString();
 }
 
+int Catalog::getNumPages()
+{
+  if (numPages == -1)
+  {
+    Object catDict, pagesDict, obj;
+
+    xref->getCatalog(&catDict);
+    catDict.dictLookup("Pages", &pagesDict);
+    catDict.free();
+
+    // This should really be isDict("Pages"), but I've seen at least one
+    // PDF file where the /Type entry is missing.
+    if (!pagesDict.isDict()) {
+      error(-1, "Top-level pages object is wrong type (%s)",
+          pagesDict.getTypeName());
+      pagesDict.free();
+      return 0;
+    }
+
+    pagesDict.dictLookup("Count", &obj);
+    // some PDF files actually use real numbers here ("/Count 9.0")
+    if (!obj.isNum()) {
+      error(-1, "Page count in top-level pages object is wrong type (%s)",
+         obj.getTypeName());
+      numPages = 0;
+    } else {
+      numPages = (int)obj.getNum();
+    }
+
+    obj.free();
+    pagesDict.free();
+  }
+
+  return numPages;
+}
+
 PageLabelInfo *Catalog::getPageLabelInfo()
 {
   if (!pageLabelInfo) {
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 5a25109..8bca80b 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -148,7 +148,7 @@ public:
   GBool isOk() { return ok; }
 
   // Get number of pages.
-  int getNumPages() { return numPages; }
+  int getNumPages();
 
   // Get a page.
   Page *getPage(int i);
-- 
1.6.4.2


From dca9a449998af19cfba80b8f508fc75e96001d99 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 18:53:54 +0100
Subject: [PATCH 13/17] Get number of pages from linearization table

---
 poppler/PDFDoc.cc |    9 +++++++++
 poppler/PDFDoc.h  |    2 +-
 2 files changed, 10 insertions(+), 1 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 636447d..8ba0b1f 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -1113,6 +1113,15 @@ Guint PDFDoc::getMainXRefEntriesOffset()
   return mainXRefEntriesOffset;
 }
 
+int PDFDoc::getNumPages()
+{
+  if (isLinearized()) {
+    return getLinearization()->getNumPages();
+  } else {
+    return catalog->getNumPages();
+  }
+}
+
 Page *PDFDoc::getPage(int page)
 {
   if ((page < 1) || page > getNumPages()) return NULL;
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index ed0828c..ef1646f 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -119,7 +119,7 @@ public:
     { return getPage(page) ? getPage(page)->getRotate() : 0 ; }
 
   // Get number of pages.
-  int getNumPages() { return catalog->getNumPages(); }
+  int getNumPages();
 
   // Return the contents of the metadata stream, or NULL if there is
   // no metadata.
-- 
1.6.4.2


From 4a82d18cc7cda49dc57289414da3ec15b20aa71b Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 4 Aug 2010 18:09:36 +0200
Subject: [PATCH 14/17] Keep security handler available in PDFDoc

---
 poppler/PDFDoc.cc |    4 ++--
 poppler/PDFDoc.h  |    2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 8ba0b1f..1f768d4 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -102,6 +102,7 @@ void PDFDoc::init()
   outline = NULL;
 #endif
   startXRefPos = ~(Guint)0;
+  secHdlr = NULL;
 }
 
 PDFDoc::PDFDoc()
@@ -274,6 +275,7 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
 }
 
 PDFDoc::~PDFDoc() {
+  delete secHdlr;
 #ifndef DISABLE_OUTLINE
   if (outline) {
     delete outline;
@@ -370,7 +372,6 @@ void PDFDoc::checkHeader() {
 GBool PDFDoc::checkEncryption(GooString *ownerPassword, GooString *userPassword) {
   Object encrypt;
   GBool encrypted;
-  SecurityHandler *secHdlr;
   GBool ret;
 
   xref->getTrailerDict()->dictLookup("Encrypt", &encrypt);
@@ -390,7 +391,6 @@ GBool PDFDoc::checkEncryption(GooString *ownerPassword, GooString *userPassword)
 	// authorization failed
 	ret = gFalse;
       }
-      delete secHdlr;
     } else {
       // couldn't find the matching security handler
       ret = gFalse;
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index ef1646f..33f3c2b 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -50,6 +50,7 @@ class LinkAction;
 class LinkDest;
 class Outline;
 class Linearization;
+class SecurityHandler;
 
 enum PDFWriteMode {
   writeStandard,
@@ -261,6 +262,7 @@ private:
   int pdfMinorVersion;
   Linearization *linearization;
   XRef *xref;
+  SecurityHandler *secHdlr;
   Catalog *catalog;
 #ifndef DISABLE_OUTLINE
   Outline *outline;
-- 
1.6.4.2


From 18b910a07f2f5f7c72e673c993a956f62c2634df Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 24 Mar 2010 22:03:27 +0100
Subject: [PATCH 15/17] Add hint tables support

---
 CMakeLists.txt      |    2 +
 poppler/Hints.cc    |  433 +++++++++++++++++++++++++++++++++++++++++++++++++++
 poppler/Hints.h     |   95 +++++++++++
 poppler/Makefile.am |    2 +
 poppler/PDFDoc.cc   |   14 ++
 poppler/PDFDoc.h    |    5 +
 6 files changed, 551 insertions(+), 0 deletions(-)
 create mode 100644 poppler/Hints.cc
 create mode 100644 poppler/Hints.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7c25c45..b70c76c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -247,6 +247,7 @@ set(poppler_SRCS
   poppler/GfxFont.cc
   poppler/GfxState.cc
   poppler/GlobalParams.cc
+  poppler/Hints.cc
   poppler/JArithmeticDecoder.cc
   poppler/JBIG2Stream.cc
   poppler/Lexer.cc
@@ -394,6 +395,7 @@ if(ENABLE_XPDF_HEADERS)
     poppler/GfxState.h
     poppler/GfxState_helpers.h
     poppler/GlobalParams.h
+    poppler/Hints.h
     poppler/JArithmeticDecoder.h
     poppler/JBIG2Stream.h
     poppler/Lexer.h
diff --git a/poppler/Hints.cc b/poppler/Hints.cc
new file mode 100644
index 0000000..c2393ad
--- /dev/null
+++ b/poppler/Hints.cc
@@ -0,0 +1,433 @@
+//========================================================================
+//
+// Hints.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2010 Hib Eris <hib at hiberis.nl>
+//
+//========================================================================
+
+#include <config.h>
+
+#include "Hints.h"
+
+#include "Linearization.h"
+#include "Object.h"
+#include "Stream.h"
+#include "XRef.h"
+#include "Parser.h"
+#include "Lexer.h"
+#include "SecurityHandler.h"
+
+#include <limits.h>
+
+//------------------------------------------------------------------------
+// Hints
+//------------------------------------------------------------------------
+
+// Allocate the per-page hint arrays for a linearized file, then parse the tables.
+Hints::Hints(BaseStream *str, Linearization *linearization, XRef *xref, SecurityHandler *secHdlr)
+{
+  mainXRefEntriesOffset = linearization->getMainXRefEntriesOffset();
+  nPages = linearization->getNumPages();
+  pageFirst = linearization->getPageFirst();
+  pageEndFirst = linearization->getEndFirst();
+  pageObjectFirst = linearization->getObjectNumberFirst();
+  pageOffsetFirst = xref->getEntry(pageObjectFirst)->offset;
+
+  if (nPages >= INT_MAX / (int)sizeof(Guint)) {
+     error(-1, "Invalid number of pages (%d) for hints table", nPages);
+     nPages = 0;
+  }
+  nObjects = (Guint *) gmallocn(nPages, sizeof(Guint));
+  pageObjectNum = (Guint *) gmallocn(nPages, sizeof(Guint));
+  xRefOffset = (Guint *) gmallocn(nPages, sizeof(Guint));
+  pageLength = (Guint *) gmallocn(nPages, sizeof(Guint));
+  pageOffset = (Guint *) gmallocn(nPages, sizeof(Guint));
+  numSharedObject = (Guint *) gmallocn(nPages, sizeof(Guint));
+  sharedObjectId = (Guint **) gmallocn(nPages, sizeof(Guint*));
+  if (!nObjects || !pageObjectNum || !xRefOffset || !pageLength || !pageOffset ||
+      !numSharedObject || !sharedObjectId) {
+    error(-1, "Failed to allocate memory for hints tabel");
+    nPages = 0;
+  }
+  // Clear every element (size in bytes, not elements): ~Hints() reads both arrays.
+  memset(numSharedObject, 0, nPages * sizeof(Guint));
+  memset(sharedObjectId, 0, nPages * sizeof(Guint *));
+  nSharedGroups = 0;
+  groupLength = NULL;
+  groupOffset = NULL;
+  groupHasSignature = NULL;
+  groupNumObjects = NULL;
+  groupXRefOffset = NULL;
+  readTables(str, linearization, xref, secHdlr);
+}
+
+Hints::~Hints()
+{
+  gfree(nObjects);
+  gfree(pageObjectNum);
+  gfree(xRefOffset);
+  gfree(pageLength);
+  gfree(pageOffset);
+  for (int i=0; i< nPages; i++) {
+    if (numSharedObject[i]) {
+       gfree(sharedObjectId[i]);
+    }
+  }
+  gfree(sharedObjectId);
+  gfree(numSharedObject);
+
+  gfree(groupLength);
+  gfree(groupOffset);
+  gfree(groupHasSignature);
+  gfree(groupNumObjects);
+  gfree(groupXRefOffset);
+}
+
+// Copy both hint stream parts into one buffer and parse the hint tables from it.
+void Hints::readTables(BaseStream *str, Linearization *linearization, XRef *xref, SecurityHandler *secHdlr)
+{
+  hintsOffset = linearization->getHintsOffset();
+  hintsLength = linearization->getHintsLength();
+  hintsOffset2 = linearization->getHintsOffset2();
+  hintsLength2 = linearization->getHintsLength2();
+
+  Parser *parser;
+  Object obj;
+
+  int bufLength = hintsLength + hintsLength2;
+  // Heap buffer: the length comes from the file, so a variable-length
+  // stack array (a compiler extension in C++) could overflow the stack.
+  char *buf = (char *) gmallocn(bufLength, sizeof(char));
+  char *p = buf;
+
+  obj.initNull();
+  Stream *s = str->makeSubStream(hintsOffset, gFalse, hintsLength, &obj);
+  s->reset();
+  for (Guint i=0; i < hintsLength; i++) { *p++ = s->getChar(); }
+  delete s;
+
+  if (hintsOffset2 && hintsLength2) {
+    obj.initNull();
+    s = str->makeSubStream(hintsOffset2, gFalse, hintsLength2, &obj);
+    s->reset();
+    for (Guint i=0; i < hintsLength2; i++) { *p++ = s->getChar(); }
+    delete s;
+  }
+
+  obj.initNull();
+  MemStream *memStream = new MemStream (buf, 0, bufLength, &obj);
+  obj.initNull();
+  parser = new Parser(xref, new Lexer(xref, memStream), gTrue);
+  // Expect "<num> <gen> obj" followed by a stream object.
+  int num, gen;
+  if (parser->getObj(&obj)->isInt() &&
+     (num = obj.getInt(), obj.free(), parser->getObj(&obj)->isInt()) &&
+     (gen = obj.getInt(), obj.free(), parser->getObj(&obj)->isCmd("obj")) &&
+     (obj.free(), parser->getObj(&obj,
+         secHdlr ? secHdlr->getFileKey() : (Guchar *)NULL,
+         secHdlr ? secHdlr->getEncAlgorithm() : cryptRC4,
+         secHdlr ? secHdlr->getFileKeyLength() : 0,
+         num, gen)->isStream())) {
+    Stream *hintsStream = obj.getStream();
+    Dict *hintsDict = obj.streamGetDict();
+
+    int sharedStreamOffset = 0;
+    if (hintsDict->lookupInt("S", NULL, &sharedStreamOffset) &&
+        sharedStreamOffset > 0) {
+        hintsStream->reset();
+        readPageOffsetTable(hintsStream);
+        // Rewind and skip sharedStreamOffset bytes to reach the shared object table.
+        hintsStream->reset();
+        for (int i=0; i<sharedStreamOffset; i++) hintsStream->getChar();
+        readSharedObjectsTable(hintsStream);
+    } else {
+      error(-1, "Invalid shared object hint table offset");
+    }
+  } else {
+    error(-1, "Failed parsing hints table object");
+  }
+  obj.free();
+  delete parser;
+  gfree(buf); // released only after the parser that reads from it is gone
+}
+
+void Hints::readPageOffsetTable(Stream *str)
+{
+  if (nPages < 1) {
+    error(-1, "Invalid number of pages reading page offset hints table");
+    return;
+  }
+
+  inputBits = 0; // reset on byte boundary.
+
+  nObjectLeast = readBits(32, str);
+
+  objectOffsetFirst = readBits(32, str);
+  if (objectOffsetFirst >= hintsOffset) objectOffsetFirst += hintsLength;
+
+  nBitsDiffObjects = readBits(16, str);
+
+  pageLengthLeast = readBits(32, str);
+
+  nBitsDiffPageLength = readBits(16, str);
+
+  OffsetStreamLeast = readBits(32, str);
+
+  nBitsOffsetStream = readBits(16, str);
+
+  lengthStreamLeast = readBits(32, str);
+
+  nBitsLengthStream = readBits(16, str);
+
+  nBitsNumShared = readBits(16, str);
+
+  nBitsShared = readBits(16, str);
+
+  nBitsNumerator = readBits(16, str);
+
+  denominator = readBits(16, str);
+
+  for (int i=0; i<nPages; i++) {
+    nObjects[i] = nObjectLeast + readBits(nBitsDiffObjects, str);
+  }
+
+  nObjects[0] = 0;
+  xRefOffset[0] = mainXRefEntriesOffset + 20;
+  for (int i=1; i<nPages; i++) {
+    xRefOffset[i] = xRefOffset[i-1] + 20*nObjects[i-1];
+  }
+
+  pageObjectNum[0] = 1;
+  for (int i=1; i<nPages; i++) {
+    pageObjectNum[i] = pageObjectNum[i-1] + nObjects[i-1];
+  }
+  pageObjectNum[0] = pageObjectFirst;
+
+  inputBits = 0; // reset on byte boundary. Not in specs!
+  for (int i=0; i<nPages; i++) {
+    pageLength[i] = pageLengthLeast + readBits(nBitsDiffPageLength, str);
+  }
+
+  inputBits = 0; // reset on byte boundary. Not in specs!
+  numSharedObject[0] = readBits(nBitsNumShared, str);
+  numSharedObject[0] = 0; // Do not trust the read value to be 0.
+  sharedObjectId[0] = NULL;
+  for (int i=1; i<nPages; i++) {
+    numSharedObject[i] = readBits(nBitsNumShared, str);
+    if (numSharedObject[i] >= INT_MAX / (int)sizeof(Guint)) {
+       error(-1, "Invalid number of shared objects");
+       numSharedObject[i] = 0;
+       return;
+    }
+    sharedObjectId[i] = (Guint *) gmallocn(numSharedObject[i], sizeof(Guint));
+    if (numSharedObject[i] && !sharedObjectId[i]) {
+       error(-1, "Failed to allocate memory for shared object IDs");
+       numSharedObject[i] = 0;
+       return;
+    }
+  }
+
+  inputBits = 0; // reset on byte boundary. Not in specs!
+  for (int i=1; i<nPages; i++) {
+    for (Guint j=0; j < numSharedObject[i]; j++) {
+      sharedObjectId[i][j] = readBits(nBitsShared, str);
+    }
+  }
+
+  pageOffset[0] = pageOffsetFirst;
+  // find pageOffsets.
+  for (int i=1; i<nPages; i++) {
+    pageOffset[i] = pageOffset[i-1] + pageLength[i-1];
+  }
+
+}
+
+// Parse the shared object hint table from str into the group* arrays.
+void Hints::readSharedObjectsTable(Stream *str)
+{
+  inputBits = 0; // reset on byte boundary.
+
+  Guint firstSharedObjectNumber = readBits(32, str);
+  Guint firstSharedObjectOffset = readBits(32, str);
+  firstSharedObjectOffset += hintsLength;
+  Guint nSharedGroupsFirst = readBits(32, str);
+  // Store the count in the member (no shadowing local) so that
+  // getPageRanges() can validate group indices against it.
+  nSharedGroups = readBits(32, str);
+  Guint nBitsNumObjects = readBits(16, str);
+  Guint groupLengthLeast = readBits(32, str);
+  Guint nBitsDiffGroupLength = readBits(16, str);
+
+  if ((!nSharedGroups) || (nSharedGroups >= INT_MAX / (int)sizeof(Guint))) {
+     error(-1, "Invalid number of shared object groups");
+     nSharedGroups = 0;
+     return;
+  }
+  if ((!nSharedGroupsFirst) || (nSharedGroupsFirst > nSharedGroups)) {
+     error(-1, "Invalid number of first page shared object groups");
+     nSharedGroupsFirst = nSharedGroups;
+  }
+
+  groupLength = (Guint *) gmallocn(nSharedGroups, sizeof(Guint));
+  groupOffset = (Guint *) gmallocn(nSharedGroups, sizeof(Guint));
+  groupHasSignature = (Guint *) gmallocn(nSharedGroups, sizeof(Guint));
+  groupNumObjects = (Guint *) gmallocn(nSharedGroups, sizeof(Guint));
+  groupXRefOffset = (Guint *) gmallocn(nSharedGroups, sizeof(Guint));
+  if (!groupLength || !groupOffset || !groupHasSignature ||
+      !groupNumObjects || !groupXRefOffset) {
+     error(-1, "Failed to allocate memory for shared object groups");
+     nSharedGroups = 0;
+     return;
+  }
+
+  inputBits = 0; // reset on byte boundary. Not in specs!
+  for (Guint i=0; i<nSharedGroups; i++) {
+    groupLength[i] = groupLengthLeast + readBits(nBitsDiffGroupLength, str);
+  }
+
+  groupOffset[0] = objectOffsetFirst;
+  for (Guint i=1; i<nSharedGroupsFirst; i++) {
+    groupOffset[i] = groupOffset[i-1] + groupLength[i-1];
+  }
+  if (nSharedGroups > nSharedGroupsFirst ) {
+    groupOffset[nSharedGroupsFirst] = firstSharedObjectOffset;
+    for (Guint i=nSharedGroupsFirst+1; i<nSharedGroups; i++) {
+      groupOffset[i] = groupOffset[i-1] + groupLength[i-1];
+    }
+  }
+
+  inputBits = 0; // reset on byte boundary. Not in specs!
+  for (Guint i=0; i<nSharedGroups; i++) {
+    groupHasSignature[i] = readBits(1, str);
+  }
+
+  inputBits = 0; // reset on byte boundary. Not in specs!
+  for (Guint i=0; i<nSharedGroups; i++) {
+    if (groupHasSignature[i]) {
+       readBits(128, str);
+    }
+  }
+
+  inputBits = 0; // reset on byte boundary. Not in specs!
+  for (Guint i=0; i<nSharedGroups; i++) {
+    groupNumObjects[i] =
+       nBitsNumObjects ? 1 + readBits(nBitsNumObjects, str) : 1;
+  }
+
+  for (Guint i=0; i<nSharedGroupsFirst; i++) {
+    groupNumObjects[i] = 0;
+    groupXRefOffset[i] = 0;
+  }
+  if (nSharedGroups > nSharedGroupsFirst ) {
+    groupXRefOffset[nSharedGroupsFirst] =
+        mainXRefEntriesOffset + 20*firstSharedObjectNumber;
+    for (Guint i=nSharedGroupsFirst+1; i<nSharedGroups; i++) {
+      groupXRefOffset[i] = groupXRefOffset[i-1] + 20*groupNumObjects[i-1];
+    }
+  }
+}
+
+// Return the stored offset for 1-based page, or 0 if page is out of range.
+Guint Hints::getPageOffset(int page)
+{
+  if ((page < 1) || (page > nPages)) return 0;
+
+  if (page-1 > pageFirst)
+    return pageOffset[page-1];
+  else if (page-1 < pageFirst)
+    return pageOffset[page];
+  else
+    return pageOffset[0];
+}
+
+// Return a new vector of byte ranges for 1-based page (caller owns it), or NULL.
+GooVector<ByteRange>* Hints::getPageRanges(int page)
+{
+  if ((page < 1) || (page > nPages)) return NULL;
+
+  int idx;
+  if (page-1 > pageFirst)
+     idx = page-1;
+  else if (page-1 < pageFirst)
+     idx = page;
+  else
+     idx = 0;
+
+  ByteRange pageRange;
+  GooVector<ByteRange> *v = new GooVector<ByteRange>;
+
+  pageRange.offset = pageOffset[idx];
+  pageRange.length = pageLength[idx];
+  v->push_back(pageRange);
+
+  pageRange.offset = xRefOffset[idx];
+  pageRange.length = 20*nObjects[idx];
+  v->push_back(pageRange);
+
+  for (Guint j=0; j<numSharedObject[idx]; j++) {
+     Guint k = sharedObjectId[idx][j];
+     // Ids are read from the file; skip any that fall outside the group arrays.
+     if (k >= nSharedGroups) continue;
+     pageRange.offset = groupOffset[k];
+     pageRange.length = groupLength[k];
+     v->push_back(pageRange);
+
+     pageRange.offset = groupXRefOffset[k];
+     pageRange.length = 20*groupNumObjects[k];
+     v->push_back(pageRange);
+  }
+
+  return v;
+}
+
+Guint Hints::readBit(Stream *str)
+{
+  Guint bit;
+  int c;
+
+  if (inputBits == 0) {
+    if ((c = str->getChar()) == EOF) {
+      return (Guint) -1;
+    }
+    bitsBuffer = c;
+    inputBits = 8;
+  }
+  bit = (bitsBuffer >> (inputBits - 1)) & 1;
+  --inputBits;
+  return bit;
+}
+
+// Read n bits, most significant first, by calling readBit() n times.
+// Returns (Guint)-1 when n is negative or readBit() reports end of stream.
+Guint Hints::readBits(int n, Stream *str)
+{
+  Guint bits = 0;
+
+  if (n < 0) return (Guint) -1;
+
+  for (int i = 0; i < n; i++) {
+    // Test the sentinel before combining; after a shift it would no
+    // longer compare equal to (Guint)-1.
+    Guint bit = readBit(str);
+    if (bit == (Guint) -1)
+      return (Guint) -1;
+    // Shift the accumulator, not the new bit: shifting by n-1 is undefined
+    // if n exceeds the bit width of Guint (callers pass n as large as 128).
+    bits = (bits << 1) | bit;
+  }
+  return bits;
+}
+
+Guint Hints::getPageObjectNum(int page) {
+  if ((page < 1) || (page > nPages)) return 0;
+
+  if (page-1 > pageFirst)
+    return pageObjectNum[page-1];
+  else if (page-1 < pageFirst)
+    return pageObjectNum[page];
+  else
+    return pageObjectNum[0];
+}
diff --git a/poppler/Hints.h b/poppler/Hints.h
new file mode 100644
index 0000000..85335a7
--- /dev/null
+++ b/poppler/Hints.h
@@ -0,0 +1,95 @@
+//========================================================================
+//
+// Hints.h
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2010 Hib Eris <hib at hiberis.nl>
+//
+//========================================================================
+
+#ifndef HINTS_H
+#define HINTS_H
+
+#include <string.h>
+#include "goo/gtypes.h"
+#include "goo/GooVector.h"
+//#include <vector>
+#include "PDFDoc.h"
+
+class Stream;
+class BaseStream;
+class Linearization;
+class XRef;
+
+//------------------------------------------------------------------------
+// Hints
+//------------------------------------------------------------------------
+
+class Hints {
+public:
+
+  Hints(BaseStream *str, Linearization *linearization, XRef *xref, SecurityHandler *secHdlr);
+  ~Hints();
+
+  Guint getPageObjectNum(int page);
+  Guint getPageOffset(int page);
+  GooVector<ByteRange>* getPageRanges(int page);
+
+private:
+
+  void readTables(BaseStream *str, Linearization *linearization, XRef *xref, SecurityHandler *secHdlr);
+  void readPageOffsetTable(Stream *str);
+  void readSharedObjectsTable(Stream *str);
+
+  Guint readBit(Stream *str);
+  Guint readBits(int n, Stream *str);
+
+  Guint hintsOffset;
+  Guint hintsLength;
+  Guint hintsOffset2;
+  Guint hintsLength2;
+  Guint mainXRefEntriesOffset;
+
+  int nPages;
+  int pageFirst;
+  Guint pageObjectFirst;
+  Guint pageOffsetFirst;
+  Guint pageEndFirst;
+  int objectNumberFirst;
+
+  Guint nObjectLeast;
+  Guint objectOffsetFirst;
+  Guint nBitsDiffObjects;
+  Guint pageLengthLeast;
+  Guint nBitsDiffPageLength;
+  Guint OffsetStreamLeast;
+  Guint nBitsOffsetStream;
+  Guint lengthStreamLeast;
+  Guint nBitsLengthStream;
+  Guint nBitsNumShared;
+  Guint nBitsShared;
+  Guint nBitsNumerator;
+  Guint denominator;
+
+  Guint *nObjects;
+  Guint *pageObjectNum;
+  Guint *xRefOffset;
+  Guint *pageLength;
+  Guint *pageOffset;
+  Guint *numSharedObject;
+  Guint **sharedObjectId;
+
+  Guint nSharedGroups;
+  Guint *groupLength;
+  Guint *groupOffset;
+  Guint *groupHasSignature;
+  Guint *groupNumObjects;
+  Guint *groupXRefOffset;
+
+  int inputBits;
+  char bitsBuffer;
+
+};
+
+#endif
diff --git a/poppler/Makefile.am b/poppler/Makefile.am
index bb6daa6..4147a92 100644
--- a/poppler/Makefile.am
+++ b/poppler/Makefile.am
@@ -206,6 +206,7 @@ poppler_include_HEADERS =	\
 	GfxState.h		\
 	GfxState_helpers.h	\
 	GlobalParams.h		\
+	Hints.h			\
 	JArithmeticDecoder.h	\
 	JBIG2Stream.h		\
 	Lexer.h			\
@@ -285,6 +286,7 @@ libpoppler_la_SOURCES =		\
 	GfxFont.cc 		\
 	GfxState.cc		\
 	GlobalParams.cc		\
+	Hints.cc		\
 	JArithmeticDecoder.cc	\
 	JBIG2Stream.cc		\
 	Lexer.cc 		\
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 1f768d4..85df238 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -70,6 +70,7 @@
 #include "Outline.h"
 #endif
 #include "PDFDoc.h"
+#include "Hints.h"
 
 //------------------------------------------------------------------------
 
@@ -98,6 +99,7 @@ void PDFDoc::init()
   xref = NULL;
   linearization = NULL;
   catalog = NULL;
+  hints = NULL;
 #ifndef DISABLE_OUTLINE
   outline = NULL;
 #endif
@@ -287,6 +289,9 @@ PDFDoc::~PDFDoc() {
   if (xref) {
     delete xref;
   }
+  if (hints) {
+    delete hints;
+  }
   if (linearization) {
     delete linearization;
   }
@@ -551,6 +556,15 @@ GBool PDFDoc::getID(GooString *permanent_id, GooString *update_id) {
   return gFalse;
 }
 
+Hints *PDFDoc::getHints()
+{
+  if (!hints && isLinearized()) {
+    hints = new Hints(str, getLinearization(), getXRef(), secHdlr);
+  }
+
+  return hints;
+}
+
 int PDFDoc::saveAs(GooString *name, PDFWriteMode mode) {
   FILE *f;
   OutStream *outStr;
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 33f3c2b..f04e59f 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -51,6 +51,7 @@ class LinkDest;
 class Outline;
 class Linearization;
 class SecurityHandler;
+class Hints;
 
 enum PDFWriteMode {
   writeStandard,
@@ -241,6 +242,9 @@ private:
   void saveIncrementalUpdate (OutStream* outStr);
   void saveCompleteRewrite (OutStream* outStr);
 
+  // Get hints.
+  Hints *getHints();
+
   PDFDoc();
   void init();
   GBool setup(GooString *ownerPassword, GooString *userPassword);
@@ -264,6 +268,7 @@ private:
   XRef *xref;
   SecurityHandler *secHdlr;
   Catalog *catalog;
+  Hints *hints;
 #ifndef DISABLE_OUTLINE
   Outline *outline;
 #endif
-- 
1.6.4.2


From c5888b41912227e1dfb3a2bf509128a1dee03ea0 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 20 Apr 2010 19:06:02 +0200
Subject: [PATCH 16/17] Use hint tables for PDFDoc::getPage()

---
 poppler/PDFDoc.cc |   61 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 poppler/PDFDoc.h  |    3 ++
 2 files changed, 64 insertions(+), 0 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 85df238..9d5a40a 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -105,6 +105,7 @@ void PDFDoc::init()
 #endif
   startXRefPos = ~(Guint)0;
   secHdlr = NULL;
+  pageCache = NULL;
 }
 
 PDFDoc::PDFDoc()
@@ -277,6 +278,14 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
 }
 
 PDFDoc::~PDFDoc() {
+  if (pageCache) {
+    for (int i = 0; i < getNumPages(); i++) {
+      if (pageCache[i]) {
+        delete pageCache[i];
+      }
+    }
+    gfree(pageCache);
+  }
   delete secHdlr;
 #ifndef DISABLE_OUTLINE
   if (outline) {
@@ -1136,9 +1145,61 @@ int PDFDoc::getNumPages()
   }
 }
 
+Page *PDFDoc::parsePage(int page)
+{
+  Page *p = NULL;
+  Object obj;
+  Ref pageRef;
+  Dict *pageDict;
+
+  pageRef.num = (int) getHints()->getPageObjectNum(page);
+  if (!pageRef.num) {
+    error(-1, "Failed to get object num from hint tables for page %d", page);
+    return NULL;
+  }
+
+  // check for bogus ref - this can happen in corrupted PDF files
+  if (pageRef.num < 0 || pageRef.num >= xref->getNumObjects()) {
+    error(-1, "Invalid object num (%d) for page %d", pageRef.num, page);
+    return NULL;
+  }
+
+  pageRef.gen = xref->getEntry(pageRef.num)->gen;
+  xref->fetch(pageRef.num, pageRef.gen, &obj);
+  if (!obj.isDict()) {
+    obj.free();
+    error(-1, "Object (%d %d) is not a pageDict", pageRef.num, pageRef.gen);
+    return NULL;
+  }
+  pageDict = obj.getDict();
+
+  p = new Page(xref, page, pageDict, pageRef,
+               new PageAttrs(NULL, pageDict), catalog->getForm());
+  obj.free();
+
+  return p;
+}
+
 Page *PDFDoc::getPage(int page)
 {
   if ((page < 1) || page > getNumPages()) return NULL;
 
+  if (isLinearized()) {
+    if (!pageCache) {
+      pageCache = (Page **) gmallocn(getNumPages(), sizeof(Page *));
+      for (int i = 0; i < getNumPages(); i++) {
+        pageCache[i] = NULL;
+      }
+    }
+    if (!pageCache[page-1]) {
+      pageCache[page-1] = parsePage(page);
+    }
+    if (pageCache[page-1]) {
+       return pageCache[page-1];
+    } else {
+       error(-1, "Failed parsing page %d using hint tables", page);
+    }
+  }
+
   return catalog->getPage(page);
 }
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index f04e59f..a7113c8 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -242,6 +242,8 @@ private:
   void saveIncrementalUpdate (OutStream* outStr);
   void saveCompleteRewrite (OutStream* outStr);
 
+  Page *parsePage(int page);
+
   // Get hints.
   Hints *getHints();
 
@@ -272,6 +274,7 @@ private:
 #ifndef DISABLE_OUTLINE
   Outline *outline;
 #endif
+  Page **pageCache;
 
   GBool ok;
   int errCode;
-- 
1.6.4.2


From 2dec9534ac66999f3833259e05ee6c77f441769e Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 13:08:11 +0100
Subject: [PATCH 17/17] Fill CachedFileStream buffer in a smarter manner

This avoids downloading too many chunks by buffering on chunk boundaries.
---
 poppler/CachedFile.h |    2 +-
 poppler/Stream.cc    |    2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/poppler/CachedFile.h b/poppler/CachedFile.h
index 897ff4a..e1ff817 100644
--- a/poppler/CachedFile.h
+++ b/poppler/CachedFile.h
@@ -24,7 +24,7 @@
 
 //------------------------------------------------------------------------
 
-#define CachedFileChunkSize 8192
+#define CachedFileChunkSize 8192 // This should be a multiple of cachedStreamBufSize
 
 class GooString;
 class CachedFileLoader;
diff --git a/poppler/Stream.cc b/poppler/Stream.cc
index fbf2b33..93cc27b 100644
--- a/poppler/Stream.cc
+++ b/poppler/Stream.cc
@@ -875,7 +875,7 @@ GBool CachedFileStream::fillBuf()
   if (limited && bufPos + cachedStreamBufSize > start + length) {
     n = start + length - bufPos;
   } else {
-    n = cachedStreamBufSize;
+    n = cachedStreamBufSize - (bufPos % cachedStreamBufSize);
   }
   cc->read(buf, 1, n);
   bufEnd = buf + n;
-- 
1.6.4.2


More information about the poppler mailing list