[poppler] Linearization support
Hib Eris
hib at hiberis.nl
Wed Aug 4 14:47:24 PDT 2010
Hi all,
On Sat, Jun 12, 2010 at 12:37 PM, Hib Eris <hib at hiberis.nl> wrote:
> Hi all,
>
> Now that 0.14.0 is out and feature freeze is over, I have updated my
> linearization patches
> (see http://lists.freedesktop.org/archives/poppler/2010-April/005760.html)
> to current master.
>
> Any comments on it are very welcome.
I have updated my patches again as they no longer applied to current
git master. I have also fixed some errors I found with test documents.
I would appreciate it if anyone could test these patches against other
PDF documents.
Cheers,
Hib
-------------- next part --------------
From 5c4d917156a5b46f7b78972a39c7dec6ee4817d9 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 6 Apr 2010 19:24:42 +0200
Subject: [PATCH 01/12] Cleanup XRef constructors
---
poppler/XRef.cc | 14 ++++++--------
poppler/XRef.h | 1 +
2 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index a9cf571..bc6eb8e 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -258,7 +258,7 @@ Object *ObjectStream::getObject(int objIdx, int objNum, Object *obj) {
// XRef
//------------------------------------------------------------------------
-XRef::XRef() {
+void XRef::init() {
ok = gTrue;
errCode = errNone;
entries = NULL;
@@ -268,17 +268,15 @@ XRef::XRef() {
objStrs = new PopplerCache(5);
}
+XRef::XRef() {
+ init();
+}
+
XRef::XRef(BaseStream *strA) {
Guint pos;
Object obj;
- ok = gTrue;
- errCode = errNone;
- size = 0;
- entries = NULL;
- streamEnds = NULL;
- streamEndsLen = 0;
- objStrs = new PopplerCache(5);
+ init();
encrypted = gFalse;
permFlags = defPermFlags;
diff --git a/poppler/XRef.h b/poppler/XRef.h
index be19e23..4e3f403 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -156,6 +156,7 @@ private:
Guchar fileKey[16]; // file decryption key
GBool ownerPasswordOk; // true if owner password is correct
+ void init();
Guint getStartXref();
GBool readXRef(Guint *pos, GooVector<Guint> *followedXRefStm);
GBool readXRefTable(Parser *parser, Guint *pos, GooVector<Guint> *followedXRefStm);
--
1.6.4.2
From bf64216a7276092dece1c015215a42dff724652c Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 6 Apr 2010 19:16:45 +0200
Subject: [PATCH 02/12] Create no more XRef entries than specified
---
poppler/XRef.cc | 136 ++++++++++++++++++++++++++++---------------------------
poppler/XRef.h | 5 ++-
2 files changed, 73 insertions(+), 68 deletions(-)
diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index bc6eb8e..b7d01b3 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -262,6 +262,7 @@ void XRef::init() {
ok = gTrue;
errCode = errNone;
entries = NULL;
+ capacity = 0;
size = 0;
streamEnds = NULL;
streamEndsLen = 0;
@@ -344,6 +345,56 @@ XRef::~XRef() {
}
}
+int XRef::reserve(int newSize)
+{
+ if (newSize > capacity) {
+
+ int realNewSize;
+ for (realNewSize = capacity ? 2 * capacity : 1024;
+ newSize > realNewSize && realNewSize > 0;
+ realNewSize <<= 1) ;
+ if ((realNewSize < 0) ||
+ (realNewSize >= INT_MAX / (int)sizeof(XRefEntry))) {
+ return 0;
+ }
+
+ void *p = greallocn_checkoverflow(entries, realNewSize, sizeof(XRefEntry));
+ if (p == NULL) {
+ return 0;
+ }
+
+ entries = (XRefEntry *) p;
+ capacity = realNewSize;
+
+ }
+
+ return capacity;
+}
+
+int XRef::resize(int newSize)
+{
+ if (newSize > size) {
+
+ if (reserve(newSize) < newSize) return size;
+
+ for (int i = size; i < newSize; ++i) {
+ entries[i].offset = 0xffffffff;
+ entries[i].type = xrefEntryFree;
+ entries[i].obj.initNull ();
+ entries[i].updated = false;
+ entries[i].gen = 0;
+ }
+ } else {
+ for (int i = newSize; i < size; i++) {
+ entries[i].obj.free ();
+ }
+ }
+
+ size = newSize;
+
+ return size;
+}
+
// Read the 'startxref' position.
Guint XRef::getStartXref() {
char buf[xrefSearchSize+1];
@@ -431,7 +482,7 @@ GBool XRef::readXRefTable(Parser *parser, Guint *pos, GooVector<Guint> *followed
GBool more;
Object obj, obj2;
Guint pos2;
- int first, n, newSize, i;
+ int first, n, i;
while (1) {
parser->getObj(&obj);
@@ -450,29 +501,13 @@ GBool XRef::readXRefTable(Parser *parser, Guint *pos, GooVector<Guint> *followed
n = obj.getInt();
obj.free();
if (first < 0 || n < 0 || first + n < 0) {
- goto err1;
+ goto err0;
}
if (first + n > size) {
- for (newSize = size ? 2 * size : 1024;
- first + n > newSize && newSize > 0;
- newSize <<= 1) ;
- if (newSize < 0) {
- goto err1;
- }
- if (newSize >= INT_MAX / (int)sizeof(XRefEntry)) {
+ if (resize(first + n) != first + n) {
error(-1, "Invalid 'obj' parameters'");
- goto err1;
+ goto err0;
}
-
- entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
- for (i = size; i < newSize; ++i) {
- entries[i].offset = 0xffffffff;
- entries[i].type = xrefEntryFree;
- entries[i].obj.initNull ();
- entries[i].updated = false;
- entries[i].gen = 0;
- }
- size = newSize;
}
for (i = first; i < first + n; ++i) {
if (!parser->getObj(&obj)->isInt()) {
@@ -561,6 +596,7 @@ GBool XRef::readXRefTable(Parser *parser, Guint *pos, GooVector<Guint> *followed
err1:
obj.free();
+ err0:
ok = gFalse;
return gFalse;
}
@@ -583,19 +619,10 @@ GBool XRef::readXRefStream(Stream *xrefStr, Guint *pos) {
goto err1;
}
if (newSize > size) {
- if (newSize >= INT_MAX / (int)sizeof(XRefEntry)) {
- error(-1, "Invalid 'size' parameter.");
- return gFalse;
- }
- entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
- for (i = size; i < newSize; ++i) {
- entries[i].offset = 0xffffffff;
- entries[i].type = xrefEntryFree;
- entries[i].obj.initNull ();
- entries[i].updated = false;
- entries[i].gen = 0;
+ if (resize(newSize) != newSize) {
+ error(-1, "Invalid 'size' parameter");
+ goto err0;
}
- size = newSize;
}
if (!dict->lookupNF("W", &obj)->isArray() ||
@@ -668,31 +695,16 @@ GBool XRef::readXRefStream(Stream *xrefStr, Guint *pos) {
GBool XRef::readXRefStreamSection(Stream *xrefStr, int *w, int first, int n) {
Guint offset;
- int type, gen, c, newSize, i, j;
+ int type, gen, c, i, j;
if (first + n < 0) {
return gFalse;
}
if (first + n > size) {
- for (newSize = size ? 2 * size : 1024;
- first + n > newSize && newSize > 0;
- newSize <<= 1) ;
- if (newSize < 0) {
- return gFalse;
- }
- if (newSize >= INT_MAX / (int)sizeof(XRefEntry)) {
- error(-1, "Invalid 'size' inside xref table.");
+ if (resize(first + n) != first + n) {
+ error(-1, "Invalid 'size' inside xref table");
return gFalse;
}
- entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
- for (i = size; i < newSize; ++i) {
- entries[i].offset = 0xffffffff;
- entries[i].type = xrefEntryFree;
- entries[i].obj.initNull ();
- entries[i].updated = false;
- entries[i].gen = 0;
- }
- size = newSize;
}
for (i = first; i < first + n; ++i) {
if (w[0] == 0) {
@@ -753,13 +765,13 @@ GBool XRef::constructXRef() {
int newSize;
int streamEndsSize;
char *p;
- int i;
GBool gotRoot;
char* token = NULL;
bool oneCycle = true;
int offset = 0;
gfree(entries);
+ capacity = 0;
size = 0;
entries = NULL;
@@ -841,23 +853,10 @@ GBool XRef::constructXRef() {
error(-1, "Bad object number");
return gFalse;
}
- if (newSize >= INT_MAX / (int)sizeof(XRefEntry)) {
- error(-1, "Invalid 'obj' parameters.");
+ if (resize(newSize) != newSize) {
+ error(-1, "Invalid 'obj' parameters");
return gFalse;
}
- entries = (XRefEntry *)
- greallocn_checkoverflow(entries, newSize, sizeof(XRefEntry));
- if (entries == NULL) {
- size = 0;
- return gFalse;
- }
- for (i = size; i < newSize; ++i) {
- entries[i].offset = 0xffffffff;
- entries[i].type = xrefEntryFree;
- entries[i].obj.initNull ();
- entries[i].updated = false;
- }
- size = newSize;
}
if (entries[num].type == xrefEntryFree ||
gen >= entries[num].gen) {
@@ -1146,7 +1145,10 @@ Guint XRef::strToUnsigned(char *s) {
void XRef::add(int num, int gen, Guint offs, GBool used) {
if (num >= size) {
- entries = (XRefEntry *)greallocn(entries, num + 1, sizeof(XRefEntry));
+ if (num >= capacity) {
+ entries = (XRefEntry *)greallocn(entries, num + 1, sizeof(XRefEntry));
+ capacity = num + 1;
+ }
for (int i = size; i < num + 1; ++i) {
entries[i].offset = 0xffffffff;
entries[i].type = xrefEntryFree;
diff --git a/poppler/XRef.h b/poppler/XRef.h
index 4e3f403..dbce7a3 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -137,7 +137,8 @@ private:
Guint start; // offset in file (to allow for garbage
// at beginning of file)
XRefEntry *entries; // xref entries
- int size; // size of <entries> array
+ int capacity; // size of <entries> array
+ int size; // number of entries
int rootNum, rootGen; // catalog dict
GBool ok; // true if xref table is valid
int errCode; // error code (if <ok> is false)
@@ -157,6 +158,8 @@ private:
GBool ownerPasswordOk; // true if owner password is correct
void init();
+ int reserve(int newSize);
+ int resize(int newSize);
Guint getStartXref();
GBool readXRef(Guint *pos, GooVector<Guint> *followedXRefStm);
GBool readXRefTable(Parser *parser, Guint *pos, GooVector<Guint> *followedXRefStm);
--
1.6.4.2
From b93ef777488fedd4a51d11780fef58d8e39e87a9 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 28 Apr 2010 12:45:42 +0200
Subject: [PATCH 03/12] Use XRef::add() in XRef::addIndirectObject()
---
poppler/XRef.cc | 4 +---
1 files changed, 1 insertions(+), 3 deletions(-)
diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index b7d01b3..4bf858f 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -1190,10 +1190,8 @@ Ref XRef::addIndirectObject (Object* o) {
XRefEntry *e;
if (entryIndexToUse == -1) {
entryIndexToUse = size;
- size++;
- entries = (XRefEntry *)greallocn(entries, size, sizeof(XRefEntry));
+ add(entryIndexToUse, 0, 0, gFalse);
e = &entries[entryIndexToUse];
- e->gen = 0;
} else {
//reuse a free entry
e = &entries[entryIndexToUse];
--
1.6.4.2
From e1a10f961b67e59b516058b18e33fe2b16b333c4 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 14 Apr 2010 12:20:49 +0200
Subject: [PATCH 04/12] Use XRef::getEntry() to access entries
---
poppler/XRef.cc | 49 +++++++++++++++++++++++++------------------------
poppler/XRef.h | 2 +-
2 files changed, 26 insertions(+), 25 deletions(-)
diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index 4bf858f..3094b5a 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -978,7 +978,7 @@ Object *XRef::fetch(int num, int gen, Object *obj) {
goto err;
}
- e = &entries[num];
+ e = getEntry(num);
if(!e->obj.isNull ()) { //check for updated object
obj = e->obj.copy(obj);
return obj;
@@ -1110,20 +1110,20 @@ GBool XRef::getStreamEnd(Guint streamStart, Guint *streamEnd) {
return gTrue;
}
-int XRef::getNumEntry(Guint offset) const
+int XRef::getNumEntry(Guint offset)
{
if (size > 0)
{
int res = 0;
- Guint resOffset = entries[0].offset;
- XRefEntry e;
+ Guint resOffset = getEntry(0)->offset;
+ XRefEntry *e;
for (int i = 1; i < size; ++i)
{
- e = entries[i];
- if (e.offset < offset && e.offset >= resOffset)
+ e = getEntry(i);
+ if (e->offset < offset && e->offset >= resOffset)
{
res = i;
- resOffset = e.offset;
+ resOffset = e->offset;
}
}
return res;
@@ -1158,7 +1158,7 @@ void XRef::add(int num, int gen, Guint offs, GBool used) {
}
size = num + 1;
}
- XRefEntry *e = &entries[num];
+ XRefEntry *e = getEntry(num);
e->gen = gen;
e->obj.initNull ();
e->updated = false;
@@ -1176,25 +1176,26 @@ void XRef::setModifiedObject (Object* o, Ref r) {
error(-1,"XRef::setModifiedObject on unknown ref: %i, %i\n", r.num, r.gen);
return;
}
- entries[r.num].obj.free();
- o->copy(&entries[r.num].obj);
- entries[r.num].updated = true;
+ XRefEntry *e = getEntry(r.num);
+ e->obj.free();
+ o->copy(&(e->obj));
+ e->updated = true;
}
Ref XRef::addIndirectObject (Object* o) {
int entryIndexToUse = -1;
for (int i = 1; entryIndexToUse == -1 && i < size; ++i) {
- if (entries[i].type == xrefEntryFree) entryIndexToUse = i;
+ if (getEntry(i)->type == xrefEntryFree) entryIndexToUse = i;
}
XRefEntry *e;
if (entryIndexToUse == -1) {
entryIndexToUse = size;
add(entryIndexToUse, 0, 0, gFalse);
- e = &entries[entryIndexToUse];
+ e = getEntry(entryIndexToUse);
} else {
//reuse a free entry
- e = &entries[entryIndexToUse];
+ e = getEntry(entryIndexToUse);
//we don't touch gen number, because it should have been
//incremented when the object was deleted
}
@@ -1210,13 +1211,13 @@ Ref XRef::addIndirectObject (Object* o) {
void XRef::writeToFile(OutStream* outStr, GBool writeAllEntries) {
//create free entries linked-list
- if (entries[0].gen != 65535) {
+ if (getEntry(0)->gen != 65535) {
error(-1, "XRef::writeToFile, entry 0 of the XRef is invalid (gen != 65535)\n");
}
int lastFreeEntry = 0;
for (int i=0; i<size; i++) {
- if (entries[i].type == xrefEntryFree) {
- entries[lastFreeEntry].offset = i;
+ if (getEntry(i)->type == xrefEntryFree) {
+ getEntry(lastFreeEntry)->offset = i;
lastFreeEntry = i;
}
}
@@ -1226,10 +1227,10 @@ void XRef::writeToFile(OutStream* outStr, GBool writeAllEntries) {
outStr->printf("xref\r\n");
outStr->printf("%i %i\r\n", 0, size);
for (int i=0; i<size; i++) {
- XRefEntry &e = entries[i];
+ XRefEntry *e = getEntry(i);
- if(e.gen > 65535) e.gen = 65535; //cap generation number to 65535 (required by PDFReference)
- outStr->printf("%010i %05i %c\r\n", e.offset, e.gen, (e.type==xrefEntryFree)?'f':'n');
+ if(e->gen > 65535) e->gen = 65535; //cap generation number to 65535 (required by PDFReference)
+ outStr->printf("%010i %05i %c\r\n", e->offset, e->gen, (e->type==xrefEntryFree)?'f':'n');
}
} else {
//write the new xref
@@ -1238,16 +1239,16 @@ void XRef::writeToFile(OutStream* outStr, GBool writeAllEntries) {
while (i < size) {
int j;
for(j=i; j<size; j++) { //look for consecutive entries
- if ((entries[j].type == xrefEntryFree) && (entries[j].gen == 0))
+ if ((getEntry(j)->type == xrefEntryFree) && (getEntry(j)->gen == 0))
break;
}
if (j-i != 0)
{
outStr->printf("%i %i\r\n", i, j-i);
for (int k=i; k<j; k++) {
- XRefEntry &e = entries[k];
- if(e.gen > 65535) e.gen = 65535; //cap generation number to 65535 (required by PDFReference)
- outStr->printf("%010i %05i %c\r\n", e.offset, e.gen, (e.type==xrefEntryFree)?'f':'n');
+ XRefEntry *e = getEntry(k);
+ if(e->gen > 65535) e->gen = 65535; //cap generation number to 65535 (required by PDFReference)
+ outStr->printf("%010i %05i %c\r\n", e->offset, e->gen, (e->type==xrefEntryFree)?'f':'n');
}
i = j;
}
diff --git a/poppler/XRef.h b/poppler/XRef.h
index dbce7a3..df8993f 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -118,7 +118,7 @@ public:
GBool getStreamEnd(Guint streamStart, Guint *streamEnd);
// Retuns the entry that belongs to the offset
- int getNumEntry(Guint offset) const;
+ int getNumEntry(Guint offset);
// Direct access.
int getSize() { return size; }
--
1.6.4.2
From a3472cefec89512572f8ff3cc861bbe8649ed7f7 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 15 Apr 2010 17:34:13 +0200
Subject: [PATCH 05/12] Read XRef table sections on demand
---
poppler/XRef.cc | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++----
poppler/XRef.h | 6 +++-
2 files changed, 59 insertions(+), 7 deletions(-)
diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index 3094b5a..9aa9500 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -274,7 +274,6 @@ XRef::XRef() {
}
XRef::XRef(BaseStream *strA) {
- Guint pos;
Object obj;
init();
@@ -286,11 +285,11 @@ XRef::XRef(BaseStream *strA) {
// read the trailer
str = strA;
start = str->getStart();
- pos = getStartXref();
+ prevXRefOffset = getStartXref();
// if there was a problem with the 'startxref' position, try to
// reconstruct the xref table
- if (pos == 0) {
+ if (prevXRefOffset == 0) {
if (!(ok = constructXRef())) {
errCode = errDamaged;
return;
@@ -299,7 +298,7 @@ XRef::XRef(BaseStream *strA) {
// read the xref table
} else {
GooVector<Guint> followedXRefStm;
- while (readXRef(&pos, &followedXRefStm)) ;
+ readXRef(&prevXRefOffset, &followedXRefStm);
// if there was a problem with the xref table,
// try to reconstruct it
@@ -311,6 +310,18 @@ XRef::XRef(BaseStream *strA) {
}
}
+ // set size according to trailer dict
+ trailerDict.dictLookupNF("Size", &obj);
+ if (obj.isInt() && (resize(obj.getInt()) == obj.getInt())) {
+ obj.free();
+ } else {
+ obj.free();
+ if (!(ok = constructXRef())) {
+ errCode = errDamaged;
+ return;
+ }
+ }
+
// get the root dictionary (catalog) object
trailerDict.dictLookupNF("Root", &obj);
if (obj.isRef()) {
@@ -379,7 +390,7 @@ int XRef::resize(int newSize)
for (int i = size; i < newSize; ++i) {
entries[i].offset = 0xffffffff;
- entries[i].type = xrefEntryFree;
+ entries[i].type = xrefEntryNone;
entries[i].obj.initNull ();
entries[i].updated = false;
entries[i].gen = 0;
@@ -1257,3 +1268,42 @@ void XRef::writeToFile(OutStream* outStr, GBool writeAllEntries) {
}
}
+XRefEntry *XRef::getEntry(int i)
+{
+ if (entries[i].type == xrefEntryNone) {
+
+ GooVector<Guint> followedPrev;
+ while (prevXRefOffset && entries[i].type == xrefEntryNone) {
+ bool followed = false;
+ for (size_t j = 0; j < followedPrev.size(); j++) {
+ if (followedPrev.at(j) == prevXRefOffset) {
+ followed = true;
+ break;
+ }
+ }
+ if (followed) {
+ error(-1, "Circular XRef");
+ if (!(ok = constructXRef())) {
+ errCode = errDamaged;
+ }
+ break;
+ }
+
+ followedPrev.push_back (prevXRefOffset);
+
+ GooVector<Guint> followedXRefStm;
+ if (!readXRef(&prevXRefOffset, &followedXRefStm)) {
+ prevXRefOffset = 0;
+ }
+ }
+
+ if (entries[i].type == xrefEntryNone) {
+ error(-1, "Invalid XRef entry");
+ entries[i].type = xrefEntryFree;
+ }
+ }
+
+ return &entries[i];
+}
+
+
diff --git a/poppler/XRef.h b/poppler/XRef.h
index df8993f..7ab094f 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -46,7 +46,8 @@ class PopplerCache;
enum XRefEntryType {
xrefEntryFree,
xrefEntryUncompressed,
- xrefEntryCompressed
+ xrefEntryCompressed,
+ xrefEntryNone
};
struct XRefEntry {
@@ -122,7 +123,7 @@ public:
// Direct access.
int getSize() { return size; }
- XRefEntry *getEntry(int i) { return &entries[i]; }
+ XRefEntry *getEntry(int i);
Object *getTrailerDict() { return &trailerDict; }
// Write access
@@ -156,6 +157,7 @@ private:
int permFlags; // permission bits
Guchar fileKey[16]; // file decryption key
GBool ownerPasswordOk; // true if owner password is correct
+ Guint prevXRefOffset; // position of prev XRef section (= next to read)
void init();
int reserve(int newSize);
--
1.6.4.2
From 09d33d6a6dcbc3d8ff5cf88d12800c79bb0ffbc3 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 24 Mar 2010 18:26:17 +0100
Subject: [PATCH 06/12] Add Linearization dictionary support
---
CMakeLists.txt | 2 +
poppler/Linearization.cc | 225 ++++++++++++++++++++++++++++++++++++++++++++++
poppler/Linearization.h | 45 +++++++++
poppler/Makefile.am | 2 +
poppler/PDFDoc.cc | 13 +++
poppler/PDFDoc.h | 5 +
6 files changed, 292 insertions(+), 0 deletions(-)
create mode 100644 poppler/Linearization.cc
create mode 100644 poppler/Linearization.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 916a780..3b71963 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -251,6 +251,7 @@ set(poppler_SRCS
poppler/JBIG2Stream.cc
poppler/Lexer.cc
poppler/Link.cc
+ poppler/Linearization.cc
poppler/LocalPDFDocBuilder.cc
poppler/NameToCharCode.cc
poppler/Object.cc
@@ -397,6 +398,7 @@ if(ENABLE_XPDF_HEADERS)
poppler/JBIG2Stream.h
poppler/Lexer.h
poppler/Link.h
+ poppler/Linearization.h
poppler/LocalPDFDocBuilder.h
poppler/Movie.h
poppler/NameToCharCode.h
diff --git a/poppler/Linearization.cc b/poppler/Linearization.cc
new file mode 100644
index 0000000..23c77f2
--- /dev/null
+++ b/poppler/Linearization.cc
@@ -0,0 +1,225 @@
+//========================================================================
+//
+// Linearization.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2010 Hib Eris <hib at hiberis.nl>
+//
+//========================================================================
+
+#include "Linearization.h"
+#include "Parser.h"
+#include "Lexer.h"
+
+//------------------------------------------------------------------------
+// Linearization
+//------------------------------------------------------------------------
+
+Linearization::Linearization (BaseStream *str)
+{
+ Parser *parser;
+ Object obj1, obj2, obj3, obj4, obj5;
+
+ linDict.initNull();
+
+ str->reset();
+ obj1.initNull();
+ parser = new Parser(NULL,
+ new Lexer(NULL, str->makeSubStream(str->getStart(), gFalse, 0, &obj1)),
+ gFalse);
+ parser->getObj(&obj1);
+ parser->getObj(&obj2);
+ parser->getObj(&obj3);
+ parser->getObj(&linDict);
+ parser->getObj(&obj4);
+ if (obj1.isInt() && obj2.isInt() && obj3.isCmd("obj") && linDict.isDict()) {
+ linDict.dictLookup("Linearized", &obj5);
+ if (!(obj5.isNum() && obj5.getNum() > 0)) {
+ linDict.free();
+ linDict.initNull();
+ }
+ obj5.free();
+ }
+ obj4.free();
+ obj4.free();
+ obj3.free();
+ obj2.free();
+ obj1.free();
+ delete parser;
+}
+
+Linearization:: ~Linearization()
+{
+ linDict.free();
+}
+
+Guint Linearization::getLength()
+{
+ if (!linDict.isDict()) return 0;
+
+ int length;
+ if (linDict.getDict()->lookupInt("L", NULL, &length) &&
+ length > 0) {
+ return length;
+ } else {
+ error(-1, "Length in linearization table is invalid");
+ return 0;
+ }
+}
+
+Guint Linearization::getHintsOffset()
+{
+ int hintsOffset;
+
+ Object obj1, obj2;
+ if (linDict.isDict() &&
+ linDict.dictLookup("H", &obj1)->isArray() &&
+ obj1.arrayGetLength()>=2 &&
+ obj1.arrayGet(0, &obj2)->isInt() &&
+ obj2.getInt() > 0) {
+ hintsOffset = obj2.getInt();
+ } else {
+ error(-1, "Hints table offset in linearization table is invalid");
+ hintsOffset = 0;
+ }
+ obj2.free();
+ obj1.free();
+
+ return hintsOffset;
+}
+
+Guint Linearization::getHintsLength()
+{
+ int hintsLength;
+
+ Object obj1, obj2;
+ if (linDict.isDict() &&
+ linDict.dictLookup("H", &obj1)->isArray() &&
+ obj1.arrayGetLength()>=2 &&
+ obj1.arrayGet(1, &obj2)->isInt() &&
+ obj2.getInt() > 0) {
+ hintsLength = obj2.getInt();
+ } else {
+ error(-1, "Hints table length in linearization table is invalid");
+ hintsLength = 0;
+ }
+ obj2.free();
+ obj1.free();
+
+ return hintsLength;
+}
+
+Guint Linearization::getHintsOffset2()
+{
+ int hintsOffset2 = 0; // default to 0
+
+ Object obj1, obj2;
+ if (linDict.isDict() &&
+ linDict.dictLookup("H", &obj1)->isArray() &&
+ obj1.arrayGetLength()>=4) {
+ if (obj1.arrayGet(2, &obj2)->isInt() &&
+ obj2.getInt() > 0) {
+ hintsOffset2 = obj2.getInt();
+ } else {
+ error(-1, "Second hints table offset in linearization table is invalid");
+ hintsOffset2 = 0;
+ }
+ }
+ obj2.free();
+ obj1.free();
+
+ return hintsOffset2;
+}
+
+Guint Linearization::getHintsLength2()
+{
+ int hintsLength2 = 0; // default to 0
+
+ Object obj1, obj2;
+ if (linDict.isDict() &&
+ linDict.dictLookup("H", &obj1)->isArray() &&
+ obj1.arrayGetLength()>=4) {
+ if (obj1.arrayGet(3, &obj2)->isInt() &&
+ obj2.getInt() > 0) {
+ hintsLength2 = obj2.getInt();
+ } else {
+ error(-1, "Second hints table length in linearization table is invalid");
+ hintsLength2 = 0;
+ }
+ }
+ obj2.free();
+ obj1.free();
+
+ return hintsLength2;
+}
+
+int Linearization::getObjectNumberFirst()
+{
+ int objectNumberFirst = 0;
+ if (linDict.isDict() &&
+ linDict.getDict()->lookupInt("O", NULL, &objectNumberFirst) &&
+ objectNumberFirst > 0) {
+ return objectNumberFirst;
+ } else {
+ error(-1, "Object number of first page in linearization table is invalid");
+ return 0;
+ }
+}
+
+Guint Linearization::getEndFirst()
+{
+ int pageEndFirst = 0;
+ if (linDict.isDict() &&
+ linDict.getDict()->lookupInt("E", NULL, &pageEndFirst) &&
+ pageEndFirst > 0) {
+ return pageEndFirst;
+ } else {
+ error(-1, "First page end offset in linearization table is invalid");
+ return 0;
+ }
+}
+
+int Linearization::getNumPages()
+{
+ int numPages = 0;
+ if (linDict.isDict() &&
+ linDict.getDict()->lookupInt("N", NULL, &numPages) &&
+ numPages > 0) {
+ return numPages;
+ } else {
+ error(-1, "Page count in linearization table is invalid");
+ return 0;
+ }
+}
+
+Guint Linearization::getMainXRefEntriesOffset()
+{
+ int mainXRefEntriesOffset = 0;
+ if (linDict.isDict() &&
+ linDict.getDict()->lookupInt("T", NULL, &mainXRefEntriesOffset) &&
+ mainXRefEntriesOffset > 0) {
+ return mainXRefEntriesOffset;
+ } else {
+ error(-1, "Main Xref offset in linearization table is invalid");
+ return 0;
+ }
+}
+
+int Linearization::getPageFirst()
+{
+ int pageFirst = 0; // Optional, defaults to 0.
+
+ if (linDict.isDict()) {
+ linDict.getDict()->lookupInt("P", NULL, &pageFirst);
+ }
+
+ if (pageFirst < 0) {
+ error(-1, "First page in linearization table is invalid");
+ return 0;
+ }
+
+ return pageFirst;
+}
+
+
diff --git a/poppler/Linearization.h b/poppler/Linearization.h
new file mode 100644
index 0000000..6728a75
--- /dev/null
+++ b/poppler/Linearization.h
@@ -0,0 +1,45 @@
+//========================================================================
+//
+// Linearization.h
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2010 Hib Eris <hib at hiberis.nl>
+//
+//========================================================================
+
+#ifndef LINEARIZATION_H
+#define LINEARIZATION_H
+
+#include "goo/gtypes.h"
+#include "Object.h"
+class BaseStream;
+
+//------------------------------------------------------------------------
+// Linearization
+//------------------------------------------------------------------------
+
+class Linearization {
+public:
+
+ Linearization(BaseStream *str);
+ ~Linearization();
+
+ Guint getLength();
+ Guint getHintsOffset();
+ Guint getHintsLength();
+ Guint getHintsOffset2();
+ Guint getHintsLength2();
+ int getObjectNumberFirst();
+ Guint getEndFirst();
+ int getNumPages();
+ Guint getMainXRefEntriesOffset();
+ int getPageFirst();
+
+private:
+
+ Object linDict;
+
+};
+
+#endif
diff --git a/poppler/Makefile.am b/poppler/Makefile.am
index 5da63f0..522f27e 100644
--- a/poppler/Makefile.am
+++ b/poppler/Makefile.am
@@ -209,6 +209,7 @@ poppler_include_HEADERS = \
JArithmeticDecoder.h \
JBIG2Stream.h \
Lexer.h \
+ Linearization.h \
Link.h \
LocalPDFDocBuilder.h \
Movie.h \
@@ -287,6 +288,7 @@ libpoppler_la_SOURCES = \
JArithmeticDecoder.cc \
JBIG2Stream.cc \
Lexer.cc \
+ Linearization.cc \
Link.cc \
LocalPDFDocBuilder.cc \
Movie.cc \
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 33a2b4d..baada76 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -53,6 +53,7 @@
#include "Catalog.h"
#include "Stream.h"
#include "XRef.h"
+#include "Linearization.h"
#include "Link.h"
#include "OutputDev.h"
#include "Error.h"
@@ -83,6 +84,7 @@ void PDFDoc::init()
file = NULL;
str = NULL;
xref = NULL;
+ linearization = NULL;
catalog = NULL;
#ifndef DISABLE_OUTLINE
outline = NULL;
@@ -243,6 +245,9 @@ PDFDoc::~PDFDoc() {
if (xref) {
delete xref;
}
+ if (linearization) {
+ delete linearization;
+ }
if (str) {
delete str;
}
@@ -417,6 +422,14 @@ void PDFDoc::processLinks(OutputDev *out, int page) {
catalog->getPage(page)->processLinks(out, catalog);
}
+Linearization *PDFDoc::getLinearization()
+{
+ if (!linearization) {
+ linearization = new Linearization(str);
+ }
+ return linearization;
+}
+
GBool PDFDoc::isLinearized() {
Parser *parser;
Object obj1, obj2, obj3, obj4, obj5;
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 6d7dea2..011f4c0 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -48,6 +48,7 @@ class Links;
class LinkAction;
class LinkDest;
class Outline;
+class Linearization;
enum PDFWriteMode {
writeStandard,
@@ -89,6 +90,9 @@ public:
// Get file name.
GooString *getFileName() { return fileName; }
+ // Get the linearization table.
+ Linearization *getLinearization();
+
// Get the xref table.
XRef *getXRef() { return xref; }
@@ -242,6 +246,7 @@ private:
void *guiData;
int pdfMajorVersion;
int pdfMinorVersion;
+ Linearization *linearization;
XRef *xref;
Catalog *catalog;
#ifndef DISABLE_OUTLINE
--
1.6.4.2
From fcc585329db5d55fc938a1cb40e8650a4da3fbdf Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 13 Apr 2010 18:51:40 +0200
Subject: [PATCH 07/12] Add getLength() to BaseStream
---
poppler/Stream.cc | 11 ++++++-----
poppler/Stream.h | 11 ++++++-----
2 files changed, 12 insertions(+), 10 deletions(-)
diff --git a/poppler/Stream.cc b/poppler/Stream.cc
index 988f99a..bb25959 100644
--- a/poppler/Stream.cc
+++ b/poppler/Stream.cc
@@ -372,8 +372,9 @@ void FileOutStream::printf(const char *format, ...)
// BaseStream
//------------------------------------------------------------------------
-BaseStream::BaseStream(Object *dictA) {
+BaseStream::BaseStream(Object *dictA, Guint lengthA) {
dict = *dictA;
+ length = lengthA;
}
BaseStream::~BaseStream() {
@@ -693,7 +694,7 @@ GBool StreamPredictor::getNextLine() {
FileStream::FileStream(FILE *fA, Guint startA, GBool limitedA,
Guint lengthA, Object *dictA):
- BaseStream(dictA) {
+ BaseStream(dictA, lengthA) {
f = fA;
start = startA;
limited = limitedA;
@@ -818,7 +819,7 @@ void FileStream::moveStart(int delta) {
CachedFileStream::CachedFileStream(CachedFile *ccA, Guint startA,
GBool limitedA, Guint lengthA, Object *dictA)
- : BaseStream(dictA)
+ : BaseStream(dictA, lengthA)
{
cc = ccA;
start = startA;
@@ -916,7 +917,7 @@ void CachedFileStream::moveStart(int delta)
//------------------------------------------------------------------------
MemStream::MemStream(char *bufA, Guint startA, Guint lengthA, Object *dictA):
- BaseStream(dictA) {
+ BaseStream(dictA, lengthA) {
buf = bufA;
start = startA;
length = lengthA;
@@ -980,7 +981,7 @@ void MemStream::moveStart(int delta) {
EmbedStream::EmbedStream(Stream *strA, Object *dictA,
GBool limitedA, Guint lengthA):
- BaseStream(dictA) {
+ BaseStream(dictA, lengthA) {
str = strA;
limited = limitedA;
length = lengthA;
diff --git a/poppler/Stream.h b/poppler/Stream.h
index 583278f..e99f03b 100644
--- a/poppler/Stream.h
+++ b/poppler/Stream.h
@@ -293,7 +293,7 @@ private:
class BaseStream: public Stream {
public:
- BaseStream(Object *dictA);
+ BaseStream(Object *dictA, Guint lengthA);
virtual ~BaseStream();
virtual Stream *makeSubStream(Guint start, GBool limited,
Guint length, Object *dict) = 0;
@@ -303,11 +303,16 @@ public:
virtual Stream *getUndecodedStream() { return this; }
virtual Dict *getDict() { return dict.getDict(); }
virtual GooString *getFileName() { return NULL; }
+ virtual Guint getLength() { return length; }
// Get/set position of first byte of stream within the file.
virtual Guint getStart() = 0;
virtual void moveStart(int delta) = 0;
+protected:
+
+ Guint length;
+
private:
Object dict;
@@ -478,7 +483,6 @@ private:
FILE *f;
Guint start;
GBool limited;
- Guint length;
char buf[fileStreamBufSize];
char *bufPtr;
char *bufEnd;
@@ -523,7 +527,6 @@ private:
CachedFile *cc;
Guint start;
GBool limited;
- Guint length;
char buf[cachedStreamBufSize];
char *bufPtr;
char *bufEnd;
@@ -567,7 +570,6 @@ private:
char *buf;
Guint start;
- Guint length;
char *bufEnd;
char *bufPtr;
GBool needFree;
@@ -607,7 +609,6 @@ private:
Stream *str;
GBool limited;
- Guint length;
};
//------------------------------------------------------------------------
--
1.6.4.2
From 0d2042c45732100d8fb2032586b1651025636f08 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 24 Mar 2010 19:16:14 +0100
Subject: [PATCH 08/12] Pass size of file when creating FileStream
---
poppler/PDFDoc.cc | 19 +++++++++++++++++--
1 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index baada76..b39d9e7 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -45,6 +45,7 @@
#ifdef _WIN32
# include <windows.h>
#endif
+#include <sys/stat.h>
#include "goo/gstrtod.h"
#include "goo/GooString.h"
#include "poppler-config.h"
@@ -99,12 +100,18 @@ PDFDoc::PDFDoc()
PDFDoc::PDFDoc(GooString *fileNameA, GooString *ownerPassword,
GooString *userPassword, void *guiDataA) {
Object obj;
+ int size = 0;
init();
fileName = fileNameA;
guiData = guiDataA;
+ struct stat buf;
+ if (stat(fileName->getCString(), &buf) == 0) {
+ size = buf.st_size;
+ }
+
// try to open file
#ifdef VMS
file = fopen(fileName->getCString(), "rb", "ctx=stm");
@@ -124,7 +131,7 @@ PDFDoc::PDFDoc(GooString *fileNameA, GooString *ownerPassword,
// create stream
obj.initNull();
- str = new FileStream(file, 0, gFalse, 0, &obj);
+ str = new FileStream(file, 0, gFalse, size, &obj);
ok = setup(ownerPassword, userPassword);
}
@@ -155,11 +162,19 @@ PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GooString *ownerPassword,
// try to open file
// NB: _wfopen is only available in NT
+ struct stat buf;
+ int size;
version.dwOSVersionInfoSize = sizeof(version);
GetVersionEx(&version);
if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
+ if (_wstat(fileName2, &buf) == 0) {
+ size = buf.st_size;
+ }
file = _wfopen(fileName2, L"rb");
} else {
+ if (_wstat(fileName->getCString(), &buf) == 0) {
+ size = buf.st_size;
+ }
file = fopen(fileName->getCString(), "rb");
}
if (!file) {
@@ -170,7 +185,7 @@ PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GooString *ownerPassword,
// create stream
obj.initNull();
- str = new FileStream(file, 0, gFalse, 0, &obj);
+ str = new FileStream(file, 0, gFalse, size, &obj);
ok = setup(ownerPassword, userPassword);
}
--
1.6.4.2
From 7fc3fa9242785e250100c39b1afbee35b4cee8a8 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 24 Mar 2010 19:32:59 +0100
Subject: [PATCH 09/12] Improve linearization check
---
poppler/PDFDoc.cc | 33 +++++----------------------------
1 files changed, 5 insertions(+), 28 deletions(-)
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index b39d9e7..8c75e08 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -446,34 +446,11 @@ Linearization *PDFDoc::getLinearization()
}
GBool PDFDoc::isLinearized() {
- Parser *parser;
- Object obj1, obj2, obj3, obj4, obj5;
- GBool lin;
-
- lin = gFalse;
- obj1.initNull();
- parser = new Parser(xref,
- new Lexer(xref,
- str->makeSubStream(str->getStart(), gFalse, 0, &obj1)),
- gTrue);
- parser->getObj(&obj1);
- parser->getObj(&obj2);
- parser->getObj(&obj3);
- parser->getObj(&obj4);
- if (obj1.isInt() && obj2.isInt() && obj3.isCmd("obj") &&
- obj4.isDict()) {
- obj4.dictLookup("Linearized", &obj5);
- if (obj5.isNum() && obj5.getNum() > 0) {
- lin = gTrue;
- }
- obj5.free();
- }
- obj4.free();
- obj3.free();
- obj2.free();
- obj1.free();
- delete parser;
- return lin;
+ if ((str->getLength()) &&
+ (getLinearization()->getLength() == str->getLength()))
+ return gTrue;
+ else
+ return gFalse;
}
int PDFDoc::saveAs(GooString *name, PDFWriteMode mode) {
--
1.6.4.2
From b602aaed9a192aa81bf160795e74c0c63ee01aac Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 7 Apr 2010 12:05:56 +0200
Subject: [PATCH 10/12] Move getStartXref from XRef to PDFDoc
---
poppler/PDFDoc.cc | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++-
poppler/PDFDoc.h | 5 ++++
poppler/XRef.cc | 50 +------------------------------------------
poppler/XRef.h | 6 +----
4 files changed, 66 insertions(+), 56 deletions(-)
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 8c75e08..7b716ca 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -35,6 +35,7 @@
#pragma implementation
#endif
+#include <ctype.h>
#include <locale.h>
#include <stdio.h>
#include <errno.h>
@@ -73,6 +74,9 @@
#define headerSearchSize 1024 // read this many bytes at beginning of
// file to look for '%PDF'
+#define xrefSearchSize 1024 // read this many bytes at end of file
+ // to look for 'startxref'
+
//------------------------------------------------------------------------
// PDFDoc
//------------------------------------------------------------------------
@@ -90,6 +94,7 @@ void PDFDoc::init()
#ifndef DISABLE_OUTLINE
outline = NULL;
#endif
+ startXRefPos = ~(Guint)0;
}
PDFDoc::PDFDoc()
@@ -223,7 +228,7 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
checkHeader();
// read xref table
- xref = new XRef(str);
+ xref = new XRef(str, getStartXRef());
if (!xref->isOk()) {
error(-1, "Couldn't read xref table");
errCode = xref->getErrorCode();
@@ -894,7 +899,7 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr,
trailerDict->set("Root", &obj1);
if (incrUpdate) {
- obj1.initInt(xref->getLastXRefPos());
+ obj1.initInt(getStartXRef());
trailerDict->set("Prev", &obj1);
}
@@ -932,3 +937,55 @@ PDFDoc *PDFDoc::ErrorPDFDoc(int errorCode, GooString *fileNameA)
return doc;
}
+
+Guint PDFDoc::strToUnsigned(char *s) {
+ Guint x;
+ char *p;
+ int i;
+
+ x = 0;
+ for (p = s, i = 0; *p && isdigit(*p) && i < 10; ++p, ++i) {
+ x = 10 * x + (*p - '0');
+ }
+ return x;
+}
+
+// Read the 'startxref' position.
+Guint PDFDoc::getStartXRef()
+{
+ if (startXRefPos == ~(Guint)0) {
+
+ {
+ char buf[xrefSearchSize+1];
+ char *p;
+ int c, n, i;
+
+ // read last xrefSearchSize bytes
+ str->setPos(xrefSearchSize, -1);
+ for (n = 0; n < xrefSearchSize; ++n) {
+ if ((c = str->getChar()) == EOF) {
+ break;
+ }
+ buf[n] = c;
+ }
+ buf[n] = '\0';
+
+ // find startxref
+ for (i = n - 9; i >= 0; --i) {
+ if (!strncmp(&buf[i], "startxref", 9)) {
+ break;
+ }
+ }
+ if (i < 0) {
+ startXRefPos = 0;
+ }
+ for (p = &buf[i+9]; isspace(*p); ++p) ;
+ startXRefPos = strToUnsigned(p);
+ }
+
+ }
+
+ return startXRefPos;
+}
+
+
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 011f4c0..d093b59 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -239,6 +239,9 @@ private:
GBool checkFooter();
void checkHeader();
GBool checkEncryption(GooString *ownerPassword, GooString *userPassword);
+ // Get the offset of the start xref table.
+ Guint getStartXRef();
+ Guint strToUnsigned(char *s);
GooString *fileName;
FILE *file;
@@ -258,6 +261,8 @@ private:
//If there is an error opening the PDF file with fopen() in the constructor,
//then the POSIX errno will be here.
int fopenErrno;
+
+ Guint startXRefPos; // offset of last xref table
};
#endif
diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index 9aa9500..a0145a5 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -48,11 +48,6 @@
#include "PopplerCache.h"
//------------------------------------------------------------------------
-
-#define xrefSearchSize 1024 // read this many bytes at end of file
- // to look for 'startxref'
-
-//------------------------------------------------------------------------
// Permission bits
// Note that the PDF spec uses 1 base (eg bit 3 is 1<<2)
//------------------------------------------------------------------------
@@ -273,7 +268,7 @@ XRef::XRef() {
init();
}
-XRef::XRef(BaseStream *strA) {
+XRef::XRef(BaseStream *strA, Guint pos) {
Object obj;
init();
@@ -406,37 +401,6 @@ int XRef::resize(int newSize)
return size;
}
-// Read the 'startxref' position.
-Guint XRef::getStartXref() {
- char buf[xrefSearchSize+1];
- char *p;
- int c, n, i;
-
- // read last xrefSearchSize bytes
- str->setPos(xrefSearchSize, -1);
- for (n = 0; n < xrefSearchSize; ++n) {
- if ((c = str->getChar()) == EOF) {
- break;
- }
- buf[n] = c;
- }
- buf[n] = '\0';
-
- // find startxref
- for (i = n - 9; i >= 0; --i) {
- if (!strncmp(&buf[i], "startxref", 9)) {
- break;
- }
- }
- if (i < 0) {
- return 0;
- }
- for (p = &buf[i+9]; isspace(*p); ++p) ;
- lastXRefPos = strToUnsigned(p);
-
- return lastXRefPos;
-}
-
// Read one xref table section. Also reads the associated trailer
// dictionary, and returns the prev pointer (if any).
GBool XRef::readXRef(Guint *pos, GooVector<Guint> *followedXRefStm) {
@@ -1142,18 +1106,6 @@ int XRef::getNumEntry(Guint offset)
else return -1;
}
-Guint XRef::strToUnsigned(char *s) {
- Guint x;
- char *p;
- int i;
-
- x = 0;
- for (p = s, i = 0; *p && isdigit(*p) && i < 10; ++p, ++i) {
- x = 10 * x + (*p - '0');
- }
- return x;
-}
-
void XRef::add(int num, int gen, Guint offs, GBool used) {
if (num >= size) {
if (num >= capacity) {
diff --git a/poppler/XRef.h b/poppler/XRef.h
index 7ab094f..9505ae8 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -64,7 +64,7 @@ public:
// Constructor, create an empty XRef, used for PDF writing
XRef();
// Constructor. Read xref table from stream.
- XRef(BaseStream *strA);
+ XRef(BaseStream *strA, Guint pos);
// Destructor.
~XRef();
@@ -107,9 +107,6 @@ public:
// Return the number of objects in the xref table.
int getNumObjects() { return size; }
- // Return the offset of the last xref table.
- Guint getLastXRefPos() { return lastXRefPos; }
-
// Return the catalog object reference.
int getRootNum() { return rootNum; }
int getRootGen() { return rootGen; }
@@ -144,7 +141,6 @@ private:
GBool ok; // true if xref table is valid
int errCode; // error code (if <ok> is false)
Object trailerDict; // trailer dictionary
- Guint lastXRefPos; // offset of last xref table
Guint *streamEnds; // 'endstream' positions - only used in
// damaged files
int streamEndsLen; // number of valid entries in streamEnds
--
1.6.4.2
From dab2ca2dbfc3368587bb5d75e82509e0afe80f4b Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 7 Apr 2010 12:35:05 +0200
Subject: [PATCH 11/12] Use XRef table at start of linearized document
---
poppler/PDFDoc.cc | 27 ++++++++++++++++++++++++++-
1 files changed, 26 insertions(+), 1 deletions(-)
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 7b716ca..edabd56 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -74,6 +74,10 @@
#define headerSearchSize 1024 // read this many bytes at beginning of
// file to look for '%PDF'
+#define linearizationSearchSize 1024 // read this many bytes at beginning of
+ // file to look for linearization
+ // dictionary
+
#define xrefSearchSize 1024 // read this many bytes at end of file
// to look for 'startxref'
@@ -955,7 +959,28 @@ Guint PDFDoc::getStartXRef()
{
if (startXRefPos == ~(Guint)0) {
- {
+ if (isLinearized()) {
+ char buf[linearizationSearchSize+1];
+ int c, n, i;
+
+ str->setPos(0);
+ for (n = 0; n < linearizationSearchSize; ++n) {
+ if ((c = str->getChar()) == EOF) {
+ break;
+ }
+ buf[n] = c;
+ }
+ buf[n] = '\0';
+
+ // find end of first obj
+ startXRefPos = 0;
+ for (i = 0; i < n; i++) {
+ if (!strncmp("endobj", &buf[i], 6)) {
+ startXRefPos = i+6;
+ break;
+ }
+ }
+ } else {
char buf[xrefSearchSize+1];
char *p;
int c, n, i;
--
1.6.4.2
From 9b70cdc61ca4403d4ed96b2878575d59e860f1ac Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Sun, 25 Apr 2010 17:34:49 +0200
Subject: [PATCH 12/12] Use linearization data to parse XRef entries
---
poppler/PDFDoc.cc | 12 +++++++++++-
poppler/PDFDoc.h | 3 +++
poppler/XRef.cc | 43 ++++++++++++++++++++++++++++++++++++++++++-
poppler/XRef.h | 6 +++++-
4 files changed, 61 insertions(+), 3 deletions(-)
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index edabd56..16e6735 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -232,7 +232,7 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
checkHeader();
// read xref table
- xref = new XRef(str, getStartXRef());
+ xref = new XRef(str, getStartXRef(), getMainXRefEntriesOffset());
if (!xref->isOk()) {
error(-1, "Couldn't read xref table");
errCode = xref->getErrorCode();
@@ -1013,4 +1013,14 @@ Guint PDFDoc::getStartXRef()
return startXRefPos;
}
+Guint PDFDoc::getMainXRefEntriesOffset()
+{
+ Guint mainXRefEntriesOffset = 0;
+
+ if (isLinearized()) {
+ mainXRefEntriesOffset = getLinearization()->getMainXRefEntriesOffset();
+ }
+
+ return mainXRefEntriesOffset;
+}
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index d093b59..f6f8c8f 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -241,6 +241,9 @@ private:
GBool checkEncryption(GooString *ownerPassword, GooString *userPassword);
// Get the offset of the start xref table.
Guint getStartXRef();
+ // Get the offset of the entries in the main XRef table of a
+ // linearized document (0 for non linearized documents).
+ Guint getMainXRefEntriesOffset();
Guint strToUnsigned(char *s);
GooString *fileName;
diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index a0145a5..2d5437a 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -262,16 +262,19 @@ void XRef::init() {
streamEnds = NULL;
streamEndsLen = 0;
objStrs = new PopplerCache(5);
+ mainXRefEntriesOffset = 0;
+ xRefStream = gFalse;
}
XRef::XRef() {
init();
}
-XRef::XRef(BaseStream *strA, Guint pos) {
+XRef::XRef(BaseStream *strA, Guint pos, Guint mainXRefEntriesOffsetA) {
Object obj;
init();
+ mainXRefEntriesOffset = mainXRefEntriesOffsetA;
encrypted = gFalse;
permFlags = defPermFlags;
@@ -435,6 +438,9 @@ GBool XRef::readXRef(Guint *pos, GooVector<Guint> *followedXRefStm) {
if (!parser->getObj(&obj)->isStream()) {
goto err1;
}
+ if (trailerDict.isNone()) {
+ xRefStream = gTrue;
+ }
more = readXRefStream(obj.getStream(), pos);
obj.free();
@@ -1220,10 +1226,44 @@ void XRef::writeToFile(OutStream* outStr, GBool writeAllEntries) {
}
}
+GBool XRef::parseEntry(Guint offset, XRefEntry *entry)
+{
+ GBool r;
+
+ Object obj;
+ obj.initNull();
+ Parser parser = Parser(NULL, new Lexer(NULL,
+ str->makeSubStream(offset, gFalse, 20, &obj)), gTrue);
+
+ Object obj1, obj2, obj3;
+ if ((parser.getObj(&obj1)->isInt()) &&
+ (parser.getObj(&obj2)->isInt()) &&
+ (parser.getObj(&obj3)->isCmd("n") || obj3.isCmd("f"))) {
+ entry->offset = (Guint) obj1.getInt();
+ entry->gen = obj2.getInt();
+ entry->type = obj3.isCmd("n") ? xrefEntryUncompressed : xrefEntryFree;
+ entry->obj.initNull ();
+ entry->updated = false;
+ r = gTrue;
+ } else {
+ r = gFalse;
+ }
+ obj1.free();
+ obj2.free();
+ obj3.free();
+
+ return r;
+}
+
XRefEntry *XRef::getEntry(int i)
{
if (entries[i].type == xrefEntryNone) {
+ if ((!xRefStream) && mainXRefEntriesOffset) {
+ if (!parseEntry(mainXRefEntriesOffset + 20*i, &entries[i])) {
+ error(-1, "Failed to parse XRef entry [%d].", i);
+ }
+ } else {
GooVector<Guint> followedPrev;
while (prevXRefOffset && entries[i].type == xrefEntryNone) {
bool ok = true;
@@ -1253,6 +1293,7 @@ XRefEntry *XRef::getEntry(int i)
error(-1, "Invalid XRef entry");
entries[i].type = xrefEntryFree;
}
+ }
}
return &entries[i];
diff --git a/poppler/XRef.h b/poppler/XRef.h
index 9505ae8..9ec93e7 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -64,7 +64,7 @@ public:
// Constructor, create an empty XRef, used for PDF writing
XRef();
// Constructor. Read xref table from stream.
- XRef(BaseStream *strA, Guint pos);
+ XRef(BaseStream *strA, Guint pos, Guint mainXRefEntriesOffsetA = 0);
// Destructor.
~XRef();
@@ -154,6 +154,8 @@ private:
Guchar fileKey[16]; // file decryption key
GBool ownerPasswordOk; // true if owner password is correct
Guint prevXRefOffset; // position of prev XRef section (= next to read)
+ Guint mainXRefEntriesOffset; // offset of entries in main XRef table
+ GBool xRefStream; // true if last XRef section is a stream
void init();
int reserve(int newSize);
@@ -165,6 +167,8 @@ private:
GBool readXRefStream(Stream *xrefStr, Guint *pos);
GBool constructXRef();
Guint strToUnsigned(char *s);
+ GBool parseEntry(Guint offset, XRefEntry *entry);
+
};
#endif
--
1.6.4.2
-------------- next part --------------
From 6307ac24be6cb823594c4e62662ba9ab4d147100 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 20 Apr 2010 19:03:54 +0200
Subject: [PATCH 01/17] add PDFDoc::getPage()
---
poppler/PDFDoc.cc | 8 ++++++++
poppler/PDFDoc.h | 3 +++
2 files changed, 11 insertions(+), 0 deletions(-)
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 16e6735..89e2c52 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -1024,3 +1024,11 @@ Guint PDFDoc::getMainXRefEntriesOffset()
return mainXRefEntriesOffset;
}
+Page *PDFDoc::getPage(int page)
+{
+ if ((page < 1) || page > getNumPages()) return NULL;
+
+ {
+ return catalog->getPage(page);
+ }
+}
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index f6f8c8f..011e6e1 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -127,6 +127,9 @@ public:
// Return the structure tree root object.
Object *getStructTreeRoot() { return catalog->getStructTreeRoot(); }
+ // Get page.
+ Page *getPage(int page);
+
// Display a page.
void displayPage(OutputDev *out, int page,
double hDPI, double vDPI, int rotate,
--
1.6.4.2
From e98e4b00ac066ced38d796f0dd4c43642ba748ae Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 20 Apr 2010 19:36:08 +0200
Subject: [PATCH 02/17] Use PDFDoc::getPage() in PDFDoc
---
poppler/PDFDoc.cc | 24 ++++++++++++++++--------
poppler/PDFDoc.h | 10 +++++-----
2 files changed, 21 insertions(+), 13 deletions(-)
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 89e2c52..e1b00fc 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -397,11 +397,13 @@ void PDFDoc::displayPage(OutputDev *out, int page,
if (globalParams->getPrintCommands()) {
printf("***** page %d *****\n", page);
}
- if (catalog->getPage(page))
- catalog->getPage(page)->display(out, hDPI, vDPI,
+
+ if (getPage(page))
+ getPage(page)->display(out, hDPI, vDPI,
rotate, useMediaBox, crop, printing, catalog,
abortCheckCbk, abortCheckCbkData,
annotDisplayDecideCbk, annotDisplayDecideCbkData);
+
}
void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage,
@@ -428,8 +430,8 @@ void PDFDoc::displayPageSlice(OutputDev *out, int page,
void *abortCheckCbkData,
GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
void *annotDisplayDecideCbkData) {
- if (catalog->getPage(page))
- catalog->getPage(page)->displaySlice(out, hDPI, vDPI,
+ if (getPage(page))
+ getPage(page)->displaySlice(out, hDPI, vDPI,
rotate, useMediaBox, crop,
sliceX, sliceY, sliceW, sliceH,
printing, catalog,
@@ -438,12 +440,18 @@ void PDFDoc::displayPageSlice(OutputDev *out, int page,
}
Links *PDFDoc::getLinks(int page) {
- return catalog->getPage(page) ? catalog->getPage(page)->getLinks(catalog) : NULL;
+ Page *p = getPage(page);
+ if (!p) {
+ Object obj;
+ obj.initNull();
+ return new Links (&obj, NULL);
+ }
+ return p->getLinks(catalog);
}
-
+
void PDFDoc::processLinks(OutputDev *out, int page) {
- if (catalog->getPage(page))
- catalog->getPage(page)->processLinks(out, catalog);
+ if (getPage(page))
+ getPage(page)->processLinks(out, catalog);
}
Linearization *PDFDoc::getLinearization()
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 011e6e1..8de139f 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -107,15 +107,15 @@ public:
// Get page parameters.
double getPageMediaWidth(int page)
- { return catalog->getPage(page)->getMediaWidth(); }
+ { return getPage(page) ? getPage(page)->getMediaWidth() : 0.0 ; }
double getPageMediaHeight(int page)
- { return catalog->getPage(page)->getMediaHeight(); }
+ { return getPage(page) ? getPage(page)->getMediaHeight() : 0.0 ; }
double getPageCropWidth(int page)
- { return catalog->getPage(page)->getCropWidth(); }
+ { return getPage(page) ? getPage(page)->getCropWidth() : 0.0 ; }
double getPageCropHeight(int page)
- { return catalog->getPage(page)->getCropHeight(); }
+ { return getPage(page) ? getPage(page)->getCropHeight() : 0.0 ; }
int getPageRotate(int page)
- { return catalog->getPage(page)->getRotate(); }
+ { return getPage(page) ? getPage(page)->getRotate() : 0 ; }
// Get number of pages.
int getNumPages() { return catalog->getNumPages(); }
--
1.6.4.2
From 5b93c5bad5da179b675317bb45bc763bb6189d63 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 20 Apr 2010 20:48:30 +0200
Subject: [PATCH 03/17] Use PDFDoc::getPage() in FontInfo
---
poppler/FontInfo.cc | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/poppler/FontInfo.cc b/poppler/FontInfo.cc
index 0037e07..c348d14 100644
--- a/poppler/FontInfo.cc
+++ b/poppler/FontInfo.cc
@@ -70,7 +70,9 @@ GooList *FontInfoScanner::scan(int nPages) {
}
for (int pg = currentPage; pg < lastPage; ++pg) {
- page = doc->getCatalog()->getPage(pg);
+ page = doc->getPage(pg);
+ if (!page) continue;
+
if ((resDict = page->getResourceDict())) {
scanFonts(resDict, result);
}
--
1.6.4.2
From 8d45acf9f592cfd5a52efd5b6e512a01d5482d96 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 22 Apr 2010 11:11:11 +0200
Subject: [PATCH 04/17] Use PDFDoc::getPage() in pdfinfo
---
utils/pdfinfo.cc | 22 +++++++++++++++-------
1 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index 2abe8b4..a94e4e8 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -257,7 +257,11 @@ int main(int argc, char *argv[]) {
if (printBoxes) {
if (multiPage) {
for (pg = firstPage; pg <= lastPage; ++pg) {
- page = doc->getCatalog()->getPage(pg);
+ page = doc->getPage(pg);
+ if (!page) {
+ error(-1, "Failed to print boxes for page %d", pg);
+ continue;
+ }
sprintf(buf, "Page %4d MediaBox: ", pg);
printBox(buf, page->getMediaBox());
sprintf(buf, "Page %4d CropBox: ", pg);
@@ -270,12 +274,16 @@ int main(int argc, char *argv[]) {
printBox(buf, page->getArtBox());
}
} else {
- page = doc->getCatalog()->getPage(firstPage);
- printBox("MediaBox: ", page->getMediaBox());
- printBox("CropBox: ", page->getCropBox());
- printBox("BleedBox: ", page->getBleedBox());
- printBox("TrimBox: ", page->getTrimBox());
- printBox("ArtBox: ", page->getArtBox());
+ page = doc->getPage(firstPage);
+ if (!page) {
+ error(-1, "Failed to print boxes for page %d", firstPage);
+ } else {
+ printBox("MediaBox: ", page->getMediaBox());
+ printBox("CropBox: ", page->getCropBox());
+ printBox("BleedBox: ", page->getBleedBox());
+ printBox("TrimBox: ", page->getTrimBox());
+ printBox("ArtBox: ", page->getArtBox());
+ }
}
}
--
1.6.4.2
From a287a37f788d7a7d43e1fd20bd7ea27742dd5269 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 22 Apr 2010 11:19:53 +0200
Subject: [PATCH 05/17] Use PDFDoc::getPage() in pdffonts
---
utils/pdffonts.cc | 6 +++++-
1 files changed, 5 insertions(+), 1 deletions(-)
diff --git a/utils/pdffonts.cc b/utils/pdffonts.cc
index 81b20e4..30e25dc 100644
--- a/utils/pdffonts.cc
+++ b/utils/pdffonts.cc
@@ -166,7 +166,11 @@ int main(int argc, char *argv[]) {
fonts = NULL;
fontsLen = fontsSize = 0;
for (pg = firstPage; pg <= lastPage; ++pg) {
- page = doc->getCatalog()->getPage(pg);
+ page = doc->getPage(pg);
+ if (!page) {
+ error(-1, "Failed to read fonts from page %d", pg);
+ continue;
+ }
if ((resDict = page->getResourceDict())) {
scanFonts(resDict, doc);
}
--
1.6.4.2
From f2ef96b3dfdf4bd427be43cc9e6f7741887a1564 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 22 Apr 2010 15:52:20 +0200
Subject: [PATCH 06/17] Use PDFDoc::getPage() in glib
---
glib/poppler-action.cc | 4 ++--
glib/poppler-document.cc | 17 ++++++++++-------
2 files changed, 12 insertions(+), 9 deletions(-)
diff --git a/glib/poppler-action.cc b/glib/poppler-action.cc
index c076551..85557db 100644
--- a/glib/poppler-action.cc
+++ b/glib/poppler-action.cc
@@ -425,13 +425,13 @@ find_annot_movie_for_action (PopplerDocument *document,
xref->fetch (ref->num, ref->gen, &annotObj);
} else if (link->hasAnnotTitle ()) {
- Catalog *catalog = document->doc->getCatalog ();
Object annots;
GooString *title = link->getAnnotTitle ();
int i;
for (i = 1; i <= document->doc->getNumPages (); ++i) {
- Page *p = catalog->getPage (i);
+ Page *p = document->doc->getPage (i);
+ if (!p) continue;
if (p->getAnnots (&annots)->isArray ()) {
int j;
diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc
index 680c8de..3f9bbf1 100644
--- a/glib/poppler-document.cc
+++ b/glib/poppler-document.cc
@@ -388,15 +388,14 @@ PopplerPage *
poppler_document_get_page (PopplerDocument *document,
int index)
{
- Catalog *catalog;
Page *page;
g_return_val_if_fail (0 <= index &&
index < poppler_document_get_n_pages (document),
NULL);
- catalog = document->doc->getCatalog();
- page = catalog->getPage (index + 1);
+ page = document->doc->getPage (index + 1);
+ if (!page) return NULL;
return _poppler_page_new (document, page, index);
}
@@ -2036,18 +2035,22 @@ PopplerFormField *
poppler_document_get_form_field (PopplerDocument *document,
gint id)
{
- Catalog *catalog = document->doc->getCatalog();
+ Page *page;
unsigned pageNum;
unsigned fieldNum;
FormPageWidgets *widgets;
FormWidget *field;
FormWidget::decodeID (id, &pageNum, &fieldNum);
-
- widgets = catalog->getPage (pageNum)->getPageWidgets ();
+
+ page = document->doc->getPage (pageNum);
+ if (!page)
+ return NULL;
+
+ widgets = page->getPageWidgets ();
if (!widgets)
return NULL;
-
+
field = widgets->getWidget (fieldNum);
if (field)
return _poppler_form_field_new (document, field);
--
1.6.4.2
From dc04f21032259216098150539f1d8fcd18346876 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 22 Apr 2010 17:59:01 +0200
Subject: [PATCH 07/17] Use PDFDoc::getPage() in qt4
Note API change: With this patch, Document::page(int index) can now return NULL
when poppler fails to create a page. Any application using these bindings
should check the return value.
---
qt4/src/poppler-document.cc | 8 +++++++-
qt4/src/poppler-link.cc | 6 ++++--
qt4/src/poppler-page.cc | 3 ++-
qt4/src/poppler-qt4.h | 3 +++
4 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/qt4/src/poppler-document.cc b/qt4/src/poppler-document.cc
index 41d35b6..dc0ce97 100644
--- a/qt4/src/poppler-document.cc
+++ b/qt4/src/poppler-document.cc
@@ -98,7 +98,13 @@ namespace Poppler {
Page *Document::page(int index) const
{
- return new Page(m_doc, index);
+ Page *page = new Page(m_doc, index);
+ if (!page->isOk()) {
+ delete page;
+ return NULL;
+ }
+
+ return page;
}
bool Document::isLocked() const
diff --git a/qt4/src/poppler-link.cc b/qt4/src/poppler-link.cc
index de06242..4f54201 100644
--- a/qt4/src/poppler-link.cc
+++ b/qt4/src/poppler-link.cc
@@ -232,9 +232,11 @@ class LinkMoviePrivate : public LinkPrivate
int leftAux = 0, topAux = 0, rightAux = 0, bottomAux = 0;
- if (d->pageNum > 0 && d->pageNum <= data.doc->doc->getNumPages())
+ ::Page *page;
+ if (d->pageNum > 0 &&
+ d->pageNum <= data.doc->doc->getNumPages() &&
+ (page = data.doc->doc->getPage( d->pageNum )))
{
- ::Page *page = data.doc->doc->getCatalog()->getPage( d->pageNum );
cvtUserToDev( page, left, top, &leftAux, &topAux );
cvtUserToDev( page, right, bottom, &rightAux, &bottomAux );
diff --git a/qt4/src/poppler-page.cc b/qt4/src/poppler-page.cc
index ae67b11..54c1785 100644
--- a/qt4/src/poppler-page.cc
+++ b/qt4/src/poppler-page.cc
@@ -189,8 +189,9 @@ Page::Page(DocumentData *doc, int index) {
m_page = new PageData();
m_page->index = index;
m_page->parentDoc = doc;
- m_page->page = doc->doc->getCatalog()->getPage(m_page->index + 1);
+ m_page->page = doc->doc->getPage(m_page->index + 1);
m_page->transition = 0;
+ ok = m_page->page ? true : false;
}
Page::~Page()
diff --git a/qt4/src/poppler-qt4.h b/qt4/src/poppler-qt4.h
index 117dc43..2e77f48 100644
--- a/qt4/src/poppler-qt4.h
+++ b/qt4/src/poppler-qt4.h
@@ -587,11 +587,14 @@ delete it;
**/
QString label() const;
+ bool isOk() { return ok; };
+
private:
Q_DISABLE_COPY(Page)
Page(DocumentData *doc, int index);
PageData *m_page;
+ bool ok;
};
/**
--
1.6.4.2
From d342d36c668dc8d370009df226fc7903b91559d0 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Fri, 23 Apr 2010 09:21:23 +0200
Subject: [PATCH 08/17] Use PDFDoc::getPage() in qt
Note API change: With this patch, Document::getPage(int index) can now
return NULL when poppler fails to create a page. Any application using
these bindings should check the return value.
---
qt/poppler-document.cc | 11 +++++++++++
qt/poppler-page.cc | 11 +++++++----
qt/poppler-qt.h | 6 +++++-
3 files changed, 23 insertions(+), 5 deletions(-)
diff --git a/qt/poppler-document.cc b/qt/poppler-document.cc
index bade1d1..1a5892b 100644
--- a/qt/poppler-document.cc
+++ b/qt/poppler-document.cc
@@ -113,6 +113,17 @@ int Document::getNumPages() const
return data->doc.getNumPages();
}
+Page *Document::getPage(int index) const
+{
+ Page *p = new Page(this, index);
+ if (!p->isOk()) {
+ delete p;
+ return NULL;
+ }
+
+ return p;
+}
+
QValueList<FontInfo> Document::fonts() const
{
QValueList<FontInfo> ourList;
diff --git a/qt/poppler-page.cc b/qt/poppler-page.cc
index a42aa15..ef077a7 100644
--- a/qt/poppler-page.cc
+++ b/qt/poppler-page.cc
@@ -47,6 +47,7 @@ class PageData {
const Document *doc;
int index;
PageTransition *transition;
+ ::Page *page;
};
Page::Page(const Document *doc, int index) {
@@ -54,6 +55,8 @@ Page::Page(const Document *doc, int index) {
data->index = index;
data->doc = doc;
data->transition = 0;
+ data->page = doc->data->doc.getPage(data->index + 1);
+ ok = data->page ? true : false;
}
Page::~Page()
@@ -132,7 +135,7 @@ QString Page::getText(const Rectangle &r) const
output_dev = new TextOutputDev(0, gFalse, gFalse, gFalse);
data->doc->data->doc.displayPageSlice(output_dev, data->index + 1, 72, 72,
0, false, false, false, -1, -1, -1, -1);
- p = data->doc->data->doc.getCatalog()->getPage(data->index + 1);
+ p = data->page;
if (r.isNull())
{
rect = p->getCropBox();
@@ -197,7 +200,7 @@ PageTransition *Page::getTransition() const
{
Object o;
PageTransitionParams params;
- params.dictObj = data->doc->data->doc.getCatalog()->getPage(data->index + 1)->getTrans(&o);
+ params.dictObj = data->page->getTrans(&o);
data->transition = new PageTransition(params);
o.free();
}
@@ -208,7 +211,7 @@ QSize Page::pageSize() const
{
::Page *p;
- p = data->doc->data->doc.getCatalog()->getPage(data->index + 1);
+ p = data->page;
if ( ( Page::Landscape == orientation() ) || (Page::Seascape == orientation() ) ) {
return QSize( (int)p->getCropHeight(), (int)p->getCropWidth() );
} else {
@@ -218,7 +221,7 @@ QSize Page::pageSize() const
Page::Orientation Page::orientation() const
{
- ::Page *p = data->doc->data->doc.getCatalog()->getPage(data->index + 1);
+ ::Page *p = data->page;
int rotation = p->getRotate();
switch (rotation) {
diff --git a/qt/poppler-qt.h b/qt/poppler-qt.h
index a6b1e6e..549ffd2 100644
--- a/qt/poppler-qt.h
+++ b/qt/poppler-qt.h
@@ -31,6 +31,7 @@
#include <qdom.h>
#include <qpixmap.h>
+
namespace Poppler {
class Document;
@@ -198,9 +199,12 @@ class Page {
*/
QValueList<Link*> links() const;
+ bool isOk() { return ok; };
+
private:
Page(const Document *doc, int index);
PageData *data;
+ bool ok;
};
class DocumentData;
@@ -219,7 +223,7 @@ public:
static Document *load(const QString & filePath);
- Page *getPage(int index) const{ return new Page(this, index); }
+ Page *getPage(int index) const;
int getNumPages() const;
--
1.6.4.2
From 196a34e2c154ab975717edccdf473d6ed9aceefa Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Fri, 23 Apr 2010 12:07:39 +0200
Subject: [PATCH 09/17] Use PDFDoc::getPage() in PSOutputDev
---
glib/poppler-page.cc | 1 +
poppler/PSOutputDev.cc | 37 ++++++++++++++++++++++---------------
poppler/PSOutputDev.h | 13 ++++++++-----
qt/poppler-document.cc | 2 +-
qt4/src/poppler-ps-converter.cc | 1 +
utils/pdftohtml.cc | 2 +-
utils/pdftops.cc | 2 +-
7 files changed, 35 insertions(+), 23 deletions(-)
diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc
index bc95e65..06c2e3c 100644
--- a/glib/poppler-page.cc
+++ b/glib/poppler-page.cc
@@ -1195,6 +1195,7 @@ poppler_page_render_to_ps (PopplerPage *page,
if (!ps_file->out)
ps_file->out = new PSOutputDev (ps_file->filename,
+ ps_file->document->doc,
ps_file->document->doc->getXRef(),
ps_file->document->doc->getCatalog(),
NULL,
diff --git a/poppler/PSOutputDev.cc b/poppler/PSOutputDev.cc
index 179a494..5e5d3d0 100644
--- a/poppler/PSOutputDev.cc
+++ b/poppler/PSOutputDev.cc
@@ -70,6 +70,7 @@
# include "SplashOutputDev.h"
#endif
#include "PSOutputDev.h"
+#include "PDFDoc.h"
#ifdef MACOS
// needed for setting type/creator of MacOS files
@@ -972,7 +973,7 @@ static void outputToFile(void *stream, char *data, int len) {
fwrite(data, 1, len, (FILE *)stream);
}
-PSOutputDev::PSOutputDev(const char *fileName, XRef *xrefA, Catalog *catalog,
+PSOutputDev::PSOutputDev(const char *fileName, PDFDoc *doc, XRef *xrefA, Catalog *catalog,
char *psTitle,
int firstPage, int lastPage, PSOutMode modeA,
int paperWidthA, int paperHeightA, GBool duplexA,
@@ -1033,13 +1034,14 @@ PSOutputDev::PSOutputDev(const char *fileName, XRef *xrefA, Catalog *catalog,
}
init(outputToFile, f, fileTypeA, psTitle,
- xrefA, catalog, firstPage, lastPage, modeA,
+ doc, xrefA, catalog, firstPage, lastPage, modeA,
imgLLXA, imgLLYA, imgURXA, imgURYA, manualCtrlA,
paperWidthA, paperHeightA, duplexA);
}
PSOutputDev::PSOutputDev(PSOutputFunc outputFuncA, void *outputStreamA,
char *psTitle,
+ PDFDoc *doc,
XRef *xrefA, Catalog *catalog,
int firstPage, int lastPage, PSOutMode modeA,
int paperWidthA, int paperHeightA, GBool duplexA,
@@ -1068,18 +1070,17 @@ PSOutputDev::PSOutputDev(PSOutputFunc outputFuncA, void *outputStreamA,
forceRasterize = forceRasterizeA;
init(outputFuncA, outputStreamA, psGeneric, psTitle,
- xrefA, catalog, firstPage, lastPage, modeA,
+ doc, xrefA, catalog, firstPage, lastPage, modeA,
imgLLXA, imgLLYA, imgURXA, imgURYA, manualCtrlA,
paperWidthA, paperHeightA, duplexA);
}
void PSOutputDev::init(PSOutputFunc outputFuncA, void *outputStreamA,
- PSFileType fileTypeA, char *pstitle, XRef *xrefA, Catalog *catalog,
+ PSFileType fileTypeA, char *pstitle, PDFDoc *doc, XRef *xrefA, Catalog *catalog,
int firstPage, int lastPage, PSOutMode modeA,
int imgLLXA, int imgLLYA, int imgURXA, int imgURYA,
GBool manualCtrlA, int paperWidthA, int paperHeightA,
GBool duplexA) {
- Page *page;
PDFRectangle *box;
// initialize
@@ -1099,12 +1100,12 @@ void PSOutputDev::init(PSOutputFunc outputFuncA, void *outputStreamA,
imgURX = imgURXA;
imgURY = imgURYA;
if (paperWidth < 0 || paperHeight < 0) {
- // this check is needed in case the document has zero pages
- if (firstPage > 0 && firstPage <= catalog->getNumPages()) {
- page = catalog->getPage(firstPage);
+ Page *page;
+ if ((page = doc->getPage(firstPage))) {
paperWidth = (int)ceil(page->getMediaWidth());
paperHeight = (int)ceil(page->getMediaHeight());
} else {
+ error(-1, "Invalid page %d", firstPage);
paperWidth = 1;
paperHeight = 1;
}
@@ -1170,14 +1171,16 @@ void PSOutputDev::init(PSOutputFunc outputFuncA, void *outputStreamA,
embFontList = new GooString();
if (!manualCtrl) {
+ Page *page;
// this check is needed in case the document has zero pages
- if (firstPage > 0 && firstPage <= catalog->getNumPages()) {
+ if ((page = doc->getPage(firstPage))) {
writeHeader(firstPage, lastPage,
- catalog->getPage(firstPage)->getMediaBox(),
- catalog->getPage(firstPage)->getCropBox(),
- catalog->getPage(firstPage)->getRotate(),
+ page->getMediaBox(),
+ page->getCropBox(),
+ page->getRotate(),
pstitle);
} else {
+ error(-1, "Invalid page %d", firstPage);
box = new PDFRectangle(0, 0, 1, 1);
writeHeader(firstPage, lastPage, box, box, 0, pstitle);
delete box;
@@ -1190,7 +1193,7 @@ void PSOutputDev::init(PSOutputFunc outputFuncA, void *outputStreamA,
writePS("%%EndProlog\n");
writePS("%%BeginSetup\n");
}
- writeDocSetup(catalog, firstPage, lastPage, duplexA);
+ writeDocSetup(doc, catalog, firstPage, lastPage, duplexA);
if (mode != psModeForm) {
writePS("%%EndSetup\n");
}
@@ -1400,7 +1403,7 @@ void PSOutputDev::writeXpdfProcset() {
}
}
-void PSOutputDev::writeDocSetup(Catalog *catalog,
+void PSOutputDev::writeDocSetup(PDFDoc *doc, Catalog *catalog,
int firstPage, int lastPage,
GBool duplexA) {
Page *page;
@@ -1416,7 +1419,11 @@ void PSOutputDev::writeDocSetup(Catalog *catalog,
writePS("xpdf begin\n");
}
for (pg = firstPage; pg <= lastPage; ++pg) {
- page = catalog->getPage(pg);
+ page = doc->getPage(pg);
+ if (!page) {
+ error(-1, "Failed writing resources for page %d", pg);
+ continue;
+ }
if ((resDict = page->getResourceDict())) {
setupResources(resDict);
}
diff --git a/poppler/PSOutputDev.h b/poppler/PSOutputDev.h
index 38c838c..a84a638 100644
--- a/poppler/PSOutputDev.h
+++ b/poppler/PSOutputDev.h
@@ -50,6 +50,7 @@ struct PSFont8Info;
struct PSFont16Enc;
class PSOutCustomColor;
class Function;
+class PDFDoc;
//------------------------------------------------------------------------
// PSOutputDev
@@ -75,7 +76,7 @@ class PSOutputDev: public OutputDev {
public:
// Open a PostScript output file, and write the prolog.
- PSOutputDev(const char *fileName, XRef *xrefA, Catalog *catalog,
+ PSOutputDev(const char *fileName, PDFDoc *doc, XRef *xrefA, Catalog *catalog,
char *psTitle,
int firstPage, int lastPage, PSOutMode modeA,
int paperWidthA = -1, int paperHeightA = -1,
@@ -88,6 +89,7 @@ public:
// Open a PSOutputDev that will write to a generic stream.
PSOutputDev(PSOutputFunc outputFuncA, void *outputStreamA,
char *psTitle,
+ PDFDoc *doc,
XRef *xrefA, Catalog *catalog,
int firstPage, int lastPage, PSOutMode modeA,
int paperWidthA = -1, int paperHeightA = -1,
@@ -145,9 +147,6 @@ public:
// Write the Xpdf procset.
void writeXpdfProcset();
- // Write the document-level setup.
- void writeDocSetup(Catalog *catalog, int firstPage, int lastPage, GBool duplexA);
-
// Write the trailer for the current page.
void writePageTrailer();
@@ -287,7 +286,7 @@ public:
private:
void init(PSOutputFunc outputFuncA, void *outputStreamA,
- PSFileType fileTypeA, char *pstitle, XRef *xrefA, Catalog *catalog,
+ PSFileType fileTypeA, char *pstitle, PDFDoc *doc, XRef *xrefA, Catalog *catalog,
int firstPage, int lastPage, PSOutMode modeA,
int imgLLXA, int imgLLYA, int imgURXA, int imgURYA,
GBool manualCtrlA, int paperWidthA, int paperHeightA,
@@ -341,6 +340,10 @@ private:
double *x1, double *y1);
#endif
void cvtFunction(Function *func);
+
+ // Write the document-level setup.
+ void writeDocSetup(PDFDoc *doc, Catalog *catalog, int firstPage, int lastPage, GBool duplexA);
+
void writePSChar(char c);
void writePS(char *s);
void writePSFmt(const char *fmt, ...);
diff --git a/qt/poppler-document.cc b/qt/poppler-document.cc
index 1a5892b..03d01fa 100644
--- a/qt/poppler-document.cc
+++ b/qt/poppler-document.cc
@@ -325,7 +325,7 @@ bool Document::print(const QString &fileName, QValueList<int> pageList, double h
bool Document::print(const QString &file, QValueList<int> pageList, double hDPI, double vDPI, int rotate, int paperWidth, int paperHeight)
{
- PSOutputDev *psOut = new PSOutputDev(file.latin1(), data->doc.getXRef(), data->doc.getCatalog(), NULL, 1, data->doc.getNumPages(), psModePS, paperWidth, paperHeight);
+ PSOutputDev *psOut = new PSOutputDev(file.latin1(), &(data->doc), data->doc.getXRef(), data->doc.getCatalog(), NULL, 1, data->doc.getNumPages(), psModePS, paperWidth, paperHeight);
if (psOut->isOk()) {
QValueList<int>::iterator it;
diff --git a/qt4/src/poppler-ps-converter.cc b/qt4/src/poppler-ps-converter.cc
index 7a1957b..9dc82ec 100644
--- a/qt4/src/poppler-ps-converter.cc
+++ b/qt4/src/poppler-ps-converter.cc
@@ -195,6 +195,7 @@ bool PSConverter::convert()
PSOutputDev *psOut = new PSOutputDev(outputToQIODevice, dev,
pstitlechar,
+ d->document->doc,
d->document->doc->getXRef(),
d->document->doc->getCatalog(),
1,
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index 3c74c6e..0558e5c 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -350,7 +350,7 @@ int main(int argc, char *argv[]) {
psFileName = new GooString(htmlFileName->getCString());
psFileName->append(".ps");
- psOut = new PSOutputDev(psFileName->getCString(), doc->getXRef(),
+ psOut = new PSOutputDev(psFileName->getCString(), doc, doc->getXRef(),
doc->getCatalog(), NULL, firstPage, lastPage, psModePS, w, h);
psOut->setDisplayText(gFalse);
doc->displayPages(psOut, firstPage, lastPage, 72, 72, 0,
diff --git a/utils/pdftops.cc b/utils/pdftops.cc
index 0bc43a1..8231458 100644
--- a/utils/pdftops.cc
+++ b/utils/pdftops.cc
@@ -359,7 +359,7 @@ int main(int argc, char *argv[]) {
}
// write PostScript file
- psOut = new PSOutputDev(psFileName->getCString(), doc->getXRef(),
+ psOut = new PSOutputDev(psFileName->getCString(), doc, doc->getXRef(),
doc->getCatalog(), NULL, firstPage, lastPage, mode,
paperWidth,
paperHeight,
--
1.6.4.2
From da96db8212e13ec9f3227d5a9fde3378f28db016 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Sat, 24 Apr 2010 10:17:56 +0200
Subject: [PATCH 10/17] Use PDFDoc::getPage() in HtmlOutputDev
---
utils/HtmlOutputDev.cc | 2 +-
utils/HtmlOutputDev.h | 2 ++
2 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index dbf677f..f47b2c1 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -1094,7 +1094,7 @@ void HtmlOutputDev::startPage(int pageNum, GfxState *state) {
void HtmlOutputDev::endPage() {
- Links *linksList = catalog->getPage(pageNum)->getLinks(catalog);
+ Links *linksList = docPage->getLinks(catalog);
for (int i = 0; i < linksList->getNumLinks(); ++i)
{
doProcessLink(linksList->getLink(i));
diff --git a/utils/HtmlOutputDev.h b/utils/HtmlOutputDev.h
index 24ccfd1..48b04c6 100644
--- a/utils/HtmlOutputDev.h
+++ b/utils/HtmlOutputDev.h
@@ -256,6 +256,7 @@ public:
GBool (* abortCheckCbk)(void *data) = NULL,
void * abortCheckCbkData = NULL)
{
+ docPage = page;
catalog = catalogA;
return gTrue;
}
@@ -323,6 +324,7 @@ private:
GooString *docTitle;
GooList *glMetaVars;
Catalog *catalog;
+ Page *docPage;
friend class HtmlPage;
};
--
1.6.4.2
From ae48a4ffebb9471f3ff5a729e6eb06090eed65e1 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 31 Mar 2010 14:39:57 +0200
Subject: [PATCH 11/17] Parse page tree on demand
---
poppler/Catalog.cc | 266 ++++++++++++++++++++++++++++++++++-----------------
poppler/Catalog.h | 12 ++-
2 files changed, 185 insertions(+), 93 deletions(-)
diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index dbf9af2..f6c3bb9 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -59,9 +59,6 @@ Catalog::Catalog(XRef *xrefA) {
Object catDict, pagesDict, pagesDictRef;
Object obj, obj2;
Object optContentProps;
- char *alreadyRead;
- int numPages0;
- int i;
ok = gTrue;
xref = xrefA;
@@ -78,6 +75,12 @@ Catalog::Catalog(XRef *xrefA) {
embeddedFileNameTree = NULL;
jsNameTree = NULL;
+ pagesList = NULL;
+ pagesRefList = NULL;
+ attrsList = NULL;
+ kidsIdxList = NULL;
+ lastCachedPage = 0;
+
xref->getCatalog(&catDict);
if (!catDict.isDict()) {
error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
@@ -100,31 +103,11 @@ Catalog::Catalog(XRef *xrefA) {
if (!obj.isNum()) {
error(-1, "Page count in top-level pages object is wrong type (%s)",
obj.getTypeName());
- pagesSize = numPages0 = 0;
+ numPages = 0;
} else {
- pagesSize = numPages0 = (int)obj.getNum();
+ numPages = (int)obj.getNum();
}
obj.free();
- pages = (Page **)gmallocn(pagesSize, sizeof(Page *));
- pageRefs = (Ref *)gmallocn(pagesSize, sizeof(Ref));
- for (i = 0; i < pagesSize; ++i) {
- pages[i] = NULL;
- pageRefs[i].num = -1;
- pageRefs[i].gen = -1;
- }
- alreadyRead = (char *)gmalloc(xref->getNumObjects());
- memset(alreadyRead, 0, xref->getNumObjects());
- if (catDict.dictLookupNF("Pages", &pagesDictRef)->isRef() &&
- pagesDictRef.getRefNum() >= 0 &&
- pagesDictRef.getRefNum() < xref->getNumObjects()) {
- alreadyRead[pagesDictRef.getRefNum()] = 1;
- }
- pagesDictRef.free();
- numPages = readPageTree(pagesDict.getDict(), NULL, 0, alreadyRead);
- gfree(alreadyRead);
- if (numPages != numPages0) {
- error(-1, "Page count in top-level pages object is incorrect");
- }
pagesDict.free();
// read base URI
@@ -163,6 +146,10 @@ Catalog::Catalog(XRef *xrefA) {
Catalog::~Catalog() {
int i;
+ delete kidsIdxList;
+ delete attrsList;
+ delete pagesRefList;
+ delete pagesList;
if (pages) {
for (i = 0; i < pagesSize; ++i) {
if (pages[i]) {
@@ -221,91 +208,192 @@ GooString *Catalog::readMetadata() {
return s;
}
-int Catalog::readPageTree(Dict *pagesDict, PageAttrs *attrs, int start,
- char *alreadyRead) {
- Object kids;
- Object kid;
- Object kidRef;
- PageAttrs *attrs1, *attrs2;
- Page *page;
- int i, j;
-
- attrs1 = new PageAttrs(attrs, pagesDict);
- pagesDict->lookup("Kids", &kids);
- if (!kids.isArray()) {
- error(-1, "Kids object (page %d) is wrong type (%s)",
- start+1, kids.getTypeName());
- return start;
- }
- for (i = 0; i < kids.arrayGetLength(); ++i) {
- kids.arrayGetNF(i, &kidRef);
- if (kidRef.isRef() &&
- kidRef.getRefNum() >= 0 &&
- kidRef.getRefNum() < xref->getNumObjects()) {
- if (alreadyRead[kidRef.getRefNum()]) {
- error(-1, "Loop in Pages tree");
- kidRef.free();
- continue;
+Page *Catalog::getPage(int i)
+{
+ // Page numbers start at 1; anything lower has no page.
+ if (i < 1) return NULL;
+
+ // Page i lies beyond the cached range: extend the cache or give up.
+ if (i > lastCachedPage && cachePageTree(i) == gFalse) return NULL;
+ return pages[i-1];
+}
+
+Ref *Catalog::getPageRef(int i)
+{
+ // Valid page numbers are 1-based.
+ if (i < 1) return NULL;
+ // Make sure the cache covers page i before indexing into pageRefs.
+ const GBool needsCaching = i > lastCachedPage;
+ if (needsCaching && cachePageTree(i) == gFalse) return NULL;
+ return &pageRefs[i-1];
+}
+
+GBool Catalog::cachePageTree(int page)
+{
+ Dict *pagesDict;
+ // Extend the page cache until it covers pages 1..<page>; gFalse on failure.
+ if (pagesList == NULL) {
+ // First call: locate the root Pages node and seed the traversal stacks.
+ Object catDict;
+ Ref pagesRef;
+
+ xref->getCatalog(&catDict);
+
+ Object pagesDictRef;
+ const GBool validRoot = catDict.dictLookupNF("Pages", &pagesDictRef)->isRef() &&
+ pagesDictRef.getRefNum() >= 0 &&
+ pagesDictRef.getRefNum() < xref->getNumObjects();
+ if (validRoot) pagesRef = pagesDictRef.getRef();
+ pagesDictRef.free();
+ if (!validRoot) {
+ error(-1, "Catalog dictionary does not contain a valid \"Pages\" entry");
+ catDict.free();
+ return gFalse;
+ }
+ Object obj;
+ catDict.dictLookup("Pages", &obj);
+ catDict.free(); // release the catalog object on the success path as well
+ // This should really be isDict("Pages"), but I've seen at least one
+ // PDF file where the /Type entry is missing.
+ if (obj.isDict()) {
+ obj.getDict()->incRef();
+ pagesDict = obj.getDict();
+ obj.free();
+ }
+ else {
+ error(-1, "Top-level pages object is wrong type (%s)", obj.getTypeName());
+ obj.free();
+ return gFalse;
+ }
+
+ pagesSize = numPages;
+ pages = (Page **)gmallocn(pagesSize, sizeof(Page *));
+ pageRefs = (Ref *)gmallocn(pagesSize, sizeof(Ref));
+ for (int i = 0; i < pagesSize; ++i) {
+ pages[i] = NULL;
+ pageRefs[i].num = -1;
+ pageRefs[i].gen = -1;
+ }
+
+ pagesList = new GooVector<Dict *>();
+ pagesList->push_back(pagesDict);
+ pagesRefList = new GooVector<Ref>();
+ pagesRefList->push_back(pagesRef);
+ attrsList = new GooVector<PageAttrs *>();
+ attrsList->push_back(new PageAttrs(NULL, pagesDict));
+ kidsIdxList = new GooVector<int>();
+ kidsIdxList->push_back(0);
+ lastCachedPage = 0;
+
+ }
+ // Resume the depth-first walk; the four lists together form its stack.
+ while(1) {
+
+ if (page <= lastCachedPage) return gTrue;
+
+ if (pagesList->empty()) return gFalse;
+
+ pagesDict = pagesList->back();
+ Object kids;
+ pagesDict->lookup("Kids", &kids);
+ if (!kids.isArray()) {
+ error(-1, "Kids object (page %d) is wrong type (%s)",
+ lastCachedPage+1, kids.getTypeName());
+ kids.free();
+ return gFalse;
+ }
+ // When every kid of the top node has been visited, pop it and advance its parent.
+ int kidsIdx = kidsIdxList->back();
+ if (kidsIdx >= kids.arrayGetLength()) {
+ delete pagesList->back();
+ pagesList->pop_back();
+ pagesRefList->pop_back();
+ delete attrsList->back();
+ attrsList->pop_back();
+ kidsIdxList->pop_back();
+ if (!kidsIdxList->empty()) kidsIdxList->back()++;
+ kids.free();
+ continue;
+ }
+
+ Object kidRef;
+ kids.arrayGetNF(kidsIdx, &kidRef);
+ if (!kidRef.isRef()) {
+ error(-1, "Kid object (page %d) is not an indirect reference (%s)",
+ lastCachedPage+1, kidRef.getTypeName());
+ kidRef.free();
+ kids.free();
+ return gFalse;
+ }
+ GBool loop = gFalse; // set when the kid is already one of the open ancestors
+ for (size_t i = 0; !loop && i < pagesRefList->size(); i++) {
+ if (((*pagesRefList)[i]).num == kidRef.getRefNum()) {
+ error(-1, "Loop in Pages tree");
+ kidRef.free();
+ kids.free();
+ kidsIdxList->back()++;
+ loop = gTrue; // ends the scan so the freed objects are not touched again
}
- alreadyRead[kidRef.getRefNum()] = 1;
}
- kids.arrayGet(i, &kid);
+ if (loop) continue; // skip this kid: continue the outer while, not the scan
+ Object kid;
+ kids.arrayGet(kidsIdx, &kid);
+ kids.free();
if (kid.isDict("Page")) {
- attrs2 = new PageAttrs(attrs1, kid.getDict());
- page = new Page(xref, start+1, kid.getDict(), kidRef.getRef(), attrs2, getForm());
- if (!page->isOk()) {
- ++start;
- goto err3;
- }
- if (start >= pagesSize) {
- pagesSize += 32;
- pages = (Page **)greallocn(pages, pagesSize, sizeof(Page *));
- pageRefs = (Ref *)greallocn(pageRefs, pagesSize, sizeof(Ref));
- for (j = pagesSize - 32; j < pagesSize; ++j) {
- pages[j] = NULL;
- pageRefs[j].num = -1;
- pageRefs[j].gen = -1;
- }
+ PageAttrs *attrs = new PageAttrs(attrsList->back(), kid.getDict());
+ Page *p = new Page(xref, lastCachedPage+1, kid.getDict(),
+ kidRef.getRef(), attrs, form);
+ if (!p->isOk()) {
+ error(-1, "Failed to create page (page %d)", lastCachedPage+1);
+ delete p;
+ kidRef.free();
+ kid.free();
+ return gFalse;
}
- pages[start] = page;
- if (kidRef.isRef()) {
- pageRefs[start].num = kidRef.getRefNum();
- pageRefs[start].gen = kidRef.getRefGen();
+ if (lastCachedPage >= numPages) {
+ error(-1, "Page count in top-level pages object is incorrect");
+ delete p;
+ kidRef.free();
+ kid.free();
+ return gFalse;
}
- ++start;
+
+ pages[lastCachedPage] = p;
+ pageRefs[lastCachedPage].num = kidRef.getRefNum();
+ pageRefs[lastCachedPage].gen = kidRef.getRefGen();
+
+ lastCachedPage++;
+ kidsIdxList->back()++;
+
// This should really be isDict("Pages"), but I've seen at least one
// PDF file where the /Type entry is missing.
} else if (kid.isDict()) {
- if ((start = readPageTree(kid.getDict(), attrs1, start, alreadyRead))
- < 0)
- goto err2;
+ attrsList->push_back(new PageAttrs(attrsList->back(), kid.getDict()));
+ pagesRefList->push_back(kidRef.getRef());
+ kid.getDict()->incRef();
+ pagesList->push_back(kid.getDict());
+ kidsIdxList->push_back(0);
} else {
error(-1, "Kid object (page %d) is wrong type (%s)",
- start+1, kid.getTypeName());
+ lastCachedPage+1, kid.getTypeName());
+ kidRef.free();
+ kid.free();
+ return gFalse;
}
- kid.free();
kidRef.free();
+ kid.free();
+
}
- delete attrs1;
- kids.free();
- return start;
- err3:
- delete page;
- err2:
- kid.free();
- kidRef.free();
- kids.free();
- delete attrs1;
- ok = gFalse;
- return -1;
+ return gFalse;
}
int Catalog::findPage(int num, int gen) {
int i;
for (i = 0; i < numPages; ++i) {
- if (pageRefs[i].num == num && pageRefs[i].gen == gen)
+ Ref *ref = getPageRef(i+1);
+ if (ref->num == num && ref->gen == gen)
return i + 1;
}
return 0;
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 2cab80a..5a25109 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -151,10 +151,10 @@ public:
int getNumPages() { return numPages; }
// Get a page.
- Page *getPage(int i) { return pages[i-1]; }
+ Page *getPage(int i);
// Get the reference for a page object.
- Ref *getPageRef(int i) { return &pageRefs[i-1]; }
+ Ref *getPageRef(int i);
// Return base URI, or NULL if none.
GooString *getBaseURI() { return baseURI; }
@@ -232,6 +232,11 @@ private:
XRef *xref; // the xref table for this PDF file
Page **pages; // array of pages
Ref *pageRefs; // object ID for each page
+ int lastCachedPage;
+ GooVector<Dict *> *pagesList;
+ GooVector<Ref> *pagesRefList;
+ GooVector<PageAttrs *> *attrsList;
+ GooVector<int> *kidsIdxList;
Form *form;
int numPages; // number of pages
int pagesSize; // size of pages array
@@ -251,8 +256,7 @@ private:
PageMode pageMode; // page mode
PageLayout pageLayout; // page layout
- int readPageTree(Dict *pages, PageAttrs *attrs, int start,
- char *alreadyRead);
+ GBool cachePageTree(int page); // Cache first <page> pages.
Object *findDestInTree(Object *tree, GooString *name, Object *obj);
Object *getNames();
--
1.6.4.2
From 6a732e32577e3371f67067094dd7879ed00db116 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 24 Mar 2010 22:01:41 +0100
Subject: [PATCH 12/17] Parse number of pages on demand
---
poppler/Catalog.cc | 70 +++++++++++++++++++++++++++++++--------------------
poppler/Catalog.h | 2 +-
2 files changed, 43 insertions(+), 29 deletions(-)
diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index f6c3bb9..35bfad4 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -64,7 +64,8 @@ Catalog::Catalog(XRef *xrefA) {
xref = xrefA;
pages = NULL;
pageRefs = NULL;
- numPages = pagesSize = 0;
+ numPages = -1;
+ pagesSize = 0;
baseURI = NULL;
pageLabelInfo = NULL;
form = NULL;
@@ -89,27 +90,6 @@ Catalog::Catalog(XRef *xrefA) {
// get the AcroForm dictionary
catDict.dictLookup("AcroForm", &acroForm);
- // read page tree
- catDict.dictLookup("Pages", &pagesDict);
- // This should really be isDict("Pages"), but I've seen at least one
- // PDF file where the /Type entry is missing.
- if (!pagesDict.isDict()) {
- error(-1, "Top-level pages object is wrong type (%s)",
- pagesDict.getTypeName());
- goto err2;
- }
- pagesDict.dictLookup("Count", &obj);
- // some PDF files actually use real numbers here ("/Count 9.0")
- if (!obj.isNum()) {
- error(-1, "Page count in top-level pages object is wrong type (%s)",
- obj.getTypeName());
- numPages = 0;
- } else {
- numPages = (int)obj.getNum();
- }
- obj.free();
- pagesDict.free();
-
// read base URI
if (catDict.dictLookup("URI", &obj)->isDict()) {
if (obj.dictLookup("Base", &obj2)->isString()) {
@@ -136,8 +116,6 @@ Catalog::Catalog(XRef *xrefA) {
catDict.free();
return;
- err2:
- pagesDict.free();
err1:
catDict.free();
ok = gFalse;
@@ -266,7 +244,7 @@ GBool Catalog::cachePageTree(int page)
return gFalse;
}
- pagesSize = numPages;
+ pagesSize = getNumPages();
pages = (Page **)gmallocn(pagesSize, sizeof(Page *));
pageRefs = (Ref *)gmallocn(pagesSize, sizeof(Ref));
for (int i = 0; i < pagesSize; ++i) {
@@ -391,7 +369,7 @@ GBool Catalog::cachePageTree(int page)
int Catalog::findPage(int num, int gen) {
int i;
- for (i = 0; i < numPages; ++i) {
+ for (i = 0; i < getNumPages(); ++i) {
Ref *ref = getPageRef(i+1);
if (ref->num == num && ref->gen == gen)
return i + 1;
@@ -711,7 +689,7 @@ GBool Catalog::labelToIndex(GooString *label, int *index)
return gFalse;
}
- if (*index < 0 || *index >= numPages)
+ if (*index < 0 || *index >= getNumPages())
return gFalse;
return gTrue;
@@ -721,7 +699,7 @@ GBool Catalog::indexToLabel(int index, GooString *label)
{
char buffer[32];
- if (index < 0 || index >= numPages)
+ if (index < 0 || index >= getNumPages())
return gFalse;
PageLabelInfo *pli = getPageLabelInfo();
@@ -837,6 +815,42 @@ EmbFile::EmbFile(Object *efDict, GooString *description)
m_mimetype = new GooString();
}
+int Catalog::getNumPages()
+{
+ // Read lazily: -1 means "not looked up yet"; every outcome is cached.
+ if (numPages == -1) {
+ Object catDict, pagesDict, obj;
+ xref->getCatalog(&catDict);
+ catDict.dictLookup("Pages", &pagesDict);
+ catDict.free();
+
+ // This should really be isDict("Pages"), but I've seen at least one
+ // PDF file where the /Type entry is missing.
+ if (!pagesDict.isDict()) {
+ error(-1, "Top-level pages object is wrong type (%s)",
+ pagesDict.getTypeName());
+ pagesDict.free();
+ numPages = 0;
+ return numPages;
+ }
+
+ pagesDict.dictLookup("Count", &obj);
+ // some PDF files actually use real numbers here ("/Count 9.0")
+ if (!obj.isNum()) {
+ error(-1, "Page count in top-level pages object is wrong type (%s)",
+ obj.getTypeName());
+ numPages = 0;
+ } else if ((numPages = (int)obj.getNum()) < 0) {
+ // a negative count is meaningless, and -1 would be taken for "unset"
+ error(-1, "Page count in top-level pages object is negative (%d)", numPages);
+ numPages = 0;
+ }
+ obj.free();
+ pagesDict.free();
+ }
+ return numPages;
+}
+
PageLabelInfo *Catalog::getPageLabelInfo()
{
if (!pageLabelInfo) {
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 5a25109..8bca80b 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -148,7 +148,7 @@ public:
GBool isOk() { return ok; }
// Get number of pages.
- int getNumPages() { return numPages; }
+ int getNumPages();
// Get a page.
Page *getPage(int i);
--
1.6.4.2
From 18129a510273f5a97f6da17c7ef12852c1e617b2 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 18:53:54 +0100
Subject: [PATCH 13/17] Get number of pages from linearization table
---
poppler/PDFDoc.cc | 9 +++++++++
poppler/PDFDoc.h | 2 +-
2 files changed, 10 insertions(+), 1 deletions(-)
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index e1b00fc..cf7a85a 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -1032,6 +1032,15 @@ Guint PDFDoc::getMainXRefEntriesOffset()
return mainXRefEntriesOffset;
}
+int PDFDoc::getNumPages()
+{
+ // Documents that are not linearized are counted through the catalog.
+ if (!isLinearized()) {
+ return catalog->getNumPages();
+ }
+ return getLinearization()->getNumPages();
+}
+
Page *PDFDoc::getPage(int page)
{
if ((page < 1) || page > getNumPages()) return NULL;
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 8de139f..9069698 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -118,7 +118,7 @@ public:
{ return getPage(page) ? getPage(page)->getRotate() : 0 ; }
// Get number of pages.
- int getNumPages() { return catalog->getNumPages(); }
+ int getNumPages();
// Return the contents of the metadata stream, or NULL if there is
// no metadata.
--
1.6.4.2
From 45eca9ae14cd11bd05395711b8d4d067e0ab1298 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 4 Aug 2010 18:09:36 +0200
Subject: [PATCH 14/17] Keep security handler available in PDFDoc
---
poppler/PDFDoc.cc | 4 ++--
poppler/PDFDoc.h | 2 ++
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index cf7a85a..4f2c3b1 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -99,6 +99,7 @@ void PDFDoc::init()
outline = NULL;
#endif
startXRefPos = ~(Guint)0;
+ secHdlr = NULL;
}
PDFDoc::PDFDoc()
@@ -258,6 +259,7 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
}
PDFDoc::~PDFDoc() {
+ delete secHdlr;
#ifndef DISABLE_OUTLINE
if (outline) {
delete outline;
@@ -354,7 +356,6 @@ void PDFDoc::checkHeader() {
GBool PDFDoc::checkEncryption(GooString *ownerPassword, GooString *userPassword) {
Object encrypt;
GBool encrypted;
- SecurityHandler *secHdlr;
GBool ret;
xref->getTrailerDict()->dictLookup("Encrypt", &encrypt);
@@ -374,7 +375,6 @@ GBool PDFDoc::checkEncryption(GooString *ownerPassword, GooString *userPassword)
// authorization failed
ret = gFalse;
}
- delete secHdlr;
} else {
// couldn't find the matching security handler
ret = gFalse;
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 9069698..55fdb2b 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -49,6 +49,7 @@ class LinkAction;
class LinkDest;
class Outline;
class Linearization;
+class SecurityHandler;
enum PDFWriteMode {
writeStandard,
@@ -257,6 +258,7 @@ private:
int pdfMinorVersion;
Linearization *linearization;
XRef *xref;
+ SecurityHandler *secHdlr;
Catalog *catalog;
#ifndef DISABLE_OUTLINE
Outline *outline;
--
1.6.4.2
From 4cf98ea25e08647e4efb7abd5600386de7ed92c7 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Wed, 24 Mar 2010 22:03:27 +0100
Subject: [PATCH 15/17] Add hint tables support
---
CMakeLists.txt | 2 +
poppler/Hints.cc | 420 +++++++++++++++++++++++++++++++++++++++++++++++++++
poppler/Hints.h | 92 +++++++++++
poppler/Makefile.am | 2 +
poppler/PDFDoc.cc | 14 ++
poppler/PDFDoc.h | 5 +
6 files changed, 535 insertions(+), 0 deletions(-)
create mode 100644 poppler/Hints.cc
create mode 100644 poppler/Hints.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3b71963..52b7281 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -247,6 +247,7 @@ set(poppler_SRCS
poppler/GfxFont.cc
poppler/GfxState.cc
poppler/GlobalParams.cc
+ poppler/Hints.cc
poppler/JArithmeticDecoder.cc
poppler/JBIG2Stream.cc
poppler/Lexer.cc
@@ -394,6 +395,7 @@ if(ENABLE_XPDF_HEADERS)
poppler/GfxState.h
poppler/GfxState_helpers.h
poppler/GlobalParams.h
+ poppler/Hints.h
poppler/JArithmeticDecoder.h
poppler/JBIG2Stream.h
poppler/Lexer.h
diff --git a/poppler/Hints.cc b/poppler/Hints.cc
new file mode 100644
index 0000000..a8aa241
--- /dev/null
+++ b/poppler/Hints.cc
@@ -0,0 +1,420 @@
+//========================================================================
+//
+// Hints.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2010 Hib Eris <hib at hiberis.nl>
+//
+//========================================================================
+
+#include <config.h>
+
+#include "Hints.h"
+
+#include "Linearization.h"
+#include "Object.h"
+#include "Stream.h"
+#include "XRef.h"
+#include "Parser.h"
+#include "Lexer.h"
+#include "SecurityHandler.h"
+
+#include <limits.h>
+
+//------------------------------------------------------------------------
+// Hints
+//------------------------------------------------------------------------
+
+// Builds the hint tables for a linearized document. Page count and
+// first-page data are taken from the linearization object; the per-page
+// arrays are sized from nPages and then filled in by readTables().
+Hints::Hints(BaseStream *str, Linearization *linearization, XRef *xref, SecurityHandler *secHdlr)
+{
+  mainXRefEntriesOffset = linearization->getMainXRefEntriesOffset();
+  nPages = linearization->getNumPages();
+  pageFirst = linearization->getPageFirst();
+  pageEndFirst = linearization->getEndFirst();
+
+  if (nPages >= INT_MAX / (int)sizeof(Guint)) {
+    error(-1, "Invalid number of pages (%d) for hints table", nPages);
+    nPages = 0;
+  }
+  nObjects = (Guint *) gmallocn(nPages, sizeof(Guint));
+  xRefOffset = (Guint *) gmallocn(nPages, sizeof(Guint));
+  pageLength = (Guint *) gmallocn(nPages, sizeof(Guint));
+  pageOffset = (Guint *) gmallocn(nPages, sizeof(Guint));
+  numSharedObject = (Guint *) gmallocn(nPages, sizeof(Guint));
+  sharedObjectId = (Guint **) gmallocn(nPages, sizeof(Guint*));
+  if (!nObjects || !xRefOffset || !pageLength || !pageOffset ||
+      !numSharedObject || !sharedObjectId) {
+    error(-1, "Failed to allocate memory for hints tabel");
+    nPages = 0;
+  }
+
+  // The destructor frees sharedObjectId[i] only where numSharedObject[i]
+  // is non-zero, so every element has to start out as zero. memset()
+  // takes a byte count, hence the multiplication by the element size.
+  if (numSharedObject) {
+    memset(numSharedObject, 0, nPages * sizeof(Guint));
+  }
+
+  nSharedGroups = 0;
+  groupLength = NULL;
+  groupOffset = NULL;
+  groupHasSignature = NULL;
+  groupNumObjects = NULL;
+  groupXRefOffset = NULL;
+
+  readTables(str, linearization, xref, secHdlr);
+}
+
+// Releases every array owned by the hint tables.
+Hints::~Hints()
+{
+  // An id list is freed only for pages whose shared-object count is non-zero.
+  for (int page = 0; page < nPages; ++page) {
+    if (numSharedObject[page] != 0) {
+      gfree(sharedObjectId[page]);
+    }
+  }
+  gfree(sharedObjectId);
+  gfree(numSharedObject);
+
+  // Per-page arrays.
+  gfree(pageOffset);
+  gfree(pageLength);
+  gfree(xRefOffset);
+  gfree(nObjects);
+
+  // Shared object group arrays.
+  gfree(groupXRefOffset);
+  gfree(groupNumObjects);
+  gfree(groupHasSignature);
+  gfree(groupOffset);
+  gfree(groupLength);
+}
+
+// Copies the primary hint stream bytes (and the second range, when both
+// its offset and length are non-zero) into one contiguous buffer, parses
+// that buffer as an indirect stream object and decodes the page offset
+// and shared objects hint tables from it. Problems are reported through
+// error(); the tables are then left as the constructor initialised them.
+void Hints::readTables(BaseStream *str, Linearization *linearization, XRef *xref, SecurityHandler *secHdlr)
+{
+  hintsOffset = linearization->getHintsOffset();
+  hintsLength = linearization->getHintsLength();
+  hintsOffset2 = linearization->getHintsOffset2();
+  hintsLength2 = linearization->getHintsLength2();
+
+  // Both lengths come from the file: reject a combined size that is zero
+  // or does not fit in an int before allocating anything.
+  if (hintsLength >= (Guint)INT_MAX ||
+      hintsLength2 >= (Guint)INT_MAX - hintsLength ||
+      hintsLength + hintsLength2 == 0) {
+    error(-1, "Invalid hints table length");
+    return;
+  }
+  int bufLength = hintsLength + hintsLength2;
+
+  // Heap storage rather than a variable-length stack array: the size is
+  // file-controlled and variable-length arrays are not standard C++.
+  char *buf = (char *) gmallocn(bufLength, sizeof(char));
+  if (!buf) {
+    error(-1, "Failed to allocate memory for hints table");
+    return;
+  }
+  char *p = buf;
+
+  Object obj;
+  obj.initNull();
+  Stream *s = str->makeSubStream(hintsOffset, gFalse, hintsLength, &obj);
+  s->reset();
+  for (Guint i=0; i < hintsLength; i++) { *p++ = s->getChar(); }
+  delete s;
+
+  if (hintsOffset2 && hintsLength2) {
+    obj.initNull();
+    s = str->makeSubStream(hintsOffset2, gFalse, hintsLength2, &obj);
+    s->reset();
+    for (Guint i=0; i < hintsLength2; i++) { *p++ = s->getChar(); }
+    delete s;
+  }
+
+  obj.initNull();
+  MemStream *memStream = new MemStream (buf, 0, bufLength, &obj);
+
+  obj.initNull();
+  Parser *parser = new Parser(xref, new Lexer(xref, memStream), gTrue);
+
+  // Expect "<num> <gen> obj" followed by the (possibly encrypted) stream.
+  int num, gen;
+  if (parser->getObj(&obj)->isInt() &&
+      (num = obj.getInt(), obj.free(), parser->getObj(&obj)->isInt()) &&
+      (gen = obj.getInt(), obj.free(), parser->getObj(&obj)->isCmd("obj")) &&
+      (obj.free(), parser->getObj(&obj,
+         secHdlr ? secHdlr->getFileKey() : (Guchar *)NULL,
+         secHdlr ? secHdlr->getEncAlgorithm() : cryptRC4,
+         secHdlr ? secHdlr->getFileKeyLength() : 0,
+         num, gen)->isStream())) {
+    Stream *hintsStream = obj.getStream();
+    Dict *hintsDict = obj.streamGetDict();
+
+    // "S" gives the byte position of the shared objects table inside the
+    // decoded hint stream; the page offset table starts at position 0.
+    int sharedStreamOffset = 0;
+    if (hintsDict->lookupInt("S", NULL, &sharedStreamOffset) &&
+        sharedStreamOffset > 0) {
+
+      hintsStream->reset();
+      readPageOffsetTable(hintsStream);
+
+      hintsStream->reset();
+      for (int i=0; i<sharedStreamOffset; i++) hintsStream->getChar();
+      readSharedObjectsTable(hintsStream);
+    } else {
+      error(-1, "Invalid shared object hint table offset");
+    }
+  } else {
+    error(-1, "Failed parsing hints table object");
+  }
+  obj.free();
+
+  delete parser;
+  // The buffer has to outlive every stream reading from it, so it goes last.
+  gfree(buf);
+}
+
+// Decodes the page offset hint table from str into the per-page arrays
+// nObjects, xRefOffset, pageLength, numSharedObject, sharedObjectId and
+// pageOffset. Only reports an error and returns when nPages < 1.
+// The early returns in the shared-object count loop leave the entries of
+// numSharedObject/sharedObjectId for the remaining pages untouched.
+void Hints::readPageOffsetTable(Stream *str)
+{
+ if (nPages < 1) {
+ error(-1, "Invalid number of pages reading page offset hints table");
+ return;
+ }
+
+ inputBits = 0; // reset on byte boundary.
+
+ // Table header: thirteen fixed-width fields, read in file order.
+ nObjectLeast = readBits(32, str);
+
+ objectOffsetFirst = readBits(32, str);
+ // An offset at or beyond the hint stream is shifted by the hint stream length.
+ if (objectOffsetFirst >= hintsOffset) objectOffsetFirst += hintsLength;
+
+ nBitsDiffObjects = readBits(16, str);
+
+ pageLengthLeast = readBits(32, str);
+
+ nBitsDiffPageLength = readBits(16, str);
+
+ OffsetStreamLeast = readBits(32, str);
+
+ nBitsOffsetStream = readBits(16, str);
+
+ lengthStreamLeast = readBits(32, str);
+
+ nBitsLengthStream = readBits(16, str);
+
+ nBitsNumShared = readBits(16, str);
+
+ nBitsShared = readBits(16, str);
+
+ nBitsNumerator = readBits(16, str);
+
+ denominator = readBits(16, str);
+
+ // Per-page object counts are stored as a delta on top of nObjectLeast.
+ for (int i=0; i<nPages; i++) {
+ nObjects[i] = nObjectLeast + readBits(nBitsDiffObjects, str);
+ }
+
+ // The value read for entry 0 is discarded. Each following entry's xref
+ // offset advances by 20 bytes per object of the previous entry
+ // (20 is presumably the size of one cross-reference entry -- confirm).
+ nObjects[0] = 0;
+ xRefOffset[0] = mainXRefEntriesOffset + 20;
+ for (int i=1; i<nPages; i++) {
+ xRefOffset[i] = xRefOffset[i-1] + 20*nObjects[i-1];
+ }
+
+ // Per-page lengths are stored as a delta on top of pageLengthLeast.
+ inputBits = 0; // reset on byte boundary. Not in specs!
+ for (int i=0; i<nPages; i++) {
+ pageLength[i] = pageLengthLeast + readBits(nBitsDiffPageLength, str);
+ }
+
+ inputBits = 0; // reset on byte boundary. Not in specs!
+ // The count for entry 0 is read only to consume its bits from the stream.
+ numSharedObject[0] = readBits(nBitsNumShared, str);
+ numSharedObject[0] = 0; // Do not trust the read value to be 0.
+ sharedObjectId[0] = NULL;
+ for (int i=1; i<nPages; i++) {
+ numSharedObject[i] = readBits(nBitsNumShared, str);
+ if (numSharedObject[i] >= INT_MAX / (int)sizeof(Guint)) {
+ error(-1, "Invalid number of shared objects");
+ numSharedObject[i] = 0;
+ return;
+ }
+ sharedObjectId[i] = (Guint *) gmallocn(numSharedObject[i], sizeof(Guint));
+ if (numSharedObject[i] && !sharedObjectId[i]) {
+ error(-1, "Failed to allocate memory for shared object IDs");
+ numSharedObject[i] = 0;
+ return;
+ }
+ }
+
+ // Shared object identifiers, numSharedObject[i] of them for each entry i >= 1.
+ inputBits = 0; // reset on byte boundary. Not in specs!
+ for (int i=1; i<nPages; i++) {
+ for (Guint j=0; j < numSharedObject[i]; j++) {
+ sharedObjectId[i][j] = readBits(nBitsShared, str);
+ }
+ }
+
+ pageOffset[0] = objectOffsetFirst;
+ // set fake pageOffset[0] to correct for hint table.
+ if (pageOffset[0] < hintsOffset) {
+ pageOffset[0] += hintsLength;
+ }
+ // find pageOffsets.
+ for (int i=1; i<nPages; i++) {
+ pageOffset[i] = pageOffset[i-1] + pageLength[i-1];
+ }
+ // restore correct pageOffset[0].
+ pageOffset[0] = objectOffsetFirst;
+
+}
+
+// Decodes the shared objects hint table from str. On success the group*
+// arrays hold one entry per shared object group and the nSharedGroups
+// member holds the number of entries. When the group count is invalid or
+// an allocation fails, nSharedGroups is reset to 0.
+void Hints::readSharedObjectsTable(Stream *str)
+{
+  inputBits = 0; // reset on byte boundary.
+
+  Guint firstSharedObjectNumber = readBits(32, str);
+
+  Guint firstSharedObjectOffset = readBits(32, str);
+  firstSharedObjectOffset += hintsLength;
+
+  Guint nSharedGroupsFirst = readBits(32, str);
+
+  // Assign the member rather than declaring a local of the same name: a
+  // local would shadow it and leave the member at 0, and getPageRanges()
+  // range-checks group indexes against the member.
+  nSharedGroups = readBits(32, str);
+
+  Guint nBitsNumObjects = readBits(16, str);
+
+  Guint groupLengthLeast = readBits(32, str);
+
+  Guint nBitsDiffGroupLength = readBits(16, str);
+
+  if ((!nSharedGroups) || (nSharedGroups >= INT_MAX / (int)sizeof(Guint))) {
+    error(-1, "Invalid number of shared object groups");
+    nSharedGroups = 0;
+    return;
+  }
+  if ((!nSharedGroupsFirst) || (nSharedGroupsFirst > nSharedGroups)) {
+    error(-1, "Invalid number of first page shared object groups");
+    nSharedGroupsFirst = nSharedGroups;
+  }
+
+  groupLength = (Guint *) gmallocn(nSharedGroups, sizeof(Guint));
+  groupOffset = (Guint *) gmallocn(nSharedGroups, sizeof(Guint));
+  groupHasSignature = (Guint *) gmallocn(nSharedGroups, sizeof(Guint));
+  groupNumObjects = (Guint *) gmallocn(nSharedGroups, sizeof(Guint));
+  groupXRefOffset = (Guint *) gmallocn(nSharedGroups, sizeof(Guint));
+  if (!groupLength || !groupOffset || !groupHasSignature ||
+      !groupNumObjects || !groupXRefOffset) {
+    error(-1, "Failed to allocate memory for shared object groups");
+    nSharedGroups = 0;
+    return;
+  }
+
+  // Group lengths are stored as a delta on top of groupLengthLeast.
+  inputBits = 0; // reset on byte boundary. Not in specs!
+  for (Guint i=0; i<nSharedGroups; i++) {
+    groupLength[i] = groupLengthLeast + readBits(nBitsDiffGroupLength, str);
+  }
+
+  // The first nSharedGroupsFirst groups are laid out back to back from
+  // objectOffsetFirst; the remaining ones from firstSharedObjectOffset.
+  groupOffset[0] = objectOffsetFirst;
+  for (Guint i=1; i<nSharedGroupsFirst; i++) {
+    groupOffset[i] = groupOffset[i-1] + groupLength[i-1];
+  }
+  if (nSharedGroups > nSharedGroupsFirst ) {
+    groupOffset[nSharedGroupsFirst] = firstSharedObjectOffset;
+    for (Guint i=nSharedGroupsFirst+1; i<nSharedGroups; i++) {
+      groupOffset[i] = groupOffset[i-1] + groupLength[i-1];
+    }
+  }
+
+  inputBits = 0; // reset on byte boundary. Not in specs!
+  for (Guint i=0; i<nSharedGroups; i++) {
+    groupHasSignature[i] = readBits(1, str);
+  }
+
+  // Signatures are 128 bits each; they are consumed and discarded.
+  inputBits = 0; // reset on byte boundary. Not in specs!
+  for (Guint i=0; i<nSharedGroups; i++) {
+    if (groupHasSignature[i]) {
+      readBits(128, str);
+    }
+  }
+
+  inputBits = 0; // reset on byte boundary. Not in specs!
+  for (Guint i=0; i<nSharedGroups; i++) {
+    groupNumObjects[i] =
+      nBitsNumObjects ? 1 + readBits(nBitsNumObjects, str) : 1;
+  }
+
+  // The first nSharedGroupsFirst groups get no xref range of their own.
+  for (Guint i=0; i<nSharedGroupsFirst; i++) {
+    groupNumObjects[i] = 0;
+    groupXRefOffset[i] = 0;
+  }
+  if (nSharedGroups > nSharedGroupsFirst ) {
+    groupXRefOffset[nSharedGroupsFirst] =
+      mainXRefEntriesOffset + 20*firstSharedObjectNumber;
+    for (Guint i=nSharedGroupsFirst+1; i<nSharedGroups; i++) {
+      groupXRefOffset[i] = groupXRefOffset[i-1] + 20*groupNumObjects[i-1];
+    }
+  }
+}
+
+// Returns the file offset recorded for the 1-based page number, or 0 when
+// the page number (or the table index derived from it) is out of range.
+// Index 0 of the table belongs to page pageFirst+1; pages before it are
+// stored one slot later, pages after it at page-1.
+Guint Hints::getPageOffset(int page)
+{
+  if ((page < 1) || (page > nPages)) return 0;
+
+  int idx;
+  if (page-1 > pageFirst)
+    idx = page-1;
+  else if (page-1 < pageFirst)
+    idx = page;
+  else
+    idx = 0;
+
+  // pageFirst is not range-checked here, so make sure the derived index
+  // stays inside the nPages-element array.
+  if (idx >= nPages) return 0;
+
+  return pageOffset[idx];
+}
+
+// Returns a newly allocated vector of the byte ranges recorded for the
+// 1-based page number: the page's own data, its xref entries, and for each
+// shared object group it references the group's data and xref entries.
+// Returns NULL when the page number (or the derived table index) is out of
+// range. The caller owns the returned vector.
+GooVector<ByteRange>* Hints::getPageRanges(int page)
+{
+  if ((page < 1) || (page > nPages)) return NULL;
+
+  int idx;
+  if (page-1 > pageFirst)
+    idx = page-1;
+  else if (page-1 < pageFirst)
+    idx = page;
+  else
+    idx = 0;
+
+  // pageFirst is not range-checked here, so make sure the derived index
+  // stays inside the nPages-element arrays.
+  if (idx >= nPages) return NULL;
+
+  ByteRange pageRange;
+  GooVector<ByteRange> *v = new GooVector<ByteRange>;
+
+  pageRange.offset = pageOffset[idx];
+  pageRange.length = pageLength[idx];
+  v->push_back(pageRange);
+
+  pageRange.offset = xRefOffset[idx];
+  pageRange.length = 20*nObjects[idx];
+  v->push_back(pageRange);
+
+  for (Guint j=0; j<numSharedObject[idx]; j++) {
+    Guint k = sharedObjectId[idx][j];
+
+    // Group ids are read from the file; skip any that do not name a
+    // parsed group. nSharedGroups is 0 when the shared objects table was
+    // not read, in which case the group arrays are never touched.
+    if (k >= nSharedGroups) {
+      error(-1, "Invalid shared object group index in hints table");
+      continue;
+    }
+
+    pageRange.offset = groupOffset[k];
+    pageRange.length = groupLength[k];
+    v->push_back(pageRange);
+
+    pageRange.offset = groupXRefOffset[k];
+    pageRange.length = 20*groupNumObjects[k];
+    v->push_back(pageRange);
+  }
+
+  return v;
+}
+
+// Returns the next bit (0 or 1) of str, most significant bit of each byte
+// first, or (Guint)-1 when a new byte is needed and the stream is at EOF.
+Guint Hints::readBit(Stream *str)
+{
+  // Fetch a fresh byte once every bit of the buffered one has been used.
+  if (inputBits == 0) {
+    int c = str->getChar();
+    if (c == EOF) {
+      return (Guint) -1;
+    }
+    bitsBuffer = c;
+    inputBits = 8;
+  }
+
+  --inputBits;
+  Guint bit = (bitsBuffer >> inputBits) & 1;
+  return bit;
+}
+
+// Reads n bits from str, most significant bit first, and returns them
+// right-aligned. Returns (Guint)-1 when n is negative or the stream ends
+// before n bits were read, and 0 when n is 0. When n is larger than the
+// width of Guint all n bits are still consumed, but only the last
+// (low-order) bits of the sequence are kept.
+Guint Hints::readBits(int n, Stream *str)
+{
+  if (n < 0) return (Guint) -1;
+
+  // Iterative on purpose: bit widths come from the file, so recursing
+  // once per bit or shifting by n-1 is unsafe for large n.
+  Guint bits = 0;
+  for (int i = 0; i < n; i++) {
+    Guint bit = readBit(str);
+    // Test for the EOF marker before it is combined with other bits;
+    // afterwards it can no longer be told apart from data.
+    if (bit == (Guint) -1)
+      return (Guint) -1;
+    bits = (bits << 1) | bit;
+  }
+
+  return bits;
+}
+
+
diff --git a/poppler/Hints.h b/poppler/Hints.h
new file mode 100644
index 0000000..3a52d67
--- /dev/null
+++ b/poppler/Hints.h
@@ -0,0 +1,92 @@
+//========================================================================
+//
+// Hints.h
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2010 Hib Eris <hib at hiberis.nl>
+//
+//========================================================================
+
+#ifndef HINTS_H
+#define HINTS_H
+
+#include <string.h>
+#include "goo/gtypes.h"
+#include "goo/GooVector.h"
+//#include <vector>
+#include "PDFDoc.h"
+
+class Stream;
+class BaseStream;
+class Linearization;
+class XRef;
+
+//------------------------------------------------------------------------
+// Hints
+//------------------------------------------------------------------------
+
+// Hint tables of a linearized document: per-page and per-shared-group
+// byte ranges decoded from the hint stream(s) named by the linearization
+// object.
+class Hints {
+public:
+
+ // Reads the hint tables from str; reports problems through error().
+ Hints(BaseStream *str, Linearization *linearization, XRef *xref, SecurityHandler *secHdlr);
+ ~Hints();
+
+ // Offset recorded for the 1-based page number; 0 if page is out of range.
+ Guint getPageOffset(int page);
+ // Newly allocated list of byte ranges recorded for the 1-based page
+ // number; NULL if page is out of range.
+ GooVector<ByteRange>* getPageRanges(int page);
+
+private:
+
+ // Loads the hint stream bytes and decodes both tables below.
+ void readTables(BaseStream *str, Linearization *linearization, XRef *xref, SecurityHandler *secHdlr);
+ void readPageOffsetTable(Stream *str);
+ void readSharedObjectsTable(Stream *str);
+
+ // Bit reader: readBit() returns one bit, readBits() returns n bits;
+ // both return (Guint)-1 at end of stream.
+ Guint readBit(Stream *str);
+ Guint readBits(int n, Stream *str);
+
+ // Location of the hint stream ranges and of the main xref entries, as
+ // reported by the Linearization object.
+ Guint hintsOffset;
+ Guint hintsLength;
+ Guint hintsOffset2;
+ Guint hintsLength2;
+ Guint mainXRefEntriesOffset;
+
+ // nPages, pageFirst and pageEndFirst are copied from the Linearization
+ // object by the constructor.
+ int nPages;
+ int pageFirst;
+ Guint pageOffsetFirst;
+ Guint pageEndFirst;
+ int objectNumberFirst;
+
+ // Header fields of the page offset hint table, in the order
+ // readPageOffsetTable() reads them.
+ Guint nObjectLeast;
+ Guint objectOffsetFirst;
+ Guint nBitsDiffObjects;
+ Guint pageLengthLeast;
+ Guint nBitsDiffPageLength;
+ Guint OffsetStreamLeast;
+ Guint nBitsOffsetStream;
+ Guint lengthStreamLeast;
+ Guint nBitsLengthStream;
+ Guint nBitsNumShared;
+ Guint nBitsShared;
+ Guint nBitsNumerator;
+ Guint denominator;
+
+ // Per-page arrays, nPages entries each. sharedObjectId[i] holds
+ // numSharedObject[i] group ids.
+ Guint *nObjects;
+ Guint *xRefOffset;
+ Guint *pageLength;
+ Guint *pageOffset;
+ Guint *numSharedObject;
+ Guint **sharedObjectId;
+
+ // Shared object group arrays; NULL until readSharedObjectsTable()
+ // allocates them. nSharedGroups is initialised to 0 by the constructor.
+ Guint nSharedGroups;
+ Guint *groupLength;
+ Guint *groupOffset;
+ Guint *groupHasSignature;
+ Guint *groupNumObjects;
+ Guint *groupXRefOffset;
+
+ // Bit reader state: number of unread bits left in bitsBuffer, and the
+ // byte currently being consumed.
+ int inputBits;
+ char bitsBuffer;
+
+};
+
+#endif
diff --git a/poppler/Makefile.am b/poppler/Makefile.am
index 522f27e..b445c18 100644
--- a/poppler/Makefile.am
+++ b/poppler/Makefile.am
@@ -206,6 +206,7 @@ poppler_include_HEADERS = \
GfxState.h \
GfxState_helpers.h \
GlobalParams.h \
+ Hints.h \
JArithmeticDecoder.h \
JBIG2Stream.h \
Lexer.h \
@@ -285,6 +286,7 @@ libpoppler_la_SOURCES = \
GfxFont.cc \
GfxState.cc \
GlobalParams.cc \
+ Hints.cc \
JArithmeticDecoder.cc \
JBIG2Stream.cc \
Lexer.cc \
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 4f2c3b1..88d2f25 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -68,6 +68,7 @@
#include "Outline.h"
#endif
#include "PDFDoc.h"
+#include "Hints.h"
//------------------------------------------------------------------------
@@ -95,6 +96,7 @@ void PDFDoc::init()
xref = NULL;
linearization = NULL;
catalog = NULL;
+ hints = NULL;
#ifndef DISABLE_OUTLINE
outline = NULL;
#endif
@@ -271,6 +273,9 @@ PDFDoc::~PDFDoc() {
if (xref) {
delete xref;
}
+ if (hints) {
+ delete hints;
+ }
if (linearization) {
delete linearization;
}
@@ -470,6 +475,15 @@ GBool PDFDoc::isLinearized() {
return gFalse;
}
+// Returns the hint tables, creating them on first use when the document
+// is linearized. Returns NULL when no hint tables have been created.
+Hints *PDFDoc::getHints()
+{
+  if (hints) {
+    return hints;
+  }
+
+  if (isLinearized()) {
+    hints = new Hints(str, getLinearization(), getXRef(), secHdlr);
+  }
+  return hints;
+}
+
int PDFDoc::saveAs(GooString *name, PDFWriteMode mode) {
FILE *f;
OutStream *outStr;
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 55fdb2b..6e6b6ac 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -50,6 +50,7 @@ class LinkDest;
class Outline;
class Linearization;
class SecurityHandler;
+class Hints;
enum PDFWriteMode {
writeStandard,
@@ -237,6 +238,9 @@ private:
void saveIncrementalUpdate (OutStream* outStr);
void saveCompleteRewrite (OutStream* outStr);
+ // Get hints.
+ Hints *getHints();
+
PDFDoc();
void init();
GBool setup(GooString *ownerPassword, GooString *userPassword);
@@ -260,6 +264,7 @@ private:
XRef *xref;
SecurityHandler *secHdlr;
Catalog *catalog;
+ Hints *hints;
#ifndef DISABLE_OUTLINE
Outline *outline;
#endif
--
1.6.4.2
From bfdc1bf5dfad11addfaca85493472f9d45c35877 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Tue, 20 Apr 2010 19:06:02 +0200
Subject: [PATCH 16/17] Use hint tables for PDFDoc::getPage()
---
poppler/PDFDoc.cc | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
poppler/PDFDoc.h | 4 +++
2 files changed, 79 insertions(+), 1 deletions(-)
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 88d2f25..190e87e 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -102,6 +102,7 @@ void PDFDoc::init()
#endif
startXRefPos = ~(Guint)0;
secHdlr = NULL;
+ pageCache = NULL;
}
PDFDoc::PDFDoc()
@@ -261,6 +262,14 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
}
PDFDoc::~PDFDoc() {
+ if (pageCache) {
+ for (int i = 0; i < getNumPages(); i++) {
+ if (pageCache[i]) {
+ delete pageCache[i];
+ }
+ }
+ gfree(pageCache);
+ }
delete secHdlr;
#ifndef DISABLE_OUTLINE
if (outline) {
@@ -1055,11 +1064,76 @@ int PDFDoc::getNumPages()
}
}
+// Returns the file offset of the 1-based page number. The first page of a
+// linearized document is looked up through the xref entry of its object
+// number; every other page through the hint tables. Returns 0, after
+// reporting an error, when the hint tables give no offset.
+Guint PDFDoc::getPageOffset(int page)
+{
+  if (isLinearized() && (page-1 == getLinearization()->getPageFirst())) {
+    return xref->getEntry(linearization->getObjectNumberFirst())->offset;
+  }
+
+  Hints *pageHints = getHints();
+  Guint offset = pageHints ? pageHints->getPageOffset(page) : 0;
+  if (!offset) {
+    error(-1, "Failed getting page offset from hint table");
+    return 0;
+  }
+  return offset;
+}
+
+// Parses the object starting at the given file offset and, when it is an
+// indirect object whose dictionary satisfies isDict("Page"), wraps it in
+// a new Page carrying the given page number. Returns NULL when the
+// object does not have that shape or the resulting Page is not ok.
+Page *PDFDoc::parsePage(Guint offset, int page)
+{
+  Object streamDict;
+  streamDict.initNull();
+  Stream *stream = str->makeSubStream(offset, gFalse, 0, &streamDict);
+  Parser parser = Parser(xref, new Lexer(xref, stream), gTrue);
+
+  // Expected token sequence: <num> <gen> obj <page dictionary>.
+  Object numObj, genObj, cmdObj, dictObj;
+  GBool isPageObject = parser.getObj(&numObj)->isInt() &&
+                       parser.getObj(&genObj)->isInt() &&
+                       parser.getObj(&cmdObj)->isCmd("obj") &&
+                       parser.getObj(&dictObj)->isDict("Page");
+
+  Page *result = NULL;
+  if (isPageObject) {
+    Ref pageRef;
+    pageRef.num = numObj.getInt();
+    pageRef.gen = genObj.getInt();
+    Dict *pageDict = dictObj.getDict();
+    result = new Page(xref, page, pageDict, pageRef,
+                      new PageAttrs(NULL, pageDict),
+                      catalog->getForm());
+    if (!result->isOk()) {
+      delete result;
+      result = NULL;
+    }
+  }
+
+  dictObj.free();
+  cmdObj.free();
+  genObj.free();
+  numObj.free();
+
+  return result;
+}
+
Page *PDFDoc::getPage(int page)
{
if ((page < 1) || page > getNumPages()) return NULL;
- {
+ if (isLinearized()) {
+ if (!pageCache) {
+ pageCache = (Page **) gmallocn(getNumPages(), sizeof(Page *));
+ for (int i = 0; i < getNumPages(); i++) {
+ pageCache[i] = NULL;
+ }
+ }
+ if (!pageCache[page-1]) {
+ pageCache[page-1] = parsePage(getPageOffset(page), page);
+ if (!pageCache[page-1]) {
+ error(-1, "Failed parsing page %d at offset %d",
+ page, getPageOffset(page));
+ }
+ }
+ return pageCache[page-1];
+ } else {
return catalog->getPage(page);
}
}
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 6e6b6ac..d661abe 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -238,6 +238,9 @@ private:
void saveIncrementalUpdate (OutStream* outStr);
void saveCompleteRewrite (OutStream* outStr);
+ Guint getPageOffset(int page);
+ Page *parsePage(Guint offset, int page);
+
// Get hints.
Hints *getHints();
@@ -268,6 +271,7 @@ private:
#ifndef DISABLE_OUTLINE
Outline *outline;
#endif
+ Page **pageCache;
GBool ok;
int errCode;
--
1.6.4.2
From a92875718afd7f792e25ba45d7007f1cb65708c7 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 13:08:11 +0100
Subject: [PATCH 17/17] Fill CachedFileStream buffer in a smarter manner
This avoids downloading too many chunks by buffering on chunk boundaries.
---
poppler/CachedFile.h | 2 +-
poppler/Stream.cc | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/poppler/CachedFile.h b/poppler/CachedFile.h
index 897ff4a..e1ff817 100644
--- a/poppler/CachedFile.h
+++ b/poppler/CachedFile.h
@@ -24,7 +24,7 @@
//------------------------------------------------------------------------
-#define CachedFileChunkSize 8192
+#define CachedFileChunkSize 8192 // This should be a multiple of cachedStreamBufSize
class GooString;
class CachedFileLoader;
diff --git a/poppler/Stream.cc b/poppler/Stream.cc
index bb25959..60b909c 100644
--- a/poppler/Stream.cc
+++ b/poppler/Stream.cc
@@ -874,7 +874,7 @@ GBool CachedFileStream::fillBuf()
if (limited && bufPos + cachedStreamBufSize > start + length) {
n = start + length - bufPos;
} else {
- n = cachedStreamBufSize;
+ n = cachedStreamBufSize - (bufPos % cachedStreamBufSize);
}
cc->read(buf, 1, n);
bufEnd = buf + n;
--
1.6.4.2
More information about the poppler
mailing list