[poppler] poppler/poppler: Lexer.cc, 1.3, 1.4 Lexer.h, 1.3,
1.4 PageLabelInfo.cc, 1.6, 1.7 Parser.cc, 1.6,
1.7 UGooString.cc, 1.3, 1.4 UGooString.h, 1.1, 1.2
Albert Astals Cid
aacid at kemper.freedesktop.org
Thu Dec 28 07:51:47 PST 2006
- Previous message: [poppler] poppler/goo: GooString.cc, 1.3, 1.4 GooString.h, 1.3,
1.4 gmem.c, 1.5, 1.6 gmem.h, 1.3, 1.4
- Next message: [poppler] poppler: ChangeLog,1.434.2.13,1.434.2.14
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvs/poppler/poppler/poppler
In directory kemper:/tmp/cvs-serv20649/poppler
Modified Files:
Lexer.cc Lexer.h PageLabelInfo.cc Parser.cc UGooString.cc
UGooString.h
Log Message:
* goo/GooString.cc
* goo/GooString.h
* goo/gmem.c
* goo/gmem.h
* poppler/Lexer.cc
* poppler/Lexer.h
* poppler/PageLabelInfo.cc
* poppler/Parser.cc
* poppler/UGooString.cc
* poppler/UGooString.h: Patch by Krzysztof Kowalczyk <kkowalczyk at gmail.com> to improve performance. See bug 7808 for details.
Index: Lexer.cc
===================================================================
RCS file: /cvs/poppler/poppler/poppler/Lexer.cc,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- Lexer.cc 17 Jan 2006 21:35:31 -0000 1.3
+++ Lexer.cc 28 Dec 2006 15:51:44 -0000 1.4
@@ -50,6 +50,7 @@
Lexer::Lexer(XRef *xrefA, Stream *str) {
Object obj;
+ lookCharLastValueCached = LOOK_VALUE_NOT_CACHED;
xref = xrefA;
curStr.initStream(str);
@@ -63,6 +64,7 @@
Lexer::Lexer(XRef *xrefA, Object *obj) {
Object obj2;
+ lookCharLastValueCached = LOOK_VALUE_NOT_CACHED;
xref = xrefA;
if (obj->isStream()) {
@@ -90,9 +92,15 @@
}
}
-int Lexer::getChar() {
+int inline Lexer::getChar() {
int c;
+ if (LOOK_VALUE_NOT_CACHED != lookCharLastValueCached) {
+ c = lookCharLastValueCached;
+ lookCharLastValueCached = LOOK_VALUE_NOT_CACHED;
+ return c;
+ }
+
c = EOF;
while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) {
curStr.streamClose();
@@ -106,11 +114,12 @@
return c;
}
-int Lexer::lookChar() {
- if (curStr.isNone()) {
- return EOF;
+int inline Lexer::lookChar() {
+ if (LOOK_VALUE_NOT_CACHED != lookCharLastValueCached) {
+ return lookCharLastValueCached;
}
- return curStr.streamLookChar();
+ lookCharLastValueCached = getChar();
+ return lookCharLastValueCached;
}
Object *Lexer::getObj(Object *obj, int objNum) {
Index: Lexer.h
===================================================================
RCS file: /cvs/poppler/poppler/poppler/Lexer.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- Lexer.h 17 Jan 2006 21:35:31 -0000 1.3
+++ Lexer.h 28 Dec 2006 15:51:44 -0000 1.4
@@ -63,6 +63,16 @@
// Returns true if <c> is a whitespace character.
static GBool isSpace(int c);
+
+ // often (e.g. ~30% on PDF Reference 1.6 pdf file from Adobe site) getChar
+ // is called right after lookChar. In order to avoid expensive re-doing
+ // getChar() of underlying stream, we cache the last value found by
+ // lookChar() in lookCharLastValueCached. A special value
+ // LOOK_VALUE_NOT_CACHED that should never be part of stream indicates
+ // that no value was cached
+ static const int LOOK_VALUE_NOT_CACHED = -3;
+ int lookCharLastValueCached;
+
private:
int getChar();
Index: PageLabelInfo.cc
===================================================================
RCS file: /cvs/poppler/poppler/poppler/PageLabelInfo.cc,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -d -r1.6 -r1.7
--- PageLabelInfo.cc 1 May 2006 13:41:14 -0000 1.6
+++ PageLabelInfo.cc 28 Dec 2006 15:51:44 -0000 1.7
@@ -1,3 +1,4 @@
+#include <config.h>
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
Index: Parser.cc
===================================================================
RCS file: /cvs/poppler/poppler/poppler/Parser.cc,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -d -r1.6 -r1.7
--- Parser.cc 3 Sep 2006 09:27:21 -0000 1.6
+++ Parser.cc 28 Dec 2006 15:51:44 -0000 1.7
@@ -39,6 +39,7 @@
Object *Parser::getObj(Object *obj,
Guchar *fileKey, int keyLength,
int objNum, int objGen) {
+ UGooString key;
Stream *str;
Object obj2;
int num;
@@ -75,14 +76,13 @@
error(getPos(), "Dictionary key must be a name object");
shift();
} else {
- // buf1 might go away in shift(), so construct the key
- UGooString *key = new UGooString(buf1.getName());
+ // buf1 might go away in shift(), so construct the key
+ key.Set(buf1.getName());
shift();
if (buf1.isEOF() || buf1.isError()) {
- gfree(key);
break;
}
- obj->dictAddOwnKeyVal(key, getObj(&obj2, fileKey, keyLength, objNum, objGen));
+ obj->dictAdd(key, getObj(&obj2, fileKey, keyLength, objNum, objGen));
}
}
if (buf1.isEOF())
@@ -120,8 +120,8 @@
s = obj->getString();
decrypt = new Decrypt(fileKey, keyLength, objNum, objGen);
for (i = 0, p = obj->getString()->getCString();
- i < s->getLength();
- ++i, ++p) {
+ i < s->getLength();
+ ++i, ++p) {
*p = decrypt->decryptByte(*p);
}
delete decrypt;
@@ -174,6 +174,11 @@
baseStr = lexer->getStream()->getBaseStream();
// skip over stream data
+ if (Lexer::LOOK_VALUE_NOT_CACHED != lexer->lookCharLastValueCached) {
+ // take into account the fact that we've cached one value
+ pos = pos - 1;
+ lexer->lookCharLastValueCached = Lexer::LOOK_VALUE_NOT_CACHED;
+ }
lexer->setPos(pos + length);
// refill token buffers and check for 'endstream'
Index: UGooString.cc
===================================================================
RCS file: /cvs/poppler/poppler/poppler/UGooString.cc,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- UGooString.cc 11 Jun 2006 16:14:32 -0000 1.3
+++ UGooString.cc 28 Dec 2006 15:51:44 -0000 1.4
@@ -15,61 +15,139 @@
#include "PDFDocEncoding.h"
#include "UGooString.h"
-UGooString::UGooString(Unicode *u, int l)
+int inline UGooString::roundedSize(int len) {
+ int delta;
+ if (len <= STR_STATIC_SIZE-1)
+ return STR_STATIC_SIZE;
+ delta = len < 256 ? 7 : 255;
+ return ((len + 1) + delta) & ~delta;
+}
+
+// Make sure that the buffer is big enough to contain <newLength> characters
+// plus terminating 0.
+// We assume that if this is being called from the constructor, <s> was set
+// to NULL and <length> was set to 0 to indicate unused string before calling us.
+void inline UGooString::resize(int newLength) {
+ Unicode *s1 = s;
+
+ if (!s || (roundedSize(length) != roundedSize(newLength))) {
+ // requires re-allocating data for string
+ if (newLength < STR_STATIC_SIZE)
+ s1 = sStatic;
+ else
+ s1 = new Unicode[roundedSize(newLength)];
+
+ // we had to re-allocate the memory, so copy the content of previous
+ // buffer into a new buffer
+ if (s) {
+ if (newLength < length) {
+ memcpy(s1, s, newLength);
+ } else {
+ memcpy(s1, s, length);
+ }
+ }
+ if (s != sStatic)
+ delete[] s;
+ }
+
+ s = s1;
+ length = newLength;
+ s[length] = '\0';
+}
+
+UGooString::UGooString()
{
- s = u;
- length = l;
+ s = NULL;
+ length = 0;
+ resize(0);
}
UGooString::UGooString(GooString &str)
{
- if ((str.getChar(0) & 0xff) == 0xfe && (str.getChar(1) & 0xff) == 0xff)
+ s = NULL;
+ length = 0;
+ if (str.hasUnicodeMarker())
{
- length = (str.getLength() - 2) / 2;
- s = (Unicode *)gmallocn(length, sizeof(Unicode));
+ resize((str.getLength() - 2) / 2);
for (int j = 0; j < length; ++j) {
s[j] = ((str.getChar(2 + 2*j) & 0xff) << 8) | (str.getChar(3 + 2*j) & 0xff);
}
} else
- initChar(str);
+ Set(str.getCString(), str.getLength());
+}
+
+UGooString::UGooString(Unicode *u, int strLen)
+{
+ resize(strLen);
+ s = u;
}
UGooString::UGooString(const UGooString &str)
{
- length = str.length;
- s = (Unicode *)gmallocn(length, sizeof(Unicode));
- memcpy(s, str.s, length * sizeof(Unicode));
+ s = NULL;
+ length = 0;
+ Set(str);
}
-UGooString::UGooString(const char *str)
+UGooString::UGooString(const char *str, int strLen)
{
- GooString aux(str);
- initChar(aux);
+ s = NULL;
+ length = 0;
+ if (CALC_STRING_LEN == strLen)
+ strLen = strlen(str);
+ Set(str, strLen);
}
-void UGooString::initChar(GooString &str)
+UGooString *UGooString::Set(const UGooString &str)
{
- length = str.getLength();
- s = (Unicode *)gmallocn(length, sizeof(Unicode));
- bool anyNonEncoded = false;
- for (int j = 0; j < length && !anyNonEncoded; ++j) {
- s[j] = pdfDocEncoding[str.getChar(j) & 0xff];
- if (!s[j]) anyNonEncoded = true;
+ resize(str.length);
+ memcpy(s, str.s, length * sizeof(Unicode));
+ return this;
+}
+
+UGooString* UGooString::Set(const char *str, int strLen)
+{
+ int j;
+ bool foundUnencoded = false;
+
+ if (CALC_STRING_LEN == strLen)
+ strLen = strlen(str);
+
+ resize(strLen);
+ for (j = 0; !foundUnencoded && j < length; ++j) {
+ s[j] = pdfDocEncoding[str[j] & 0xff];
+ if (!s[j]) {
+ foundUnencoded = true;
+ break;
+ }
}
- if ( anyNonEncoded )
+ if ( foundUnencoded )
{
- for (int j = 0; j < length; ++j) {
- s[j] = str.getChar(j);
+ for (j = 0; j < length; ++j) {
+ s[j] = str[j];
}
}
+ return this;
+}
+
+UGooString *UGooString::clear()
+{
+ resize(0);
+ return this;
}
UGooString::~UGooString()
{
- gfree(s);
+ if (s != sStatic)
+ delete[] s;
}
-int UGooString::cmp(UGooString *str) const
+int UGooString::cmp(const UGooString &str) const
+{
+ return cmp(&str);
+}
+
+int UGooString::cmp(const UGooString *str) const
{
int n1, n2, i, x;
Unicode *p1, *p2;
@@ -85,6 +163,14 @@
return n1 - n2;
}
+// FIXME:
+// a) this is confusing because GooString::getCString() returns a pointer
+// but UGooString returns a newly allocated copy. Should give this
+// a different name, like copyAsAscii() or copyAsGooString()
+// b) this interface requires copying. It should be changed to take a
+// GooString& as a param and put the data inside it so that it uses
+// caching optimization of GooString. Callers should be changed to use
+// this new interface
char *UGooString::getCString() const
{
char *res = new char[length + 1];
@@ -92,3 +178,4 @@
res[length] = '\0';
return res;
}
+
Index: UGooString.h
===================================================================
RCS file: /cvs/poppler/poppler/poppler/UGooString.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- UGooString.h 18 Jan 2006 22:36:01 -0000 1.1
+++ UGooString.h 28 Dec 2006 15:51:44 -0000 1.2
@@ -18,36 +18,60 @@
class UGooString
{
public:
- // Create an unicode string
- UGooString(Unicode *u, int l);
+
+ // Create empty unicode string
+ UGooString();
// Create a unicode string from <str>.
UGooString(GooString &str);
+ // Create a unicode string from u
+ UGooString(Unicode *u, int strLen);
+
// Copy the unicode string
UGooString(const UGooString &str);
// Create a unicode string from <str>.
- UGooString(const char *str);
+ UGooString(const char *str, int strLen = CALC_STRING_LEN);
+
+ UGooString *Set(const char *str, int strLen = CALC_STRING_LEN);
+ UGooString *Set(const UGooString &str);
+
+ // Set the string to empty string, freeing all dynamically allocated memory
+ // as a side effect
+ UGooString *clear();
- // Destructor.
~UGooString();
- // Get length.
+ void resize(int newLength);
+
int getLength() const { return length; }
// Compare two strings: -1:< 0:= +1:>
- int cmp(UGooString *str) const;
+ int cmp(const UGooString *str) const;
+ int cmp(const UGooString &str) const;
// get the unicode
Unicode *unicode() const { return s; }
- // get the const char*
+ // Return a newly allocated copy of the string converted to
+ // ascii (non-Unicode) format. Caller has to delete [] the result
char *getCString() const;
private:
- void initChar(GooString &str);
+ // you can tweak this number for a different speed/memory usage tradeoffs.
+ // In libc malloc() rounding is 16 so it's best to choose a value that
+ // results in sizeof(UGooString) be a multiple of 16.
+ // 20 makes sizeof(UGooString) to be 48.
+ static const int STR_STATIC_SIZE = 20;
+ // a special value telling that the length of the string is not given
+ // so it must be calculated from the strings
+ static const int CALC_STRING_LEN = -1;
+ int roundedSize(int len);
+ void initChar(const char *str, int strLen);
+
+ Unicode sStatic[STR_STATIC_SIZE];
int length;
Unicode *s;
};
- Previous message: [poppler] poppler/goo: GooString.cc, 1.3, 1.4 GooString.h, 1.3,
1.4 gmem.c, 1.5, 1.6 gmem.h, 1.3, 1.4
- Next message: [poppler] poppler: ChangeLog,1.434.2.13,1.434.2.14
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the poppler
mailing list