[poppler] poppler/poppler: Lexer.cc, 1.3, 1.4 Lexer.h, 1.3, 1.4 PageLabelInfo.cc, 1.6, 1.7 Parser.cc, 1.6, 1.7 UGooString.cc, 1.3, 1.4 UGooString.h, 1.1, 1.2

Albert Astals Cid aacid at kemper.freedesktop.org
Thu Dec 28 07:51:47 PST 2006


Update of /cvs/poppler/poppler/poppler
In directory kemper:/tmp/cvs-serv20649/poppler

Modified Files:
	Lexer.cc Lexer.h PageLabelInfo.cc Parser.cc UGooString.cc 
	UGooString.h 
Log Message:
* goo/GooString.cc
* goo/GooString.h
* goo/gmem.c
* goo/gmem.h
* poppler/Lexer.cc
* poppler/Lexer.h
* poppler/PageLabelInfo.cc
* poppler/Parser.cc
* poppler/UGooString.cc
* poppler/UGooString.h: Patch by Krzysztof Kowalczyk <kkowalczyk at gmail.com> to improve performance. See bug 7808 for details.


Index: Lexer.cc
===================================================================
RCS file: /cvs/poppler/poppler/poppler/Lexer.cc,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- Lexer.cc	17 Jan 2006 21:35:31 -0000	1.3
+++ Lexer.cc	28 Dec 2006 15:51:44 -0000	1.4
@@ -50,6 +50,7 @@
 Lexer::Lexer(XRef *xrefA, Stream *str) {
   Object obj;
 
+  lookCharLastValueCached = LOOK_VALUE_NOT_CACHED;
   xref = xrefA;
 
   curStr.initStream(str);
@@ -63,6 +64,7 @@
 Lexer::Lexer(XRef *xrefA, Object *obj) {
   Object obj2;
 
+  lookCharLastValueCached = LOOK_VALUE_NOT_CACHED;
   xref = xrefA;
 
   if (obj->isStream()) {
@@ -90,9 +92,15 @@
   }
 }
 
-int Lexer::getChar() {
+int inline Lexer::getChar() {
   int c;
 
+  if (LOOK_VALUE_NOT_CACHED != lookCharLastValueCached) {
+    c = lookCharLastValueCached;
+    lookCharLastValueCached = LOOK_VALUE_NOT_CACHED;
+    return c;
+  }
+
   c = EOF;
   while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) {
     curStr.streamClose();
@@ -106,11 +114,12 @@
   return c;
 }
 
-int Lexer::lookChar() {
-  if (curStr.isNone()) {
-    return EOF;
+int inline Lexer::lookChar() {
+  if (LOOK_VALUE_NOT_CACHED != lookCharLastValueCached) {
+    return lookCharLastValueCached;
   }
-  return curStr.streamLookChar();
+  lookCharLastValueCached = getChar();
+  return lookCharLastValueCached;
 }
 
 Object *Lexer::getObj(Object *obj, int objNum) {

Index: Lexer.h
===================================================================
RCS file: /cvs/poppler/poppler/poppler/Lexer.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- Lexer.h	17 Jan 2006 21:35:31 -0000	1.3
+++ Lexer.h	28 Dec 2006 15:51:44 -0000	1.4
@@ -63,6 +63,16 @@
   // Returns true if <c> is a whitespace character.
   static GBool isSpace(int c);
 
+
+  // often (e.g. ~30% on PDF Reference 1.6 pdf file from Adobe site) getChar
+  // is called right after lookChar. In order to avoid expensive re-doing
+  // getChar() of underlying stream, we cache the last value found by
+  // lookChar() in lookCharLastValueCached. A special value 
+  // LOOK_VALUE_NOT_CACHED that should never be part of stream indicates
+  // that no value was cached
+  static const int LOOK_VALUE_NOT_CACHED = -3;
+  int lookCharLastValueCached;
+
 private:
 
   int getChar();

Index: PageLabelInfo.cc
===================================================================
RCS file: /cvs/poppler/poppler/poppler/PageLabelInfo.cc,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -d -r1.6 -r1.7
--- PageLabelInfo.cc	1 May 2006 13:41:14 -0000	1.6
+++ PageLabelInfo.cc	28 Dec 2006 15:51:44 -0000	1.7
@@ -1,3 +1,4 @@
+#include <config.h>
 #include <limits.h>
 #include <stdlib.h>
 #include <stdio.h>

Index: Parser.cc
===================================================================
RCS file: /cvs/poppler/poppler/poppler/Parser.cc,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -d -r1.6 -r1.7
--- Parser.cc	3 Sep 2006 09:27:21 -0000	1.6
+++ Parser.cc	28 Dec 2006 15:51:44 -0000	1.7
@@ -39,6 +39,7 @@
 Object *Parser::getObj(Object *obj,
 		       Guchar *fileKey, int keyLength,
 		       int objNum, int objGen) {
+  UGooString key;
   Stream *str;
   Object obj2;
   int num;
@@ -75,14 +76,13 @@
 	error(getPos(), "Dictionary key must be a name object");
 	shift();
       } else {
-        // buf1 might go away in shift(), so construct the key
-        UGooString *key = new UGooString(buf1.getName());
+	// buf1 might go away in shift(), so construct the key
+	key.Set(buf1.getName());
 	shift();
 	if (buf1.isEOF() || buf1.isError()) {
-	  gfree(key);
 	  break;
 	}
-	obj->dictAddOwnKeyVal(key, getObj(&obj2, fileKey, keyLength, objNum, objGen));
+	obj->dictAdd(key, getObj(&obj2, fileKey, keyLength, objNum, objGen));
       }
     }
     if (buf1.isEOF())
@@ -120,8 +120,8 @@
     s = obj->getString();
     decrypt = new Decrypt(fileKey, keyLength, objNum, objGen);
     for (i = 0, p = obj->getString()->getCString();
-	 i < s->getLength();
-	 ++i, ++p) {
+      i < s->getLength();
+      ++i, ++p) {
       *p = decrypt->decryptByte(*p);
     }
     delete decrypt;
@@ -174,6 +174,11 @@
   baseStr = lexer->getStream()->getBaseStream();
 
   // skip over stream data
+  if (Lexer::LOOK_VALUE_NOT_CACHED != lexer->lookCharLastValueCached) {
+      // take into account the fact that we've cached one value
+      pos = pos - 1;
+      lexer->lookCharLastValueCached = Lexer::LOOK_VALUE_NOT_CACHED;
+  }
   lexer->setPos(pos + length);
 
   // refill token buffers and check for 'endstream'

Index: UGooString.cc
===================================================================
RCS file: /cvs/poppler/poppler/poppler/UGooString.cc,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- UGooString.cc	11 Jun 2006 16:14:32 -0000	1.3
+++ UGooString.cc	28 Dec 2006 15:51:44 -0000	1.4
@@ -15,61 +15,139 @@
 #include "PDFDocEncoding.h"
 #include "UGooString.h"
 
-UGooString::UGooString(Unicode *u, int l)
+int inline UGooString::roundedSize(int len) {
+  int delta;
+  if (len <= STR_STATIC_SIZE-1)
+      return STR_STATIC_SIZE;
+  delta = len < 256 ? 7 : 255;
+  return ((len + 1) + delta) & ~delta;
+}
+
+// Make sure that the buffer is big enough to contain <newLength> characters
+// plus terminating 0.
+// We assume that if this is being called from the constructor, <s> was set
+// to NULL and <length> was set to 0 to indicate unused string before calling us.
+void inline UGooString::resize(int newLength) {
+  Unicode *s1 = s;
+
+  if (!s || (roundedSize(length) != roundedSize(newLength))) {
+    // requires re-allocating data for string
+    if (newLength < STR_STATIC_SIZE)
+        s1 = sStatic;
+    else
+        s1 = new Unicode[roundedSize(newLength)];
+
+    // we had to re-allocate the memory, so copy the content of previous
+    // buffer into a new buffer
+    if (s) {
+      if (newLength < length) {
+        memcpy(s1, s, newLength);
+      } else {
+        memcpy(s1, s, length);
+      }
+    }
+    if (s != sStatic)
+      delete[] s;
+  }
+
+  s = s1;
+  length = newLength;
+  s[length] = '\0';
+}
+
+UGooString::UGooString()
 {
-  s = u;
-  length = l;
+  s = NULL;
+  length = 0;
+  resize(0);
 }
 
 UGooString::UGooString(GooString &str)
 {
-  if ((str.getChar(0) & 0xff) == 0xfe && (str.getChar(1) & 0xff) == 0xff)
+  s = NULL;
+  length = 0;
+  if (str.hasUnicodeMarker())
   {
-    length = (str.getLength() - 2) / 2;
-    s = (Unicode *)gmallocn(length, sizeof(Unicode));
+    resize((str.getLength() - 2) / 2);
     for (int j = 0; j < length; ++j) {
       s[j] = ((str.getChar(2 + 2*j) & 0xff) << 8) | (str.getChar(3 + 2*j) & 0xff);
     }
   } else
-    initChar(str);
+    Set(str.getCString(), str.getLength());
+}
+
+UGooString::UGooString(Unicode *u, int strLen)
+{
+  resize(strLen);
+  s = u;
 }
 
 UGooString::UGooString(const UGooString &str)
 {
-  length = str.length;
-  s = (Unicode *)gmallocn(length, sizeof(Unicode));
-  memcpy(s, str.s, length * sizeof(Unicode));
+  s = NULL;
+  length = 0;
+  Set(str);
 }
 
-UGooString::UGooString(const char *str)
+UGooString::UGooString(const char *str, int strLen)
 {
-  GooString aux(str);
-  initChar(aux);
+  s = NULL;
+  length = 0;
+  if (CALC_STRING_LEN == strLen)
+    strLen = strlen(str);
+  Set(str, strLen);
 }
 
-void UGooString::initChar(GooString &str)
+UGooString *UGooString::Set(const UGooString &str)
 {
-  length = str.getLength();
-  s = (Unicode *)gmallocn(length, sizeof(Unicode));
-  bool anyNonEncoded = false;
-  for (int j = 0; j < length && !anyNonEncoded; ++j) {
-    s[j] = pdfDocEncoding[str.getChar(j) & 0xff];
-    if (!s[j]) anyNonEncoded = true;
+  resize(str.length);
+  memcpy(s, str.s, length * sizeof(Unicode));
+  return this;
+}
+
+UGooString* UGooString::Set(const char *str, int strLen)
+{
+  int  j;
+  bool foundUnencoded = false;
+
+  if (CALC_STRING_LEN == strLen)
+    strLen = strlen(str);
+
+  resize(strLen);
+  for (j = 0; !foundUnencoded && j < length; ++j) {
+    s[j] = pdfDocEncoding[str[j] & 0xff];
+    if (!s[j]) {
+        foundUnencoded = true;
+        break;
+    }
   }
-  if ( anyNonEncoded )
+  if ( foundUnencoded )
   {
-    for (int j = 0; j < length; ++j) {
-      s[j] = str.getChar(j);
+    for (j = 0; j < length; ++j) {
+      s[j] = str[j];
     }
   }
+  return this;
+}
+
+UGooString *UGooString::clear()
+{
+    resize(0);
+    return this;
 }
 
 UGooString::~UGooString()
 {
-  gfree(s);
+  if (s != sStatic)
+    delete[] s;
 }
 
-int UGooString::cmp(UGooString *str) const
+int UGooString::cmp(const UGooString &str) const
+{
+    return cmp(&str);
+}
+
+int UGooString::cmp(const UGooString *str) const
 {
   int n1, n2, i, x;
   Unicode *p1, *p2;
@@ -85,6 +163,14 @@
   return n1 - n2;
 }
 
+// FIXME: 
+// a) this is confusing because GooString::getCString() returns a pointer
+//    but UGooString returns a newly allocated copy. Should give this
+//    a different name, like copyAsAscii() or copyAsGooString()
+// b) this interface requires copying. It should be changed to take a
+//    GooString& as a param and put the data inside it so that it uses
+//    caching optimization of GooString. Callers should be changed to use
+//    this new interface
 char *UGooString::getCString() const
 {
   char *res = new char[length + 1];
@@ -92,3 +178,4 @@
   res[length] = '\0';
   return res;
 }
+

Index: UGooString.h
===================================================================
RCS file: /cvs/poppler/poppler/poppler/UGooString.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- UGooString.h	18 Jan 2006 22:36:01 -0000	1.1
+++ UGooString.h	28 Dec 2006 15:51:44 -0000	1.2
@@ -18,36 +18,60 @@
 class UGooString
 {
 public:
-  // Create an unicode string
-  UGooString(Unicode *u, int l);
+
+  // Create empty unicode string
+  UGooString();
 
   // Create a unicode string from <str>.
   UGooString(GooString &str);
 
+  // Create a unicode string from u
+  UGooString(Unicode *u, int strLen);
+
   // Copy the unicode string
   UGooString(const UGooString &str);
 
   // Create a unicode string from <str>.
-  UGooString(const char *str);
+  UGooString(const char *str, int strLen = CALC_STRING_LEN);
+
+  UGooString *Set(const char *str, int strLen = CALC_STRING_LEN);
+  UGooString *Set(const UGooString &str);
+
+  // Set the string to empty string, freeing all dynamically allocated memory
+  // as a side effect
+  UGooString *clear();
 
-  // Destructor.
   ~UGooString();
 
-  // Get length.
+  void resize(int newLength);
+
   int getLength() const { return length; }
 
   // Compare two strings:  -1:<  0:=  +1:>
-  int cmp(UGooString *str) const;
+  int cmp(const UGooString *str) const;
+  int cmp(const UGooString &str) const;
 
   // get the unicode
   Unicode *unicode() const { return s; }
 
-  // get the const char*
+  // Return a newly allocated copy of the string converted to 
+  // ascii (non-Unicode) format. Caller has to delete [] the result
   char *getCString() const;
 
 private:
-  void initChar(GooString &str);
+  // you can tweak this number for a different speed/memory usage tradeoffs.
+  // In libc malloc() rounding is 16 so it's best to choose a value that
+  // results in sizeof(UGooString) be a multiple of 16.
+  // 20 makes sizeof(UGooString) to be 48.
+  static const int STR_STATIC_SIZE = 20;
+  // a special value telling that the length of the string is not given
+  // so it must be calculated from the strings
+  static const int CALC_STRING_LEN = -1;
 
+  int  roundedSize(int len);
+  void initChar(const char *str, int strLen);
+
+  Unicode sStatic[STR_STATIC_SIZE];
   int length;
   Unicode *s;
 };



More information about the poppler mailing list