[poppler] poppler/Dict.cc poppler/Dict.h

Albert Astals Cid aacid at kemper.freedesktop.org
Sun Oct 17 04:47:45 PDT 2010


 poppler/Dict.cc |   98 ++++++++++++++++++++++++++++++++++++++++++++------------
 poppler/Dict.h  |    2 +
 2 files changed, 80 insertions(+), 20 deletions(-)

New commits:
commit dd14ef6b211ac1c8a4f16bb6094dbfd6a09cbef9
Author: Albert Astals Cid <aacid at kde.org>
Date:   Sun Oct 17 12:46:55 2010 +0100

    Improve dict lookup speed for big dicts
    
    Based on a patch by Paweł Wiejacha <pawel.wiejacha at gmail.com>

diff --git a/poppler/Dict.cc b/poppler/Dict.cc
index a899bca..4dbf641 100644
--- a/poppler/Dict.cc
+++ b/poppler/Dict.cc
@@ -16,7 +16,8 @@
 // Copyright (C) 2005 Kristian Høgsberg <krh at redhat.com>
 // Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk at gmail.com>
 // Copyright (C) 2007-2008 Julien Rebetez <julienr at svn.gnome.org>
-// Copyright (C) 2008 Albert Astals Cid <aacid at kde.org>
+// Copyright (C) 2008, 2010 Albert Astals Cid <aacid at kde.org>
+// Copyright (C) 2010 Paweł Wiejacha <pawel.wiejacha at gmail.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -29,6 +30,7 @@
 #pragma implementation
 #endif
 
+#include <algorithm>
 #include <stddef.h>
 #include <string.h>
 #include "goo/gmem.h"
@@ -40,11 +42,37 @@
 // Dict
 //------------------------------------------------------------------------
 
+static const int SORT_LENGTH_LOWER_LIMIT = 32;
+
+static inline bool cmpDictEntries(const DictEntry &e1, const DictEntry &e2)
+{
+  return strcmp(e1.key, e2.key) < 0;
+}
+
+static int binarySearch(const char *key, DictEntry *entries, int length)
+{
+  int first = 0;
+  int end = length - 1;
+  while (first <= end) {
+    const int middle = (first + end) / 2;
+    const int res = strcmp(key, entries[middle].key);
+    if (res == 0) {
+      return middle;
+    } else if (res < 0) {
+      end = middle - 1;
+    } else {
+      first = middle + 1;
+    }
+  }
+  return -1;
+}
+
 Dict::Dict(XRef *xrefA) {
   xref = xrefA;
   entries = NULL;
   size = length = 0;
   ref = 1;
+  sorted = gFalse;
 }
 
 Dict::Dict(Dict* dictA) {
@@ -52,6 +80,7 @@ Dict::Dict(Dict* dictA) {
   size = length = dictA->length;
   ref = 1;
 
+  sorted = dictA->sorted;
   entries = (DictEntry *)gmallocn(size, sizeof(DictEntry));
   for (int i=0; i<length; i++) {
     entries[i].key = strdup(dictA->entries[i].key);
@@ -70,6 +99,12 @@ Dict::~Dict() {
 }
 
 void Dict::add(char *key, Object *val) {
+  if (sorted) {
+    // We use add on very few occasions so
+    // virtually this will never be hit
+    sorted = gFalse;
+  }
+
   if (length == size) {
     if (length == 0) {
       size = 8;
@@ -84,33 +119,56 @@ void Dict::add(char *key, Object *val) {
 }
 
 inline DictEntry *Dict::find(char *key) {
-  int i;
+  if (!sorted && length >= SORT_LENGTH_LOWER_LIMIT)
+  {
+      sorted = gTrue;
+      std::sort(entries, entries+length, cmpDictEntries);
+  }
+
+  if (sorted) {
+    const int pos = binarySearch(key, entries, length);
+    if (pos != -1) {
+      return &entries[pos];
+    }
+  } else {
+    int i;
 
-  for (i = length - 1; i >=0; --i) {
-    if (!strcmp(key, entries[i].key))
-      return &entries[i];
+    for (i = length - 1; i >=0; --i) {
+      if (!strcmp(key, entries[i].key))
+        return &entries[i];
+    }
   }
   return NULL;
 }
 
 void Dict::remove(char *key) {
-  int i; 
-  bool found = false;
-  DictEntry tmp;
-  if(length == 0) return;
-
-  for(i=0; i<length; i++) {
-    if (!strcmp(key, entries[i].key)) {
-      found = true;
-      break;
+  if (sorted) {
+    const int pos = binarySearch(key, entries, length);
+    if (pos != -1) {
+      length -= 1;
+      if (pos != length) {
+        memmove(&entries[pos], &entries[pos + 1], (length - pos) * sizeof(DictEntry));
+      }
+    }
+  } else {
+    int i; 
+    bool found = false;
+    DictEntry tmp;
+    if(length == 0) return;
+
+    for(i=0; i<length; i++) {
+      if (!strcmp(key, entries[i].key)) {
+        found = true;
+        break;
+      }
     }
+    if(!found) return;
+    //replace the deleted entry with the last entry
+    length -= 1;
+    tmp = entries[length];
+    if (i!=length) //don't copy the last entry if it is deleted 
+      entries[i] = tmp;
   }
-  if(!found) return;
-  //replace the deleted entry with the last entry
-  length -= 1;
-  tmp = entries[length];
-  if (i!=length) //don't copy the last entry if it is deleted 
-    entries[i] = tmp;
 }
 
 void Dict::set(char *key, Object *val) {
diff --git a/poppler/Dict.h b/poppler/Dict.h
index a76bc89..6dfc403 100644
--- a/poppler/Dict.h
+++ b/poppler/Dict.h
@@ -17,6 +17,7 @@
 // Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk at gmail.com>
 // Copyright (C) 2007-2008 Julien Rebetez <julienr at svn.gnome.org>
 // Copyright (C) 2010 Albert Astals Cid <aacid at kde.org>
+// Copyright (C) 2010 Paweł Wiejacha <pawel.wiejacha at gmail.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -89,6 +90,7 @@ public:
 
 private:
 
+  GBool sorted;
   XRef *xref;			// the xref table for this PDF file
   DictEntry *entries;		// array of entries
   int size;			// size of <entries> array


More information about the poppler mailing list