[poppler] poppler/Dict.cc poppler/Dict.h
Albert Astals Cid
aacid at kemper.freedesktop.org
Sun Oct 17 04:47:45 PDT 2010
poppler/Dict.cc | 98 ++++++++++++++++++++++++++++++++++++++++++++------------
poppler/Dict.h | 2 +
2 files changed, 80 insertions(+), 20 deletions(-)
New commits:
commit dd14ef6b211ac1c8a4f16bb6094dbfd6a09cbef9
Author: Albert Astals Cid <aacid at kde.org>
Date: Sun Oct 17 12:46:55 2010 +0100
Improve dict lookup speed for big dicts
Based on a patch by PaweÅ Wiejacha <pawel.wiejacha at gmail.com>
diff --git a/poppler/Dict.cc b/poppler/Dict.cc
index a899bca..4dbf641 100644
--- a/poppler/Dict.cc
+++ b/poppler/Dict.cc
@@ -16,7 +16,8 @@
// Copyright (C) 2005 Kristian Høgsberg <krh at redhat.com>
// Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk at gmail.com>
// Copyright (C) 2007-2008 Julien Rebetez <julienr at svn.gnome.org>
-// Copyright (C) 2008 Albert Astals Cid <aacid at kde.org>
+// Copyright (C) 2008, 2010 Albert Astals Cid <aacid at kde.org>
+// Copyright (C) 2010 PaweÅ Wiejacha <pawel.wiejacha at gmail.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -29,6 +30,7 @@
#pragma implementation
#endif
+#include <algorithm>
#include <stddef.h>
#include <string.h>
#include "goo/gmem.h"
@@ -40,11 +42,37 @@
// Dict
//------------------------------------------------------------------------
+static const int SORT_LENGTH_LOWER_LIMIT = 32;
+
+static inline bool cmpDictEntries(const DictEntry &e1, const DictEntry &e2)
+{
+ return strcmp(e1.key, e2.key) < 0;
+}
+
+static int binarySearch(const char *key, DictEntry *entries, int length)
+{
+ int first = 0;
+ int end = length - 1;
+ while (first <= end) {
+ const int middle = (first + end) / 2;
+ const int res = strcmp(key, entries[middle].key);
+ if (res == 0) {
+ return middle;
+ } else if (res < 0) {
+ end = middle - 1;
+ } else {
+ first = middle + 1;
+ }
+ }
+ return -1;
+}
+
Dict::Dict(XRef *xrefA) {
xref = xrefA;
entries = NULL;
size = length = 0;
ref = 1;
+ sorted = gFalse;
}
Dict::Dict(Dict* dictA) {
@@ -52,6 +80,7 @@ Dict::Dict(Dict* dictA) {
size = length = dictA->length;
ref = 1;
+ sorted = dictA->sorted;
entries = (DictEntry *)gmallocn(size, sizeof(DictEntry));
for (int i=0; i<length; i++) {
entries[i].key = strdup(dictA->entries[i].key);
@@ -70,6 +99,12 @@ Dict::~Dict() {
}
void Dict::add(char *key, Object *val) {
+ if (sorted) {
+ // We use add on very few occasions so
+ // virtually this will never be hit
+ sorted = gFalse;
+ }
+
if (length == size) {
if (length == 0) {
size = 8;
@@ -84,33 +119,56 @@ void Dict::add(char *key, Object *val) {
}
inline DictEntry *Dict::find(char *key) {
- int i;
+ if (!sorted && length >= SORT_LENGTH_LOWER_LIMIT)
+ {
+ sorted = gTrue;
+ std::sort(entries, entries+length, cmpDictEntries);
+ }
+
+ if (sorted) {
+ const int pos = binarySearch(key, entries, length);
+ if (pos != -1) {
+ return &entries[pos];
+ }
+ } else {
+ int i;
- for (i = length - 1; i >=0; --i) {
- if (!strcmp(key, entries[i].key))
- return &entries[i];
+ for (i = length - 1; i >=0; --i) {
+ if (!strcmp(key, entries[i].key))
+ return &entries[i];
+ }
}
return NULL;
}
void Dict::remove(char *key) {
- int i;
- bool found = false;
- DictEntry tmp;
- if(length == 0) return;
-
- for(i=0; i<length; i++) {
- if (!strcmp(key, entries[i].key)) {
- found = true;
- break;
+ if (sorted) {
+ const int pos = binarySearch(key, entries, length);
+ if (pos != -1) {
+ length -= 1;
+ if (pos != length) {
+ memmove(&entries[pos], &entries[pos + 1], (length - pos) * sizeof(DictEntry));
+ }
+ }
+ } else {
+ int i;
+ bool found = false;
+ DictEntry tmp;
+ if(length == 0) return;
+
+ for(i=0; i<length; i++) {
+ if (!strcmp(key, entries[i].key)) {
+ found = true;
+ break;
+ }
}
+ if(!found) return;
+ //replace the deleted entry with the last entry
+ length -= 1;
+ tmp = entries[length];
+ if (i!=length) //don't copy the last entry if it is deleted
+ entries[i] = tmp;
}
- if(!found) return;
- //replace the deleted entry with the last entry
- length -= 1;
- tmp = entries[length];
- if (i!=length) //don't copy the last entry if it is deleted
- entries[i] = tmp;
}
void Dict::set(char *key, Object *val) {
diff --git a/poppler/Dict.h b/poppler/Dict.h
index a76bc89..6dfc403 100644
--- a/poppler/Dict.h
+++ b/poppler/Dict.h
@@ -17,6 +17,7 @@
// Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk at gmail.com>
// Copyright (C) 2007-2008 Julien Rebetez <julienr at svn.gnome.org>
// Copyright (C) 2010 Albert Astals Cid <aacid at kde.org>
+// Copyright (C) 2010 PaweÅ Wiejacha <pawel.wiejacha at gmail.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -89,6 +90,7 @@ public:
private:
+ GBool sorted;
XRef *xref; // the xref table for this PDF file
DictEntry *entries; // array of entries
int size; // size of <entries> array
More information about the poppler
mailing list