[poppler] 2 commits - CMakeLists.txt poppler/Makefile.am poppler/poppler-config.h.cmake poppler/strtok_r.cpp
Albert Astals Cid
aacid at kemper.freedesktop.org
Wed May 12 12:50:28 PDT 2010
CMakeLists.txt | 1
poppler/Makefile.am | 1
poppler/poppler-config.h.cmake | 4
poppler/strtok_r.cpp | 212 +++++++++++++++++++++++++++++++++++++++++
4 files changed, 218 insertions(+)
New commits:
commit 57ab0ebe993d79fe551bba58e0a70d55d32828f8
Author: Albert Astals Cid <aacid at kde.org>
Date: Wed May 12 20:49:31 2010 +0100
better copyright
diff --git a/poppler/strtok_r.cpp b/poppler/strtok_r.cpp
index 1d5b31c..900bc8c 100644
--- a/poppler/strtok_r.cpp
+++ b/poppler/strtok_r.cpp
@@ -17,6 +17,29 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
+/* Copyright (C) 1991,93,96,97,99,2000,2002 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Based on strlen implementation by Torbjorn Granlund (tege at sics.se),
+ with help from Dan Sahlin (dan at sics.se) and
+ commentary by Jim Blandy (jimb at ai.mit.edu);
+ adaptation to memchr suggested by Dick Karpinski (dick at cca.ucsf.edu),
+ and implemented by Roland McGrath (roland at ai.mit.edu).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
#ifdef __MINGW32__
#include <string.h>
#include <stdlib.h>
commit 69c2cf76cef9c190ac07726f60f1dccd3df5cb6d
Author: Albert Astals Cid <aacid at kde.org>
Date: Wed May 12 20:47:25 2010 +0100
mingw does not provide strtok_r
Copy the glibc implementation
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1eba1fe..589191d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -266,6 +266,7 @@ set(poppler_SRCS
poppler/PreScanOutputDev.cc
poppler/PSTokenizer.cc
poppler/Stream.cc
+ poppler/strtok_r.cpp
poppler/UnicodeMap.cc
poppler/UnicodeTypeTable.cc
poppler/XRef.cc
diff --git a/poppler/Makefile.am b/poppler/Makefile.am
index 5dd8082..3affce8 100644
--- a/poppler/Makefile.am
+++ b/poppler/Makefile.am
@@ -309,6 +309,7 @@ libpoppler_la_SOURCES = \
StdinCachedFile.cc \
StdinPDFDocBuilder.cc \
Stream.cc \
+ strtok_r.cpp \
UnicodeMap.cc \
UnicodeTypeTable.cc \
XRef.cc \
diff --git a/poppler/poppler-config.h.cmake b/poppler/poppler-config.h.cmake
index e3bb211..d049a12 100644
--- a/poppler/poppler-config.h.cmake
+++ b/poppler/poppler-config.h.cmake
@@ -91,7 +91,11 @@
#endif
#if defined(_WIN32)
+#ifdef _MSC_VER
#define strtok_r strtok_s
+#elif __MINGW32__
+char * strtok_r (char *s, const char *delim, char **save_ptr);
+#endif
#endif
//------------------------------------------------------------------------
diff --git a/poppler/strtok_r.cpp b/poppler/strtok_r.cpp
new file mode 100644
index 0000000..1d5b31c
--- /dev/null
+++ b/poppler/strtok_r.cpp
@@ -0,0 +1,189 @@
+/* Reentrant string tokenizer. Generic version.
+ Copyright (C) 1991,1996-1999,2001,2004 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifdef __MINGW32__
+#include <string.h>
+#include <stdlib.h>
+
+/* Largest value of a 32-bit signed long (2^31 - 1).  __rawmemchr compares
+   LONG_MAX against it to choose between its 4-byte and 8-byte code.  */
+#define LONG_MAX_32_BITS 2147483647
+
+/* Fall back to the 32-bit limit when nothing included so far has defined
+   LONG_MAX.  */
+#ifndef LONG_MAX
+#define LONG_MAX LONG_MAX_32_BITS
+#endif
+
+/* Pointer type that __rawmemchr casts its return values to.  */
+#define __ptr_t char*
+
+/* Find the first occurrence of C_IN (converted to unsigned char) in the
+   memory starting at S and return a pointer to it.
+
+   There is no length argument and no failure return: the scan only stops
+   when the byte is found, so the caller must guarantee that it occurs.
+   strtok_r below calls this with '\0' on a NUL-terminated string.
+
+   After a byte-wise prologue that brings the pointer to a longword
+   boundary, memory is examined one unsigned long at a time.  */
+static char * __rawmemchr (const void * s,int c_in)
+{
+  const unsigned char *char_ptr;
+  const unsigned long int *longword_ptr;
+  unsigned long int longword, magic_bits, charmask;
+  unsigned char c;
+
+  c = (unsigned char) c_in;
+
+  /* Handle the first few characters by reading one character at a time.
+     Do this until CHAR_PTR is aligned on a longword boundary.
+
+     The address is converted to size_t rather than unsigned long: on
+     64-bit Windows long is only 32 bits wide, and a C++ compiler rejects
+     a pointer cast to a narrower integer type.  Only the low bits are
+     needed for the alignment test.  */
+  for (char_ptr = (const unsigned char *) s;
+       ((size_t) char_ptr & (sizeof (longword) - 1)) != 0;
+       ++char_ptr)
+    if (*char_ptr == c)
+      return (__ptr_t) char_ptr;
+
+  /* All these elucidatory comments refer to 4-byte longwords,
+     but the theory applies equally well to 8-byte longwords.  */
+
+  longword_ptr = (unsigned long int *) char_ptr;
+
+  /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
+     the "holes."  Note that there is a hole just to the left of
+     each byte, with an extra at the end:
+
+     bits:  01111110 11111110 11111110 11111111
+     bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
+
+     The 1-bits make sure that carries propagate to the next 0-bit.
+     The 0-bits provide holes for carries to fall into.  */
+
+  /* The word-at-a-time code below is only written for 4- and 8-byte
+     longwords; refuse to run on anything else.  */
+  if (sizeof (longword) != 4 && sizeof (longword) != 8)
+    abort ();
+
+#if LONG_MAX <= LONG_MAX_32_BITS
+  magic_bits = 0x7efefeff;
+#else
+  magic_bits = ((unsigned long int) 0x7efefefe << 32) | 0xfefefeff;
+#endif
+
+  /* Set up a longword, each of whose bytes is C.  */
+  charmask = c | (c << 8);
+  charmask |= charmask << 16;
+#if LONG_MAX > LONG_MAX_32_BITS
+  charmask |= charmask << 32;
+#endif
+
+  /* Instead of the traditional loop which tests each character,
+     we will test a longword at a time.  The tricky part is testing
+     if *any of the four* bytes in the longword in question are zero.  */
+  while (1)
+    {
+      /* We tentatively exit the loop if adding MAGIC_BITS to
+         LONGWORD fails to change any of the hole bits of LONGWORD.
+
+         1) Is this safe?  Will it catch all the zero bytes?
+         Suppose there is a byte with all zeros.  Any carry bits
+         propagating from its left will fall into the hole at its
+         least significant bit and stop.  Since there will be no
+         carry from its most significant bit, the LSB of the
+         byte to the left will be unchanged, and the zero will be
+         detected.
+
+         2) Is this worthwhile?  Will it ignore everything except
+         zero bytes?  Suppose every byte of LONGWORD has a bit set
+         somewhere.  There will be a carry into bit 8.  If bit 8
+         is set, this will carry into bit 16.  If bit 8 is clear,
+         one of bits 9-15 must be set, so there will be a carry
+         into bit 16.  Similarly, there will be a carry into bit
+         24.  If one of bits 24-30 is set, there will be a carry
+         into bit 31, so all of the hole bits will be changed.
+
+         The one misfire occurs when bits 24-30 are clear and bit
+         31 is set; in this case, the hole at bit 31 is not
+         changed.  If we had access to the processor carry flag,
+         we could close this loophole by putting the fourth hole
+         at bit 32!
+
+         So it ignores everything except 128's, when they're aligned
+         properly.
+
+         3) But wait!  Aren't we looking for C, not zero?
+         Good point.  So what we do is XOR LONGWORD with a longword,
+         each of whose bytes is C.  This turns each byte that is C
+         into a zero.  */
+
+      longword = *longword_ptr++ ^ charmask;
+
+      /* Add MAGIC_BITS to LONGWORD.  */
+      if ((((longword + magic_bits)
+
+            /* Set those bits that were unchanged by the addition.  */
+            ^ ~longword)
+
+           /* Look at only the hole bits.  If any of the hole bits
+              are unchanged, most likely one of the bytes was a
+              zero.  */
+           & ~magic_bits) != 0)
+        {
+          /* Which of the bytes was C?  If none of them were, it was
+             a misfire; continue the search.  */
+
+          /* LONGWORD_PTR was already advanced, so step back one word.  */
+          const unsigned char *cp = (const unsigned char *) (longword_ptr - 1);
+
+          if (cp[0] == c)
+            return (__ptr_t) cp;
+          if (cp[1] == c)
+            return (__ptr_t) &cp[1];
+          if (cp[2] == c)
+            return (__ptr_t) &cp[2];
+          if (cp[3] == c)
+            return (__ptr_t) &cp[3];
+#if LONG_MAX > LONG_MAX_32_BITS
+          if (cp[4] == c)
+            return (__ptr_t) &cp[4];
+          if (cp[5] == c)
+            return (__ptr_t) &cp[5];
+          if (cp[6] == c)
+            return (__ptr_t) &cp[6];
+          if (cp[7] == c)
+            return (__ptr_t) &cp[7];
+#endif
+        }
+    }
+}
+
+/* Reentrant tokenizer.  Returns the next token of S delimited by any of
+   the characters in DELIM, or NULL when only delimiters (or nothing)
+   remain.  Pass S == NULL to continue from the position stored in
+   *SAVE_PTR by the previous call.  The delimiter that ends a token is
+   overwritten with '\0', and *SAVE_PTR is updated on every call.  */
+char * strtok_r (char *s, const char *delim, char **save_ptr)
+{
+  char *cursor = (s != NULL) ? s : *save_ptr;
+  char *stop;
+
+  /* Step over any delimiters in front of the next token.  */
+  cursor += strspn (cursor, delim);
+  if (*cursor == '\0')
+    {
+      /* Nothing left to return; remember the end of the string.  */
+      *save_ptr = cursor;
+      return NULL;
+    }
+
+  /* Locate the delimiter that closes this token, if there is one.  */
+  stop = strpbrk (cursor, delim);
+  if (stop != NULL)
+    {
+      /* Cut the token off and resume just past the delimiter next time.  */
+      *stop = '\0';
+      *save_ptr = stop + 1;
+      return cursor;
+    }
+
+  /* The token runs to the end of the string: park *SAVE_PTR on the
+     terminating NUL so the next call returns NULL.  */
+  *save_ptr = __rawmemchr (cursor, '\0');
+  return cursor;
+}
+#endif
More information about the poppler
mailing list