[poppler] 2 commits - CMakeLists.txt poppler/Makefile.am poppler/poppler-config.h.cmake poppler/strtok_r.cpp

Albert Astals Cid aacid at kemper.freedesktop.org
Wed May 12 12:50:28 PDT 2010


 CMakeLists.txt                 |    1 
 poppler/Makefile.am            |    1 
 poppler/poppler-config.h.cmake |    4 
 poppler/strtok_r.cpp           |  212 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 218 insertions(+)

New commits:
commit 57ab0ebe993d79fe551bba58e0a70d55d32828f8
Author: Albert Astals Cid <aacid at kde.org>
Date:   Wed May 12 20:49:31 2010 +0100

    better copyright

diff --git a/poppler/strtok_r.cpp b/poppler/strtok_r.cpp
index 1d5b31c..900bc8c 100644
--- a/poppler/strtok_r.cpp
+++ b/poppler/strtok_r.cpp
@@ -17,6 +17,29 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+/* Copyright (C) 1991,93,96,97,99,2000,2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Based on strlen implementation by Torbjorn Granlund (tege at sics.se),
+   with help from Dan Sahlin (dan at sics.se) and
+   commentary by Jim Blandy (jimb at ai.mit.edu);
+   adaptation to memchr suggested by Dick Karpinski (dick at cca.ucsf.edu),
+   and implemented by Roland McGrath (roland at ai.mit.edu).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
 #ifdef __MINGW32__
 #include <string.h>
 #include <stdlib.h>
commit 69c2cf76cef9c190ac07726f60f1dccd3df5cb6d
Author: Albert Astals Cid <aacid at kde.org>
Date:   Wed May 12 20:47:25 2010 +0100

    mingw does not provide strtok_r
    
    Copy the glibc implementation

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1eba1fe..589191d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -266,6 +266,7 @@ set(poppler_SRCS
   poppler/PreScanOutputDev.cc
   poppler/PSTokenizer.cc
   poppler/Stream.cc
+  poppler/strtok_r.cpp
   poppler/UnicodeMap.cc
   poppler/UnicodeTypeTable.cc
   poppler/XRef.cc
diff --git a/poppler/Makefile.am b/poppler/Makefile.am
index 5dd8082..3affce8 100644
--- a/poppler/Makefile.am
+++ b/poppler/Makefile.am
@@ -309,6 +309,7 @@ libpoppler_la_SOURCES =		\
 	StdinCachedFile.cc	\
 	StdinPDFDocBuilder.cc	\
 	Stream.cc 		\
+	strtok_r.cpp		\
 	UnicodeMap.cc		\
 	UnicodeTypeTable.cc	\
 	XRef.cc			\
diff --git a/poppler/poppler-config.h.cmake b/poppler/poppler-config.h.cmake
index e3bb211..d049a12 100644
--- a/poppler/poppler-config.h.cmake
+++ b/poppler/poppler-config.h.cmake
@@ -91,7 +91,11 @@
 #endif
 
 #if defined(_WIN32)
+#ifdef _MSC_VER
 #define strtok_r strtok_s
+#elif __MINGW32__
+char * strtok_r (char *s, const char *delim, char **save_ptr);
+#endif
 #endif
 
 //------------------------------------------------------------------------
diff --git a/poppler/strtok_r.cpp b/poppler/strtok_r.cpp
new file mode 100644
index 0000000..1d5b31c
--- /dev/null
+++ b/poppler/strtok_r.cpp
@@ -0,0 +1,189 @@
+/* Reentrant string tokenizer.  Generic version.
+   Copyright (C) 1991,1996-1999,2001,2004 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifdef __MINGW32__
+#include <string.h>
+#include <stdlib.h>
+
+#define LONG_MAX_32_BITS 2147483647
+
+#ifndef LONG_MAX
+#define LONG_MAX LONG_MAX_32_BITS
+#endif
+
+#define __ptr_t char*
+
+/* Return a pointer to the first byte equal to (unsigned char) C_IN at or
+   after S.  There is no length limit: the loop ends only when a match is
+   found, so the caller must ensure one exists (e.g. the NUL of a string).  */
+static char * __rawmemchr (const void * s,int c_in)
+{
+  const unsigned char *char_ptr;
+  const unsigned long int *longword_ptr;
+  unsigned long int longword, magic_bits, charmask;
+  unsigned char c;
+
+  c = (unsigned char) c_in;
+
+  /* Handle the first few characters by reading one character at a time.
+     Do this until CHAR_PTR is aligned on a longword boundary.  The address
+     goes through size_t because long can be narrower than a pointer.  */
+  for (char_ptr = (const unsigned char *) s;
+       ((size_t) char_ptr & (sizeof (longword) - 1)) != 0;
+       ++char_ptr)
+    if (*char_ptr == c)
+      return (__ptr_t) char_ptr;
+
+  /* All these elucidatory comments refer to 4-byte longwords,
+     but the theory applies equally well to 8-byte longwords.  */
+
+  longword_ptr = (const unsigned long int *) char_ptr;
+
+  /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
+     the "holes."  Note that there is a hole just to the left of
+     each byte, with an extra at the end:
+
+     bits:  01111110 11111110 11111110 11111111
+     bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
+
+     The 1-bits make sure that carries propagate to the next 0-bit.
+     The 0-bits provide holes for carries to fall into.  */
+
+  if (sizeof (longword) != 4 && sizeof (longword) != 8)
+    abort ();
+
+#if LONG_MAX <= LONG_MAX_32_BITS
+  magic_bits = 0x7efefeff;
+#else
+  magic_bits = ((unsigned long int) 0x7efefefe << 32) | 0xfefefeff;
+#endif
+
+  /* Set up a longword, each of whose bytes is C.  */
+  charmask = c | (c << 8);
+  charmask |= charmask << 16;
+#if LONG_MAX > LONG_MAX_32_BITS
+  charmask |= charmask << 32;
+#endif
+
+  /* Instead of the traditional loop which tests each character,
+     we will test a longword at a time.  The tricky part is testing
+     if *any of the four* bytes in the longword in question are zero.  */
+  while (1)
+    {
+      /* We tentatively exit the loop if adding MAGIC_BITS to
+	 LONGWORD fails to change any of the hole bits of LONGWORD.
+
+	 1) Is this safe?  Will it catch all the zero bytes?
+	 Suppose there is a byte with all zeros.  Any carry bits
+	 propagating from its left will fall into the hole at its
+	 least significant bit and stop.  Since there will be no
+	 carry from its most significant bit, the LSB of the
+	 byte to the left will be unchanged, and the zero will be
+	 detected.
+
+	 2) Is this worthwhile?  Will it ignore everything except
+	 zero bytes?  Suppose every byte of LONGWORD has a bit set
+	 somewhere.  There will be a carry into bit 8.  If bit 8
+	 is set, this will carry into bit 16.  If bit 8 is clear,
+	 one of bits 9-15 must be set, so there will be a carry
+	 into bit 16.  Similarly, there will be a carry into bit
+	 24.  If one of bits 24-30 is set, there will be a carry
+	 into bit 31, so all of the hole bits will be changed.
+
+	 The one misfire occurs when bits 24-30 are clear and bit
+	 31 is set; in this case, the hole at bit 31 is not
+	 changed.  If we had access to the processor carry flag,
+	 we could close this loophole by putting the fourth hole
+	 at bit 32!
+
+	 So it ignores everything except 128's, when they're aligned
+	 properly.
+
+	 3) But wait!  Aren't we looking for C, not zero?
+	 Good point.  So what we do is XOR LONGWORD with a longword,
+	 each of whose bytes is C.  This turns each byte that is C
+	 into a zero.  */
+
+      longword = *longword_ptr++ ^ charmask;
+
+      /* Add MAGIC_BITS to LONGWORD.  */
+      if ((((longword + magic_bits)
+	    /* Set those bits that were unchanged by the addition.  */
+	    ^ ~longword)
+	   /* Look at only the hole bits.  If any of the hole bits
+	      are unchanged, most likely one of the bytes was a
+	      zero.  */
+	   & ~magic_bits) != 0)
+	{
+	  /* Which of the bytes was C?  If none of them were, it was
+	     a misfire; continue the search.  */
+	  const unsigned char *cp = (const unsigned char *) (longword_ptr - 1);
+
+	  if (cp[0] == c)
+	    return (__ptr_t) cp;
+	  if (cp[1] == c)
+	    return (__ptr_t) &cp[1];
+	  if (cp[2] == c)
+	    return (__ptr_t) &cp[2];
+	  if (cp[3] == c)
+	    return (__ptr_t) &cp[3];
+#if LONG_MAX > LONG_MAX_32_BITS
+	  if (cp[4] == c)
+	    return (__ptr_t) &cp[4];
+	  if (cp[5] == c)
+	    return (__ptr_t) &cp[5];
+	  if (cp[6] == c)
+	    return (__ptr_t) &cp[6];
+	  if (cp[7] == c)
+	    return (__ptr_t) &cp[7];
+#endif
+	}
+    }
+}
+
+/* Reentrant strtok: split off the next DELIM-separated token of S, or of
+   the string saved in *SAVE_PTR when S is NULL.  Returns the token, or NULL
+   if only delimiters remain; *SAVE_PTR is updated for the next call.  */
+char * strtok_r (char *s, const char *delim, char **save_ptr)
+{
+  char *start = (s != NULL) ? s : *save_ptr;
+  char *stop;
+
+  /* Step over any delimiters in front of the token.  */
+  start += strspn (start, delim);
+  if (*start == '\0')
+    {
+      *save_ptr = start;
+      return NULL;
+    }
+
+  stop = strpbrk (start, delim);
+  if (stop != NULL)
+    {
+      /* Cut the token off here and resume just after the cut.  */
+      *stop = '\0';
+      *save_ptr = stop + 1;
+      return start;
+    }
+
+  /* Last token: resume at the string's terminating NUL.  */
+  *save_ptr = __rawmemchr (start, '\0');
+  return start;
+}
+#endif


More information about the poppler mailing list