[poppler] 4 commits - regtest/backends regtest/Printer.py

Carlos Garcia Campos carlosgc at kemper.freedesktop.org
Fri Nov 29 01:24:18 PST 2013


 regtest/Printer.py           |   10 ++---
 regtest/backends/__init__.py |   79 ++++++++++++++++++++++---------------------
 2 files changed, 47 insertions(+), 42 deletions(-)

New commits:
commit 817cc333ca8009998f2099583fd0a2fc703f3db3
Author: Carlos Garcia Campos <carlosgc at gnome.org>
Date:   Fri Nov 29 10:07:16 2013 +0100

    regtest: Do not buffer stderr output
    
    Some buggy documents can produce a huge stderr output because of parsing
    errors or whatever. We could give a file directly to Popen to write the
    stderr file, but we only want to create the file when there's output,
    because it's what we use to know whether the command produced output or
    not. So, instead of buffering the whole output and then writing it to
    the file, we now read from the pipe while the command is running,
    writing the output to the file in chunks. This greatly reduces memory
    consumption when running some tests.

diff --git a/regtest/backends/__init__.py b/regtest/backends/__init__.py
index aa12022..b57d8aa 100644
--- a/regtest/backends/__init__.py
+++ b/regtest/backends/__init__.py
@@ -18,6 +18,7 @@
 
 import hashlib
 import os
+import select
 import shutil
 import errno
 from Config import Config
@@ -193,13 +194,6 @@ class Backend:
             return False
         return os.path.exists(test_result + self._diff_ext)
 
-    def __create_stderr_file(self, stderr, out_path):
-        if not stderr:
-            return
-        stderr_file = open(out_path + '.stderr', 'wb')
-        stderr_file.write(stderr)
-        stderr_file.close()
-
     def __create_failed_file_if_needed(self, status, out_path):
         if os.WIFEXITED(status) or os.WEXITSTATUS(status) == 0:
             return False
@@ -210,10 +204,36 @@ class Backend:
 
         return True
 
-    def _check_exit_status(self, p, out_path):
-        stderr = p.stderr.read()
-        self.__create_stderr_file(stderr, out_path)
+    def __redirect_stderr_to_file(self, fd, out_path):
+        stderr_file = None
+        read_set = [fd]
+        while read_set:
+            try:
+                rlist, wlist, xlist = select.select(read_set, [], [])
+            except select.error as e:
+                continue
+
+            if fd in rlist:
+                try:
+                    chunk = os.read(fd, 1024)
+                except OSError as e:
+                    if e.errno == errno.EIO:
+                        # Child process finished.
+                        chunk = ''
+                    else:
+                        raise e
+                if chunk:
+                    if stderr_file is None:
+                        stderr_file = open(out_path + '.stderr', 'wb')
+                    stderr_file.write(chunk)
+                else:
+                    read_set.remove(fd)
 
+        if stderr_file is not None:
+            stderr_file.close()
+
+    def _check_exit_status(self, p, out_path):
+        self.__redirect_stderr_to_file(p.stderr.fileno(), out_path)
         status = p.wait()
 
         if not os.WIFEXITED(status):
commit f8f82f1cc3a948239a05d7762210a3f244299db6
Author: Carlos Garcia Campos <carlosgc at gnome.org>
Date:   Fri Nov 29 10:03:24 2013 +0100

    regtest: Read test results in chunks to get the md5 digest
    
    Some backends can generate huge results, like huge postscript files that
    we don't want to load in memory to get the md5. So, instead of creating
    the md5 object with the entire file, we feed it with chunks of data
    using the update method. This greatly reduces memory consumption and
    improves performance as well.

diff --git a/regtest/backends/__init__.py b/regtest/backends/__init__.py
index eab154d..aa12022 100644
--- a/regtest/backends/__init__.py
+++ b/regtest/backends/__init__.py
@@ -16,7 +16,7 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
-from hashlib import md5
+import hashlib
 import os
 import shutil
 import errno
@@ -47,6 +47,14 @@ class Backend:
     def get_diff_ext(self):
         return self._diff_ext
 
+    def __md5sum(self, ref_path):
+        md5 = hashlib.md5()
+        with open(ref_path,'rb') as f:
+            for chunk in iter(lambda: f.read(128 * md5.block_size), b''):
+                md5.update(chunk)
+
+        return md5.hexdigest()
+
     def __should_have_checksum(self, entry):
         if not entry.startswith(self._name):
             return False
@@ -62,9 +70,7 @@ class Backend:
             if not self.__should_have_checksum(entry):
                 continue
             ref_path = os.path.join(refs_path, entry)
-            f = open(ref_path, 'rb')
-            md5_file.write("%s %s\n" % (md5(f.read()).hexdigest(), ref_path))
-            f.close()
+            md5_file.write("%s %s\n" % (self.__md5sum(ref_path), ref_path))
             if delete_refs:
                 os.remove(ref_path)
 
@@ -90,10 +96,9 @@ class Backend:
                 continue
 
             result_path = os.path.join(out_path, basename)
-            f = open(result_path, 'rb')
-            result_md5sum = md5(f.read()).hexdigest()
+
+            result_md5sum = self.__md5sum(result_path);
             matched = md5sum == result_md5sum
-            f.close()
 
             if update_refs:
                 result_md5.append("%s %s\n" % (result_md5sum, ref_path))
commit 3444a44397a890dbeb1bd10357dbc8246fd21ad0
Author: Carlos Garcia Campos <carlosgc at gnome.org>
Date:   Fri Nov 29 10:01:20 2013 +0100

    regtest: Remove unused method _check_exit_status2
    
    It was used when the backends ran odd and even pages in parallel, but
    it's no longer used since thread support was added.

diff --git a/regtest/backends/__init__.py b/regtest/backends/__init__.py
index ff6ef84..eab154d 100644
--- a/regtest/backends/__init__.py
+++ b/regtest/backends/__init__.py
@@ -220,26 +220,6 @@ class Backend:
 
         return True
 
-    def _check_exit_status2(self, p1, p2, out_path):
-        p1_stderr = p1.stderr.read()
-        status1 = p1.wait()
-        p2_stderr = p2.stderr.read()
-        status2 = p2.wait()
-
-        if p1_stderr or p2_stderr:
-            self.__create_stderr_file(p1_stderr + p2_stderr, out_path)
-
-        if not os.WIFEXITED(status1) or not os.WIFEXITED(status2):
-            open(out_path + '.crashed', 'w').close()
-            return False
-
-        if self.__create_failed_file_if_needed(status1, out_path):
-            return False
-        if self.__create_failed_file_if_needed(status2, out_path):
-            return False
-
-        return True
-
     def _diff_png(self, ref_path, result_path):
         try:
             import Image, ImageChops
commit 5f825df417947c51943f1db327e1aa6c3faa15b0
Author: Carlos Garcia Campos <carlosgc at gnome.org>
Date:   Fri Nov 29 09:57:57 2013 +0100

    regtest: Do not store the current line in Printer but only its length
    
    We are not using the line text anymore, but only the length.

diff --git a/regtest/Printer.py b/regtest/Printer.py
index 23dfd34..1de693d 100644
--- a/regtest/Printer.py
+++ b/regtest/Printer.py
@@ -32,19 +32,19 @@ class Printer:
         self._verbose = Config().verbose
         self._stream = sys.stdout
         self._rewrite = self._stream.isatty() and not self._verbose
-        self._current_line = None
+        self._current_line_len = 0
 
         self._lock = RLock()
 
         Printer.__single = self
 
     def _erase_current_line(self):
-        if self._current_line is None:
+        if not self._current_line_len:
             return
 
-        line_len = len(self._current_line)
+        line_len = self._current_line_len
         self._stream.write('\b' * line_len + ' ' * line_len + '\b' * line_len)
-        self._current_line = None
+        self._current_line_len = 0
 
     def _ensure_new_line(self, msg):
         if not msg.endswith('\n'):
@@ -62,7 +62,7 @@ class Printer:
         with self._lock:
             self._erase_current_line()
             self._print(msg)
-            self._current_line = msg[msg.rfind('\n') + 1:]
+            self._current_line_len = len(msg[msg.rfind('\n') + 1:])
 
     def printout_ln(self, msg=''):
         with self._lock:


More information about the poppler mailing list