[poppler] 4 commits - regtest/backends regtest/Printer.py
Carlos Garcia Campos
carlosgc at kemper.freedesktop.org
Fri Nov 29 01:24:18 PST 2013
regtest/Printer.py | 10 ++---
regtest/backends/__init__.py | 79 ++++++++++++++++++++++---------------------
2 files changed, 47 insertions(+), 42 deletions(-)
New commits:
commit 817cc333ca8009998f2099583fd0a2fc703f3db3
Author: Carlos Garcia Campos <carlosgc at gnome.org>
Date: Fri Nov 29 10:07:16 2013 +0100
regtest: Do not buffer stderr output
Some buggy documents can produce a huge stderr output because of parsing
errors or whatever. We could give a file directly to Popen to write the
stderr file, but we only want to create the file when there's output,
because it's what we use to know whether the command produced output or
not. So, instead of buffering the whole output and then writing it to the
file, we now read from the pipe while the command is running, writing
the output to the file in chunks. This greatly reduces memory
consumption when running some tests.
diff --git a/regtest/backends/__init__.py b/regtest/backends/__init__.py
index aa12022..b57d8aa 100644
--- a/regtest/backends/__init__.py
+++ b/regtest/backends/__init__.py
@@ -18,6 +18,7 @@
import hashlib
import os
+import select
import shutil
import errno
from Config import Config
@@ -193,13 +194,6 @@ class Backend:
return False
return os.path.exists(test_result + self._diff_ext)
- def __create_stderr_file(self, stderr, out_path):
- if not stderr:
- return
- stderr_file = open(out_path + '.stderr', 'wb')
- stderr_file.write(stderr)
- stderr_file.close()
-
def __create_failed_file_if_needed(self, status, out_path):
if os.WIFEXITED(status) or os.WEXITSTATUS(status) == 0:
return False
@@ -210,10 +204,36 @@ class Backend:
return True
- def _check_exit_status(self, p, out_path):
- stderr = p.stderr.read()
- self.__create_stderr_file(stderr, out_path)
+ def __redirect_stderr_to_file(self, fd, out_path):
+ stderr_file = None
+ read_set = [fd]
+ while read_set:
+ try:
+ rlist, wlist, xlist = select.select(read_set, [], [])
+ except select.error as e:
+ continue
+
+ if fd in rlist:
+ try:
+ chunk = os.read(fd, 1024)
+ except OSError as e:
+ if e.errno == errno.EIO:
+ # Child process finished.
+ chunk = ''
+ else:
+ raise e
+ if chunk:
+ if stderr_file is None:
+ stderr_file = open(out_path + '.stderr', 'wb')
+ stderr_file.write(chunk)
+ else:
+ read_set.remove(fd)
+ if stderr_file is not None:
+ stderr_file.close()
+
+ def _check_exit_status(self, p, out_path):
+ self.__redirect_stderr_to_file(p.stderr.fileno(), out_path)
status = p.wait()
if not os.WIFEXITED(status):
commit f8f82f1cc3a948239a05d7762210a3f244299db6
Author: Carlos Garcia Campos <carlosgc at gnome.org>
Date: Fri Nov 29 10:03:24 2013 +0100
regtest: Read test results in chunks to get the md5 digest
Some backends can generate huge results, like huge postscript files that
we don't want to load in memory to get the md5. So, instead of creating
the md5 object with the entire file, we feed it with chunks of data
using the update method. This greatly reduces memory consumption and
improves performance as well.
diff --git a/regtest/backends/__init__.py b/regtest/backends/__init__.py
index eab154d..aa12022 100644
--- a/regtest/backends/__init__.py
+++ b/regtest/backends/__init__.py
@@ -16,7 +16,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-from hashlib import md5
+import hashlib
import os
import shutil
import errno
@@ -47,6 +47,14 @@ class Backend:
def get_diff_ext(self):
return self._diff_ext
+ def __md5sum(self, ref_path):
+ md5 = hashlib.md5()
+ with open(ref_path,'rb') as f:
+ for chunk in iter(lambda: f.read(128 * md5.block_size), b''):
+ md5.update(chunk)
+
+ return md5.hexdigest()
+
def __should_have_checksum(self, entry):
if not entry.startswith(self._name):
return False
@@ -62,9 +70,7 @@ class Backend:
if not self.__should_have_checksum(entry):
continue
ref_path = os.path.join(refs_path, entry)
- f = open(ref_path, 'rb')
- md5_file.write("%s %s\n" % (md5(f.read()).hexdigest(), ref_path))
- f.close()
+ md5_file.write("%s %s\n" % (self.__md5sum(ref_path), ref_path))
if delete_refs:
os.remove(ref_path)
@@ -90,10 +96,9 @@ class Backend:
continue
result_path = os.path.join(out_path, basename)
- f = open(result_path, 'rb')
- result_md5sum = md5(f.read()).hexdigest()
+
+ result_md5sum = self.__md5sum(result_path);
matched = md5sum == result_md5sum
- f.close()
if update_refs:
result_md5.append("%s %s\n" % (result_md5sum, ref_path))
commit 3444a44397a890dbeb1bd10357dbc8246fd21ad0
Author: Carlos Garcia Campos <carlosgc at gnome.org>
Date: Fri Nov 29 10:01:20 2013 +0100
regtest: Remove unused method _check_exit_status2
It was used when the backends processed odd and even pages in parallel,
but it's no longer used since thread support was added.
diff --git a/regtest/backends/__init__.py b/regtest/backends/__init__.py
index ff6ef84..eab154d 100644
--- a/regtest/backends/__init__.py
+++ b/regtest/backends/__init__.py
@@ -220,26 +220,6 @@ class Backend:
return True
- def _check_exit_status2(self, p1, p2, out_path):
- p1_stderr = p1.stderr.read()
- status1 = p1.wait()
- p2_stderr = p2.stderr.read()
- status2 = p2.wait()
-
- if p1_stderr or p2_stderr:
- self.__create_stderr_file(p1_stderr + p2_stderr, out_path)
-
- if not os.WIFEXITED(status1) or not os.WIFEXITED(status2):
- open(out_path + '.crashed', 'w').close()
- return False
-
- if self.__create_failed_file_if_needed(status1, out_path):
- return False
- if self.__create_failed_file_if_needed(status2, out_path):
- return False
-
- return True
-
def _diff_png(self, ref_path, result_path):
try:
import Image, ImageChops
commit 5f825df417947c51943f1db327e1aa6c3faa15b0
Author: Carlos Garcia Campos <carlosgc at gnome.org>
Date: Fri Nov 29 09:57:57 2013 +0100
regtest: Do not store the current line in Printer but only its length
We are not using the line text anymore, but only the length.
diff --git a/regtest/Printer.py b/regtest/Printer.py
index 23dfd34..1de693d 100644
--- a/regtest/Printer.py
+++ b/regtest/Printer.py
@@ -32,19 +32,19 @@ class Printer:
self._verbose = Config().verbose
self._stream = sys.stdout
self._rewrite = self._stream.isatty() and not self._verbose
- self._current_line = None
+ self._current_line_len = 0
self._lock = RLock()
Printer.__single = self
def _erase_current_line(self):
- if self._current_line is None:
+ if not self._current_line_len:
return
- line_len = len(self._current_line)
+ line_len = self._current_line_len
self._stream.write('\b' * line_len + ' ' * line_len + '\b' * line_len)
- self._current_line = None
+ self._current_line_len = 0
def _ensure_new_line(self, msg):
if not msg.endswith('\n'):
@@ -62,7 +62,7 @@ class Printer:
with self._lock:
self._erase_current_line()
self._print(msg)
- self._current_line = msg[msg.rfind('\n') + 1:]
+ self._current_line_len = len(msg[msg.rfind('\n') + 1:])
def printout_ln(self, msg=''):
with self._lock:
More information about the poppler
mailing list