[Piglit] [PATCH 7/9] backend/json_.py: Make writes atomic

Dylan Baker baker.dylan.c at gmail.com
Tue Sep 23 17:55:54 PDT 2014


This patch changes the way that the JSONBackend writes tests by making
them atomic. What this means is that the writes completely succeed or
completely fail. This has a number of advantages. First, it means that
there is no need for a recovery mechanism. Second, it makes resume very easy.
Finally, it allows us to throw away lots of handrolled code and replace
it with json.dump and json.load

Signed-off-by: Dylan Baker <dylanx.c.baker at intel.com>
---
 framework/backends/json_.py       | 256 ++++++++++++--------------------------
 framework/programs/run.py         |  13 +-
 framework/results.py              |  42 +++++++
 framework/tests/backends_tests.py | 117 ++++++++++++++++-
 framework/tests/results_tests.py  |  60 +++++++++
 5 files changed, 301 insertions(+), 187 deletions(-)

diff --git a/framework/backends/json_.py b/framework/backends/json_.py
index a61051d..8b377d1 100644
--- a/framework/backends/json_.py
+++ b/framework/backends/json_.py
@@ -21,7 +21,8 @@
 """ Module providing json backend for piglit """
 
 import os
-import threading
+import shutil
+import itertools
 try:
     import simplejson as json
 except ImportError:
@@ -54,81 +55,26 @@ def piglit_encoder(obj):
 
 
 class JSONBackend(FSyncMixin, Backend):
-    '''
-    Writes to a JSON file stream
-
-    JSONWriter is threadsafe.
-
-    Example
-    -------
-
-    This call to ``json.dump``::
-        json.dump(
-            {
-                'a': [1, 2, 3],
-                'b': 4,
-                'c': {
-                    'x': 100,
-                },
-            }
-            file,
-            indent=JSONWriter.INDENT)
-
-    is equivalent to::
-        w = JSONWriter(file)
-        w._open_dict()
-        w._write_dict_item('a', [1, 2, 3])
-        w._write_dict_item('b', 4)
-        w._write_dict_item('c', {'x': 100})
-        w._close_dict()
-
-    which is also equivalent to::
-        w = JSONWriter(file)
-        w._open_dict()
-        w._write_dict_item('a', [1, 2, 3])
-        w._write_dict_item('b', 4)
-
-        w._write_dict_key('c')
-        w._open_dict()
-        w._write_dict_item('x', 100)
-        w._close_dict()
-
-        w._close_dict()
-    '''
+    """ Piglit's native JSON backend
 
+    This writes results out in piglit's native json format. This class uses
+    the python json module, or simplejson if it is available.
+
+    This class is atomic: writes either completely fail or completely succeed.
+    To achieve this it writes individual files for each test and for the
+    metadata, and composes them at the end into a single file and removes the
+    intermediate files. When it tries to compose these files if it cannot read
+    a file it just ignores it, making the result atomic.
+
+    """
     INDENT = 4
-    _LOCK = threading.RLock()
 
-    def __init__(self, f, **options):
-        self._file = open(os.path.join(f, 'results.json'), 'w')
+    def __init__(self, dest, start_count=0, **options):
+        self._dest = dest
         FSyncMixin.__init__(self, **options)
-        self.__indent_level = 0
-        self.__inhibit_next_indent = False
-        self.__encoder = json.JSONEncoder(indent=self.INDENT,
-                                          default=piglit_encoder)
-
-        # self.__is_collection_empty
-        #
-        # A stack that indicates if the currect collection is empty
-        #
-        # When _open_dict is called, True is pushed onto the
-        # stack. When the first element is written to the newly
-        # opened dict, the top of the stack is set to False.
-        # When the _close_dict is called, the stack is popped.
-        #
-        # The top of the stack is element -1.
-        #
-        self.__is_collection_empty = []
-
-        # self._open_containers
-        #
-        # A FILO stack that stores container information, each time
-        # self._open_dict() 'dict' is added to the stack, (other elements like
-        # 'list' could be added if support was added to JSONWriter for handling
-        # them), each to time self._close_dict() is called an element is
-        # removed. When self.close_json() is called each element of the stack
-        # is popped and written into the json
-        self._open_containers = []
+
+        # A counter the ensures each test gets a unique name
+        self._counter = itertools.count(start_count)
 
     def initialize(self, metadata):
         """ Write boilerplate json code
@@ -136,36 +82,25 @@ class JSONBackend(FSyncMixin, Backend):
         This writes all of the json except the actual tests.
 
         Arguments:
-        options -- any values to be put in the options dictionary, must be a
-                   dict-like object
-        name -- the name of the test
-        env -- any environment information to be written into the results, must
-               be a dict-like object
+        metadata -- a dictionary of values to be written
 
         """
-        with self._LOCK:
-            self._open_dict()
-            self._write_dict_item('results_version', CURRENT_JSON_VERSION)
-            self._write_dict_item('name', metadata['name'])
-
-            self._write_dict_key('options')
-            self._open_dict()
-            for key, value in metadata.iteritems():
-                # Dont' write env or name into the options dictionary
-                if key in ['env', 'name']:
-                    continue
-
-                # Loading a NoneType will break resume, and are a bug
-                assert value is not None, "Value {} is NoneType".format(key)
-                self._write_dict_item(key, value)
-            self._close_dict()
-
-            for key, value in metadata['env'].iteritems():
-                self._write_dict_item(key, value)
-
-            # Open the tests dictinoary so that tests can be written
-            self._write_dict_key('tests')
-            self._open_dict()
+        # If metadata is None then this is a loaded result and there is no need
+        # to initialize
+        out = {
+            'results_version': CURRENT_JSON_VERSION,
+            'name': metadata['name'],
+            'options': {k: v for k, v in metadata.iteritems() if k != 'name'},
+        }
+
+        with open(os.path.join(self._dest, 'metadata.json'), 'w') as f:
+            json.dump(out, f, default=piglit_encoder)
+
+        # make the directory for the tests
+        try:
+            os.mkdir(os.path.join(self._dest, 'tests'))
+        except OSError:
+            pass
 
     def finalize(self, metadata=None):
         """ End json serialization and cleanup
@@ -174,85 +109,48 @@ class JSONBackend(FSyncMixin, Backend):
         containers that are still open and closes the file
 
         """
-        # Ensure that there are no tests still writing by taking the lock here
-        with self._LOCK:
-            # Close the tests dictionary
-            self._close_dict()
-
-            # Write closing metadata
-            if metadata:
-                for key, value in metadata.iteritems():
-                    self._write_dict_item(key, value)
-
-            # Close the root dictionary object
-            self._close_dict()
-
-            # Close the file.
-            assert self._open_containers == [], \
-                "containers stack: {0}".format(self._open_containers)
-            self._file.close()
-
-    def __write_indent(self):
-        if self.__inhibit_next_indent:
-            self.__inhibit_next_indent = False
-            return
-        else:
-            i = ' ' * self.__indent_level * self.INDENT
-            self._file.write(i)
-
-    def __write(self, obj):
-        lines = list(self.__encoder.encode(obj).split('\n'))
-        n = len(lines)
-        for i in range(n):
-            self.__write_indent()
-            self._file.write(lines[i])
-            if i != n - 1:
-                self._file.write('\n')
-
-    def _open_dict(self):
-        self.__write_indent()
-        self._file.write('{')
-
-        self.__indent_level += 1
-        self.__is_collection_empty.append(True)
-        self._open_containers.append('dict')
-        self._fsync(self._file)
-
-    def _close_dict(self):
-        self.__indent_level -= 1
-        self.__is_collection_empty.pop()
-
-        self._file.write('\n')
-        self.__write_indent()
-        self._file.write('}')
-        assert self._open_containers[-1] == 'dict'
-        self._open_containers.pop()
-        self._fsync(self._file)
-
-    def _write_dict_item(self, key, value):
-        # Write key.
-        self._write_dict_key(key)
-
-        # Write value.
-        self.__write(value)
-
-        self._fsync(self._file)
-
-    def _write_dict_key(self, key):
-        # Write comma if this is not the initial item in the dict.
-        if self.__is_collection_empty[-1]:
-            self.__is_collection_empty[-1] = False
-        else:
-            self._file.write(',')
-
-        self._file.write('\n')
-        self.__write(key)
-        self._file.write(': ')
-
-        self.__inhibit_next_indent = True
-        self._fsync(self._file)
+        # Create a dictionary that is full of data to be written to a single
+        # file
+        data = {}
+
+        # Load the metadata and put it into a dictionary
+        with open(os.path.join(self._dest, 'metadata.json'), 'r') as f:
+            data.update(json.load(f))
+
+        # If there is more metadata add it the dictionary
+        if metadata:
+            data.update(metadata)
+
+        # Add the tests to the dictionary
+        data['tests'] = {}
+
+        test_dir = os.path.join(self._dest, 'tests')
+        for test in os.listdir(test_dir):
+            test = os.path.join(test_dir, test)
+            if os.path.isfile(test):
+                # Try to open the json snippets. If we fail to open a test then
+                # throw the whole thing out. This gives us atomic writes, the
+                # writing worked and is valid or it didn't work.
+                try:
+                    with open(test, 'r') as f:
+                        data['tests'].update(json.load(f))
+                except ValueError:
+                    pass
+        assert data['tests']
+
+        # write out the combined file.
+        with open(os.path.join(self._dest, 'results.json'), 'w') as f:
+            json.dump(data, f, default=piglit_encoder,
+                      indent=JSONBackend.INDENT)
+
+        # Delete the temporary files
+        os.unlink(os.path.join(self._dest, 'metadata.json'))
+        shutil.rmtree(os.path.join(self._dest, 'tests'))
 
     def write_test(self, name, data):
         """ Write a test into the JSON tests dictionary """
-        with self._LOCK:
-            self._write_dict_item(name, data)
+        t = os.path.join(self._dest, 'tests',
+                         '{}.json'.format(self._counter.next()))
+        with open(t, 'w') as f:
+            json.dump({name: data}, f, default=piglit_encoder)
+            self._fsync(f)
diff --git a/framework/programs/run.py b/framework/programs/run.py
index f0d703d..749fb07 100644
--- a/framework/programs/run.py
+++ b/framework/programs/run.py
@@ -255,7 +255,6 @@ def run(input_):
     # part of profile.run
     options['test_count'] = 0
 
-    # Begin json.
     backend = backends.get_backend(args.backend)(
         args.results_path,
         file_fsync=opts.sync,
@@ -292,7 +291,7 @@ def resume(input_):
                              "Default is piglit.conf")
     args = parser.parse_args(input_)
 
-    results = framework.results.load_results(args.results_path)
+    results = framework.results.TestrunResult.resume(args.results_path)
     opts = core.Options(concurrent=results.options['concurrent'],
                         exclude_filter=results.options['exclude_filter'],
                         include_filter=results.options['filter'],
@@ -311,12 +310,12 @@ def resume(input_):
     # Resume only works with the JSON backend
     backend = backends.get_backend('json')(
         args.results_path,
-        file_fsync=opts.sync)
-    backend.initialize(results.options)
+        file_fsync=opts.sync,
+        start_count=len(results.tests) + 1)
+    # Specifically do not initialize again; everything initialize does is done.
 
-    for key, value in results.tests.iteritems():
-        backend.write_test(key, value)
-        opts.exclude_tests.add(key)
+    for name in results.tests.iterkeys():
+        opts.exclude_tests.add(name)
 
     profile = framework.profile.merge_test_profiles(results.options['profile'])
     profile.results_dir = args.results_path
diff --git a/framework/results.py b/framework/results.py
index 5a2ebf8..25357cd 100644
--- a/framework/results.py
+++ b/framework/results.py
@@ -200,6 +200,48 @@ class TestrunResult(object):
                            if k in self.serialized_keys),
                       f, default=piglit_encoder, indent=JSONBackend.INDENT)
 
+    @classmethod
+    def resume(cls, results_dir):
+        """ Create a TestrunResult from an interupted run
+
+        This class method creates and returns a TestrunResult from a partial
+        result file. This does not load old streaming results by design; one
+        should not resume a run with a different piglit, as it leads to all kinds
+        of problems.
+
+        """
+        # Pylint can't infer that the json being loaded is a dict
+        # pylint: disable=maybe-no-member
+        assert os.path.isdir(results_dir), \
+            "TestrunResult.resume() requires a directory"
+
+        # Load the metadata
+        with open(os.path.join(results_dir, 'metadata.json'), 'r') as f:
+            meta = json.load(f)
+        assert meta['results_version'] == CURRENT_JSON_VERSION, \
+            "Old results version, resume impossible"
+
+        testrun = TestrunResult()
+        testrun.name = meta['name']
+        testrun.options = meta['options']
+        testrun.uname = meta.get('uname')
+        testrun.glxinfo = meta.get('glxinfo')
+        testrun.lspci = meta.get('lspci')
+
+        # Load all of the test names and add them to the test list
+        for file_ in os.listdir(os.path.join(results_dir, 'tests')):
+            with open(os.path.join(results_dir, 'tests', file_), 'r') as f:
+                try:
+                    test = json.load(f)
+                except json.JSONDecodeError:
+                    continue
+            # XXX: There has to be a better way to get a single key: value out
+            # of a dict even when the key name isn't known
+            for key, value in test.iteritems():
+                testrun.tests[key] = TestResult(value)
+
+        return testrun
+
 
 def load_results(filename):
     """ Loader function for TestrunResult class
diff --git a/framework/tests/backends_tests.py b/framework/tests/backends_tests.py
index 816f672..d1b3d23 100644
--- a/framework/tests/backends_tests.py
+++ b/framework/tests/backends_tests.py
@@ -27,6 +27,10 @@ try:
     from lxml import etree
 except ImportError:
     import xml.etree.cElementTree as etree
+try:
+    import simplejson as json
+except ImportError:
+    import json
 import nose.tools as nt
 import framework.results as results
 import framework.backends as backends
@@ -37,7 +41,6 @@ BACKEND_INITIAL_META = {
     'name': 'name',
     'env': {},
     'test_count': 0,
-    'test_suffix': '',
 }
 
 JUNIT_SCHEMA = 'framework/tests/schema/junit-7.xsd'
@@ -157,3 +160,115 @@ class TestJUnitMultiTest(TestJUnitSingleTest):
     def test_xml_valid(self):
         """ JUnitBackend.write_test() (twice) produces valid xml """
         super(TestJUnitMultiTest, self).test_xml_valid()
+
+
+def test_json_initialize_metadata():
+    """ JSONBackend.initialize() produces a metadata.json file """
+    baseline = {
+        'name': BACKEND_INITIAL_META['name'],
+        'results_version': backends.CURRENT_JSON_VERSION,
+        'options': {
+            'test_count': BACKEND_INITIAL_META['test_count'],
+            'env': BACKEND_INITIAL_META['env'],
+        }
+    }
+
+    with utils.tempdir() as f:
+        test = backends.JSONBackend(f)
+        test.initialize(BACKEND_INITIAL_META)
+
+        with open(os.path.join(f, 'metadata.json'), 'r') as t:
+            test = json.load(t)
+
+    nt.assert_dict_equal(baseline, test)
+
+
+class TestJSONTestMethod(utils.StaticDirectory):
+    @classmethod
+    def setup_class(cls):
+        cls.test_name = 'a/test/group/test1'
+        cls.result = results.TestResult({
+            'time': 1.2345,
+            'result': 'pass',
+            'out': 'this is stdout',
+            'err': 'this is stderr',
+        })
+        super(TestJSONTestMethod, cls).setup_class()
+        test = backends.JSONBackend(cls.tdir)
+        test.initialize(BACKEND_INITIAL_META)
+        test.write_test(cls.test_name, cls.result)
+
+    def test_write_test(self):
+        """ JSONBackend.write_test() adds tests to a 'tests' directory """
+        assert os.path.exists(os.path.join(self.tdir, 'tests', '0.json'))
+
+    def test_json_is_valid(self):
+        """ JSONBackend.write_test() produces valid json """
+        with open(os.path.join(self.tdir, 'tests', '0.json'), 'r') as f:
+            try:
+                json.load(f)
+            except Exception as e:
+                raise AssertionError(e)
+
+    def test_json_is_correct(self):
+        """ JSONBackend.write_test() produces correct json """
+        with open(os.path.join(self.tdir, 'tests', '0.json'), 'r') as f:
+            test = json.load(f)
+
+        nt.assert_dict_equal({self.test_name: self.result}, test)
+
+
+class TestJSONTestFinalize(utils.StaticDirectory):
+    @classmethod
+    def setup_class(cls):
+        cls.test_name = 'a/test/group/test1'
+        cls.result = results.TestResult({
+            'time': 1.2345,
+            'result': 'pass',
+            'out': 'this is stdout',
+            'err': 'this is stderr',
+        })
+        super(TestJSONTestFinalize, cls).setup_class()
+        test = backends.JSONBackend(cls.tdir)
+        test.initialize(BACKEND_INITIAL_META)
+        test.write_test(cls.test_name, cls.result)
+        test.finalize()
+
+    def test_remove_metadata(self):
+        """ JSONBackend.finalize() removes metadata.json """
+        assert not os.path.exists(os.path.join(self.tdir, 'metadata.json'))
+
+    def test_remove_tests(self):
+        """ JSONBackend.finalize() removes tests directory """
+        assert not os.path.exists(os.path.join(self.tdir, 'tests'))
+
+    def test_create_results(self):
+        """ JSONBackend.finalize() creates a results.json file """
+        assert os.path.exists(os.path.join(self.tdir, 'results.json'))
+
+    def test_results_valid(self):
+        """ JSONBackend.finalize() results.json is valid """
+        with open(os.path.join(self.tdir, 'results.json'), 'r') as f:
+            try:
+                json.load(f)
+            except Exception as e:
+                raise AssertionError(e)
+
+    def test_results_correct(self):
+        """ JSONBackend.finalize() results are expected """
+        baseline = {
+            'name': BACKEND_INITIAL_META['name'],
+            'results_version': backends.CURRENT_JSON_VERSION,
+            'options': {
+                'test_count': BACKEND_INITIAL_META['test_count'],
+                'env': BACKEND_INITIAL_META['env'],
+            },
+            'tests': {
+                self.test_name: dict(self.result)
+            }
+        }
+
+        with open(os.path.join(self.tdir, 'results.json'), 'r') as f:
+            test = json.load(f)
+
+        nt.assert_equal(baseline, test)
diff --git a/framework/tests/results_tests.py b/framework/tests/results_tests.py
index a020775..0b4cd23 100644
--- a/framework/tests/results_tests.py
+++ b/framework/tests/results_tests.py
@@ -27,6 +27,8 @@ import nose.tools as nt
 import framework.tests.utils as utils
 import framework.results as results
 import framework.status as status
+import framework.backends as backends
+from framework.tests.backends_tests import BACKEND_INITIAL_META
 
 
 def check_initialize(target):
@@ -147,3 +149,61 @@ def test_update_results_old():
         res = results.update_results(base, f.name)
 
     nt.assert_equal(res.results_version, results.CURRENT_JSON_VERSION)
+
+
+ at nt.raises(AssertionError)
+def test_resume_non_folder():
+    """ TestrunResult.resume doesn't accept a file """
+    with utils.with_tempfile('') as f:
+        results.TestrunResult.resume(f)
+
+
+def test_resume_load():
+    """ TestrunResult.resume loads with good results """
+    with utils.tempdir() as f:
+        backend = backends.JSONBackend(f)
+        backend.initialize(BACKEND_INITIAL_META)
+        backend.write_test("group1/test1", {'result': 'fail'})
+        backend.write_test("group1/test2", {'result': 'pass'})
+        backend.write_test("group2/test3", {'result': 'fail'})
+
+        try:
+            results.TestrunResult.resume(f)
+        except Exception as e:
+            raise AssertionError(e)
+
+
+def test_resume_load_valid():
+    """ TestrunResult.resume loads valid results """
+    with utils.tempdir() as f:
+        backend = backends.JSONBackend(f)
+        backend.initialize(BACKEND_INITIAL_META)
+        backend.write_test("group1/test1", {'result': 'fail'})
+        backend.write_test("group1/test2", {'result': 'pass'})
+        backend.write_test("group2/test3", {'result': 'fail'})
+
+        test = results.TestrunResult.resume(f)
+
+        nt.assert_set_equal(
+            set(test.tests.keys()),
+            set(['group1/test1', 'group1/test2', 'group2/test3']),
+        )
+
+
+def test_resume_load_invalid():
+    """ TestrunResult.resume ignores invalid results """
+    with utils.tempdir() as f:
+        backend = backends.JSONBackend(f)
+        backend.initialize(BACKEND_INITIAL_META)
+        backend.write_test("group1/test1", {'result': 'fail'})
+        backend.write_test("group1/test2", {'result': 'pass'})
+        backend.write_test("group2/test3", {'result': 'fail'})
+        with open(os.path.join(f, 'tests', 'x.json'), 'w') as w:
+            w.write('foo')
+
+        test = results.TestrunResult.resume(f)
+
+        nt.assert_set_equal(
+            set(test.tests.keys()),
+            set(['group1/test1', 'group1/test2', 'group2/test3']),
+        )
-- 
2.1.1



More information about the Piglit mailing list